# Source code for tuiview.viewerRAT


"""
Contains the ViewerRAT class
"""
# This file is part of 'TuiView' - a simple Raster viewer
# Copyright (C) 2012  Sam Gillingham
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

import keyword
import numpy
import json
from osgeo import gdal
from PyQt5.QtCore import QObject, pyqtSignal

from . import viewererrors

# Column type codes accepted as the 'coltype' argument of
# ViewerRAT.addColumn()
NEWCOL_INT = 0
NEWCOL_FLOAT = 1
NEWCOL_STRING = 2

# Default printf-style display formats assigned to a column
# according to its GDAL field type
DEFAULT_INT_FMT = "%d"
DEFAULT_FLOAT_FMT = "%.2f"
DEFAULT_STRING_FMT = "%s"

# Dataset metadata keys under which the column order (a JSON list)
# and the colour table lookup column name are stored
VIEWER_COLUMN_ORDER_METADATA_KEY = 'VIEWER_COLUMN_ORDER'
VIEWER_COLUMN_LOOKUP_METADATA_KEY = 'VIEWER_COLUMN_LOOKUP'

# Number of RAT rows held by a cache chunk when evaluating
# user expressions
DEFAULT_CACHE_SIZE = 500000

# Descriptive text for each GDAL field type and field usage constant
# (not referenced elsewhere in this module - presumably for display
# by the GUI)
GDAL_COLTYPE_LOOKUP = {gdal.GFT_Integer: "Integer", 
        gdal.GFT_Real: "Floating point", gdal.GFT_String: "String"}
GDAL_COLUSAGE_LOOKUP = {gdal.GFU_Generic: "General purpose field",
        gdal.GFU_PixelCount: "Histogram pixel count",
        gdal.GFU_Name: "Class name", gdal.GFU_Min: "Class range minimum",
        gdal.GFU_Max: "Class range maximum", gdal.GFU_MinMax: "Class value",
        gdal.GFU_Red: "Red class color", gdal.GFU_Green: "Green class color",
        gdal.GFU_Blue: "Blue class color", gdal.GFU_Alpha: "Alpha",
        gdal.GFU_RedMin: "Color Range Red Minimum",
        gdal.GFU_GreenMin: "Color Range Green Minimum",
        gdal.GFU_BlueMin: "Color Range Blue Minimum",
        gdal.GFU_AlphaMin: "Color Range Alpha Minimum",
        gdal.GFU_RedMax: "Color Range Red Maximum",
        gdal.GFU_GreenMax: "Color Range Green Maximum",
        gdal.GFU_BlueMax: "Color Range Blue Maximum",
        gdal.GFU_AlphaMax: "Color Range Alpha Maximum"}


def formatException(code):
    """
    Formats the exception currently being handled for display
    and returns it as a string.

    Must be called from inside an ``except`` block. 'code' is the
    source text that was passed to eval(); it is used to fill in the
    source line for stack frames whose file name is '<string>', since
    Python cannot look those lines up itself.

    Returns an empty string if there is no exception being handled.
    """
    import sys
    import traceback

    # extract the current traceback and turn it into a list
    (ttype, value, tb) = sys.exc_info()
    if ttype is None:
        # not called from within an exception handler
        return ''
    stack = traceback.extract_tb(tb)

    # replace all instances of <string> with actual code
    fixedstack = []
    codearr = code.split('\n')
    for (filename, line, function, text) in stack:
        # Python 3 reports the text of a '<string>' frame as an empty
        # string rather than None, so test for 'no text' in general.
        # Also guard against line numbers outside the supplied code.
        if (filename == '<string>' and not text and line is not None
                and 1 <= line <= len(codearr)):
            text = codearr[line - 1]
        fixedstack.append((filename, line, function, text))

    trace = '\n'.join(traceback.format_list(fixedstack))

    # if a SyntaxError the error won't be part of the trace
    if ttype.__name__ == 'SyntaxError' and value.offset is not None:
        # simulate the offset pointer. SyntaxError.offset is 1-based
        # so the caret goes (offset - 1) columns in.
        errtext = value.text or ''
        if not errtext.endswith('\n'):
            errtext += '\n'
        pointer = ' ' * max(value.offset - 1, 0) + '^'
        value = str(value) + '\n' + errtext + pointer

    # add on the actual exceptions
    trace = '%s\n%s: %s' % (trace, ttype.__name__, value)
    return trace
class ViewerRAT(QObject):
    """
    Represents an attribute table in memory. Has method to read
    from GDAL. Also will apply a user expression.

    The column data itself stays in the GDAL RAT object; this class
    keeps the column names (in display order), types, usages and
    display formats, and reads/writes the data in chunks through
    RATCache objects.
    """
    # signals
    newProgress = pyqtSignal('QString', name='newProgress')
    newPercent = pyqtSignal(int, name='newPercent')
    endProgress = pyqtSignal(name='endProgress')

    # names that getUserExpressionGlobals() always provides to a
    # user expression in addition to the columns
    _SPECIAL_NAMES = ('row', 'queryrow', 'isselected', 'lastselected',
        'numpy')

    def __init__(self):
        QObject.__init__(self)
        self.clear()
        self.count = 0  # is incremented each time attributes read into class
                        # so querywindow can tell if it is new data or not

    def hasAttributes(self):
        """
        Returns True if there are actually attributes in this class
        """
        return self.columnNames is not None

    def getColumnNames(self):
        "return the column names"
        return self.columnNames

    def getSaneColumnNames(self, colNameList=None):
        """
        Gets column names made sane. This means adding '_'
        to Python keywords, replacing spaces with '_' and
        prefixing names that start with a digit with '_'.

        If colNameList is None the names of all the columns in
        this object are used. Returns a list the same length and
        order as the input.
        """
        sane = []
        if colNameList is None:
            colNameList = self.columnNames
        if colNameList is None:
            # no attributes loaded - nothing to make sane
            return sane

        for colName in colNameList:
            if keyword.iskeyword(colName):
                # append an underscore.
                colName = colName + '_'
            elif colName.find(' ') != -1:
                colName = colName.replace(' ', '_')

            # guard against empty names before looking at first char
            if colName and colName[0].isdigit():
                colName = '_' + colName
            sane.append(colName)

        return sane

    def getType(self, colName):
        "return the type for a given column name"
        return self.columnTypes[colName]

    def getUsage(self, colName):
        "return the usage for a given column name"
        return self.columnUsages[colName]

    def getFormat(self, colName):
        "return the preferred format string for a given column name"
        return self.columnFormats[colName]

    def setFormat(self, colName, fmt):
        "replace the format string for a given column name"
        self.columnFormats[colName] = fmt

    def getNumColumns(self):
        "get the number of columns"
        if self.columnNames is not None:
            return len(self.columnNames)
        else:
            return 0

    def getNumRows(self):
        "get the number of rows"
        if self.columnNames is not None and len(self.columnNames) > 0:
            return self.gdalRAT.GetRowCount()
        else:
            return 0

    def getCacheObject(self, chunkSize):
        """
        Creates a new cache object to cache chunks
        of the RAT
        """
        return RATCache(self.gdalRAT, chunkSize)

    def getEntireAttribute(self, colName):
        """
        Reads an entire column in one go and returns an array
        with all the data - for colour table use.

        Raises viewererrors.InvalidParameters if there is no column
        called colName.
        """
        ncols = self.gdalRAT.GetColumnCount()
        for col in range(ncols):
            if self.gdalRAT.GetNameOfCol(col) == colName:
                return self.gdalRAT.ReadAsArray(col)

        msg = 'unable to find column %s' % colName
        raise viewererrors.InvalidParameters(msg)

    def getLookupColName(self):
        "Return column to be used to lookup color table"
        return self.lookupColName

    def setLookupColName(self, name):
        "Set column to be used to lookup color table"
        self.lookupColName = name

    def clear(self):
        """
        Removes attributes from this class
        """
        self.columnNames = None  # list
        self.columnTypes = None  # dict
        self.columnUsages = None  # dict
        self.columnFormats = None  # dict
        self.lookupColName = None  # string
        self.gdalRAT = None  # object
        self.redColumnIdx = None  # int
        self.greenColumnIdx = None  # int
        self.blueColumnIdx = None  # int
        self.alphaColumnIdx = None  # int
        self.hasColorTable = False

    def addColumn(self, colname, coltype):
        """
        Adds a new column with the specified name. Pass one of
        the NEWCOL constants as coltype.

        Raises viewererrors.InvalidDataset if there is no RAT and
        viewererrors.InvalidParameters if the name is already in
        use or coltype is not recognised.
        """
        if self.columnNames is None:
            msg = 'No valid RAT for this file'
            raise viewererrors.InvalidDataset(msg)

        if colname in self.columnNames:
            msg = 'Already have a column called %s' % colname
            raise viewererrors.InvalidParameters(msg)

        # work out (and validate) the type before touching any state
        # so a bad coltype can't leave a half registered column behind
        if coltype == NEWCOL_INT:
            gdaltype = gdal.GFT_Integer
            fmt = DEFAULT_INT_FMT
        elif coltype == NEWCOL_FLOAT:
            gdaltype = gdal.GFT_Real
            fmt = DEFAULT_FLOAT_FMT
        elif coltype == NEWCOL_STRING:
            gdaltype = gdal.GFT_String
            fmt = DEFAULT_STRING_FMT
        else:
            msg = 'invalid column type'
            raise viewererrors.InvalidParameters(msg)

        # new cols always this type
        usage = gdal.GFU_Generic

        # create in the file first - if GDAL raises, our book keeping
        # is left untouched
        self.gdalRAT.CreateColumn(colname, gdaltype, usage)

        self.columnNames.append(colname)
        self.columnTypes[colname] = gdaltype
        self.columnFormats[colname] = fmt
        self.columnUsages[colname] = usage

    @staticmethod
    def readColumnName(rat, colName):
        """
        Same as readColumnIndex, but takes a name of
        column. Returns None if not found.
        """
        colArray = None
        ncols = rat.GetColumnCount()
        for col in range(ncols):
            if rat.GetNameOfCol(col) == colName:
                colArray = ViewerRAT.readColumnIndex(rat, col)
                break
        return colArray

    @staticmethod
    def readColumnIndex(rat, colIndex):
        """
        Read a column from the rat at index colIndex
        into a numpy array
        """
        return rat.ReadAsArray(colIndex)

    def readFromGDALBand(self, gdalband, gdaldataset):
        """
        Reads attributes from a GDAL band
        Does nothing if no attribute table
        or file not marked as thematic.

        The preferred column order and lookup column are read from
        the metadata of gdaldataset. Emits newProgress, newPercent
        and endProgress as it goes.
        """
        # reset vars
        self.clear()

        # have rat and thematic?
        self.newProgress.emit("Reading Attributes...")
        try:
            rat = gdalband.GetDefaultRAT()
            thematic = gdalband.GetMetadataItem('LAYER_TYPE') == 'thematic'
            if rat is not None and rat.GetRowCount() != 0 and thematic:
                # looks like we have attributes
                self.count += 1
                self.columnNames = []
                self.attributeData = {}
                self.columnTypes = {}
                self.columnUsages = {}
                self.columnFormats = {}
                self.gdalRAT = rat

                # first get the column names
                # we do this so we can preserve the order
                # of the columns in the attribute table
                ncols = rat.GetColumnCount()
                if ncols > 0:
                    percent_per_col = 100.0 / float(ncols)
                else:
                    percent_per_col = 0.0
                for col in range(ncols):
                    colname = rat.GetNameOfCol(col)
                    self.columnNames.append(colname)

                    dtype = rat.GetTypeOfCol(col)
                    self.columnTypes[colname] = dtype
                    usage = rat.GetUsageOfCol(col)
                    self.columnUsages[colname] = usage

                    # format dependent on type
                    if dtype == gdal.GFT_Integer:
                        self.columnFormats[colname] = DEFAULT_INT_FMT
                    elif dtype == gdal.GFT_Real:
                        self.columnFormats[colname] = DEFAULT_FLOAT_FMT
                    else:
                        self.columnFormats[colname] = DEFAULT_STRING_FMT

                    # the signal is declared as int so convert
                    # explicitly like the other emitters do
                    self.newPercent.emit(int(col * percent_per_col))

                # read in a preferred column order (if any)
                prefColOrder, lookup = self.readColumnOrderFromGDAL(
                                                gdaldataset)
                if len(prefColOrder) > 0:
                    # rearrange our columns given this
                    self.arrangeColumnOrder(prefColOrder)

                # see if there is a colour table
                self.findColorTableColumns()

                # remember the lookup column if set (None if not)
                self.lookupColName = lookup
        finally:
            # always close off the progress, even on error
            self.endProgress.emit()

    def findColorTableColumns(self):
        """
        Update the variables that define which are the columns
        in the colour table. The indices are positions within
        self.columnNames (i.e. the current display order).
        """
        for col, colname in enumerate(self.columnNames):
            usage = self.columnUsages[colname]
            if usage == gdal.GFU_Red:
                self.redColumnIdx = col
            elif usage == gdal.GFU_Green:
                self.greenColumnIdx = col
            elif usage == gdal.GFU_Blue:
                self.blueColumnIdx = col
            elif usage == gdal.GFU_Alpha:
                self.alphaColumnIdx = col

        # if we have all the columns, we have a color table
        self.hasColorTable = (self.redColumnIdx is not None and
                self.greenColumnIdx is not None and
                self.blueColumnIdx is not None and
                self.alphaColumnIdx is not None)

    def arrangeColumnOrder(self, prefColOrder):
        """
        rearrange self.columnNames given the preferred column
        order that is passed. Any columns not included
        in prefColOrder are tacked onto the end.
        Any columns in prefColOrder that don't exist are ignored.
        """
        remaining = list(self.columnNames)
        newColOrder = []
        for pref in prefColOrder:
            if pref in remaining:
                newColOrder.append(pref)
                # removing also means duplicates in prefColOrder
                # are only honoured once
                remaining.remove(pref)

        # everything left over goes on the end in its existing order
        newColOrder.extend(remaining)

        # finally clobber the old self.columnNames
        self.columnNames = newColOrder

        # this needs to be updated
        self.findColorTableColumns()

    def getUserExpressionGlobals(self, cache, isselected, queryRow,
                lastselected=None, colNameList=None):
        """
        Get globals for user in user expression.

        'cache' is the RATCache holding the current chunk of the
        columns, 'isselected' and 'lastselected' are the selection
        arrays for that chunk. If colNameList is None all columns
        are made available, otherwise just the named ones (which
        must already be in the cache).

        Raises viewererrors.AttributeTableTypeError if there are no
        attributes.
        """
        if not self.hasAttributes():
            msg = 'no attributes to work on'
            raise viewererrors.AttributeTableTypeError(msg)

        globaldict = {}
        # give them access to 'row' which is the row number.
        # This must line up with the chunk of the columns that is in
        # the cache, otherwise expressions mixing 'row' with columns
        # break for tables bigger than one chunk.
        chunkLength = cache.getLength()
        if chunkLength > 0:
            startRow = int(cache.currStartRow)
            globaldict['row'] = numpy.arange(startRow,
                                        startRow + chunkLength)
        else:
            # cache not positioned yet - fall back to the whole table
            globaldict['row'] = numpy.arange(self.getNumRows())
        # access to 'queryrow' with is the currently queried row
        globaldict['queryrow'] = queryRow
        # give them access to 'isselected' which is the currently
        # selected rows so they can do subselections
        globaldict['isselected'] = isselected
        # lastselected
        if lastselected is not None:
            globaldict['lastselected'] = lastselected
        # insert each column into the global namespace
        # as the array it represents
        if colNameList is None:
            colNameList = self.columnNames
        for colName, saneName in (
                zip(colNameList, self.getSaneColumnNames(colNameList))):
            # use sane names so as not to confuse Python
            globaldict[saneName] = cache.cacheDict[colName]
        # give them access to numpy
        globaldict['numpy'] = numpy
        return globaldict

    @staticmethod
    def _findFreeNames(expression):
        """
        Returns the list of names (no duplicates, in no particular
        guaranteed order) that the expression reads and does not bind
        itself (comprehension variables, lambda arguments etc are
        excluded). The expression is parsed, never executed.
        Returns an empty list if the expression can't be parsed.
        """
        import ast

        try:
            # eval() tolerates leading whitespace so do the same
            tree = ast.parse(expression.strip(), mode='eval')
        except (SyntaxError, ValueError, RecursionError, MemoryError):
            return []

        loaded = []
        bound = set()
        for node in ast.walk(tree):
            if isinstance(node, ast.Name):
                if isinstance(node.ctx, ast.Load):
                    if node.id not in loaded:
                        loaded.append(node.id)
                else:
                    bound.add(node.id)
            elif isinstance(node, ast.arg):
                bound.add(node.arg)

        return [name for name in loaded if name not in bound]

    @staticmethod
    def findVarNamesUsed(expression):
        """
        Work out what variable names are used in the given expression. The
        variable names are those apart from the special ones provided for
        in getUserExpressionGlobals(), and is intended to be just those
        which might be column names.

        The expression is analysed by parsing it rather than by running
        it, so every name is found no matter where it appears and the
        expression has no chance to do anything. Python builtins are
        not reported.

        Returns a list of the variable name strings (empty if the
        expression is not valid Python).
        """
        import builtins

        return [name for name in ViewerRAT._findFreeNames(expression)
                if name not in ViewerRAT._SPECIAL_NAMES
                and not hasattr(builtins, name)]

    def evaluateUserSelectExpression(self, expression, isselected, queryRow,
                                            lastselected):
        """
        Evaluate a user expression for selection.
        It is expected that a fragment of numpy code will be passed.
        numpy is provided in the global namespace.
        An exception is raised if code is invalid, or does not return
        an array of bools.

        Only the columns the expression refers to are read. Returns
        a bool array with one element per row of the RAT.

        NOTE: the expression is run with eval() - it is assumed to
        come from the user of the application.
        """
        self.newProgress.emit("Evaluating User Expression...")
        try:
            cache = self.getCacheObject(DEFAULT_CACHE_SIZE)
            nrows = self.getNumRows()

            # The expression uses the 'sane' names but the cache wants
            # the real column names, so translate. Names that aren't
            # columns are dropped here and get reported as a NameError
            # by eval() below.
            namesUsed = set(self._findFreeNames(expression))
            namesUsed.difference_update(self._SPECIAL_NAMES)
            allNames = self.columnNames
            if allNames is None:
                allNames = []
            columnsUsed = [colName for colName, saneName in
                    zip(allNames, self.getSaneColumnNames(allNames))
                    if saneName in namesUsed]

            # create the new selected array the full size of the rat
            # we will fill in each chunk as we go
            result = numpy.empty(nrows, dtype=bool)

            currRow = 0
            while currRow < nrows:
                cache.setStartRow(currRow, colName=columnsUsed)
                length = cache.getLength()

                isselectedSub = isselected[currRow:currRow + length]
                if lastselected is not None:
                    lastselectedSub = lastselected[currRow:currRow + length]
                else:
                    lastselectedSub = None

                globaldict = self.getUserExpressionGlobals(cache,
                            isselectedSub, queryRow, lastselectedSub,
                            colNameList=columnsUsed)

                try:
                    resultSub = eval(expression, globaldict)
                except Exception:
                    msg = formatException(expression)
                    raise viewererrors.UserExpressionSyntaxError(msg)

                # check type of result
                if not isinstance(resultSub, numpy.ndarray):
                    msg = 'must return a numpy array'
                    raise viewererrors.UserExpressionTypeError(msg)

                if resultSub.dtype.kind != 'b':
                    msg = 'must return a boolean array'
                    raise viewererrors.UserExpressionTypeError(msg)

                result[currRow:currRow + length] = resultSub

                currRow += DEFAULT_CACHE_SIZE
                # last chunk is usually short so don't go past 100
                self.newPercent.emit(
                        min(100, int((currRow / nrows) * 100)))
        finally:
            # always close off the progress, even on error
            self.endProgress.emit()

        return result

    def evaluateUserEditExpression(self, colName, expression, isselected,
                                            queryRow):
        """
        Evaluate a user expression for editing and apply result to rat
        where isselected == True
        It is expected that a fragment of numpy code will be passed.
        numpy is provided in the global namespace.
        An exception is raised if code is invalid.

        NOTE: the expression is run with eval() - it is assumed to
        come from the user of the application.
        """
        self.newProgress.emit("Evaluating User Expression...")
        try:
            cache = self.getCacheObject(DEFAULT_CACHE_SIZE)
            nrows = self.getNumRows()

            currRow = 0
            resultSub = None
            isScalar = False  # user code returns a scalar - we
                # can take shortcuts since not all the cols need to be read

            while currRow < nrows:
                # guess the length
                isselectedSub = isselected[
                                currRow:currRow + DEFAULT_CACHE_SIZE]
                # chunks with nothing selected are skipped altogether
                if isselectedSub.any():

                    if isScalar:
                        # only need the column being written
                        cache.setStartRow(currRow, colName)
                    else:
                        cache.setStartRow(currRow)

                    if not isScalar:
                        # can re-use the first result if scalar
                        # all calls should be the same
                        length = cache.getLength()
                        # re do with correct length
                        isselectedSub = isselected[currRow:currRow + length]

                        globaldict = self.getUserExpressionGlobals(cache,
                                            isselectedSub, queryRow)
                        try:
                            resultSub = eval(expression, globaldict)
                        except Exception:
                            msg = formatException(expression)
                            raise viewererrors.UserExpressionSyntaxError(msg)

                    cache.updateColumn(colName, resultSub, isselected)
                    if numpy.isscalar(resultSub):
                        isScalar = True

                currRow += DEFAULT_CACHE_SIZE
                # last chunk is usually short so don't go past 100
                self.newPercent.emit(
                        min(100, int((currRow / nrows) * 100)))
        finally:
            # always close off the progress, even on error
            self.endProgress.emit()

    def setColumnToConstant(self, colName, value, isselected):
        """
        Sets whole column to be a constant value
        (where isselected == True)
        for keyboard shortcuts etc
        """
        self.newProgress.emit("Evaluating User Expression...")
        try:
            cache = self.getCacheObject(DEFAULT_CACHE_SIZE)
            nrows = self.getNumRows()

            currRow = 0
            while currRow < nrows:
                # guess size
                isselectedSub = isselected[
                                currRow:currRow + DEFAULT_CACHE_SIZE]
                # chunks with nothing selected are skipped altogether
                if isselectedSub.any():
                    cache.setStartRow(currRow, colName)
                    cache.updateColumn(colName, value, isselected)

                currRow += DEFAULT_CACHE_SIZE
                # last chunk is usually short so don't go past 100
                self.newPercent.emit(
                        min(100, int((currRow / nrows) * 100)))
        finally:
            # always close off the progress, even on error
            self.endProgress.emit()

    def writeColumnOrderToGDAL(self, gdaldataset):
        """
        Given a GDAL dataset opened in update mode,
        writes the currently selected column order
        to the file (this can be changed by the querywindow)
        Ideally, this would be the band but writing metadata
        to the band causes problems with some Imagine files
        that have been opened in different versions of Imagine.

        Also writes the lookup column if there is one.
        """
        string = json.dumps(self.columnNames)
        gdaldataset.SetMetadataItem(VIEWER_COLUMN_ORDER_METADATA_KEY, string)

        if self.lookupColName is not None:
            name = str(self.lookupColName)
        else:
            # remove it
            name = ''
        gdaldataset.SetMetadataItem(VIEWER_COLUMN_LOOKUP_METADATA_KEY, name)

    @staticmethod
    def readColumnOrderFromGDAL(gdaldataset):
        """
        Reads the column order out of the gdaldataset.
        Returns empty list if none (or if what is stored in
        the file can't be understood).
        Also returns the lookup column (None if not set).
        """
        columns = []
        string = gdaldataset.GetMetadataItem(VIEWER_COLUMN_ORDER_METADATA_KEY)
        if string is not None and string != '':
            try:
                columns = json.loads(string)
            except ValueError:
                # corrupt metadata shouldn't stop the file opening
                columns = []
            if not isinstance(columns, list):
                columns = []

        string = gdaldataset.GetMetadataItem(
                            VIEWER_COLUMN_LOOKUP_METADATA_KEY)
        name = None
        if string is not None and string != '':
            name = string

        return columns, name
class RATCache(object):
    """
    Class that caches a 'chunk' of the RAT.

    cacheDict maps column name to the array of values for rows
    currStartRow .. currStartRow + length - 1.
    """
    def __init__(self, gdalRAT, chunkSize):
        self.gdalRAT = gdalRAT
        self.chunkSize = chunkSize
        self.currStartRow = 0
        self.length = 0
        self.cacheDict = {}

    def getLength(self):
        "Return the length of the current RAT chunk"
        return self.length

    def columnAdded(self, colName):
        """
        Shortcut to be called when a new column added
        saves having to re-read all the data - just
        updates the cache with the new data
        """
        ncols = self.gdalRAT.GetColumnCount()
        for col in range(ncols):
            name = self.gdalRAT.GetNameOfCol(col)
            if name == colName:
                data = self.gdalRAT.ReadAsArray(col,
                                int(self.currStartRow), self.length)
                self.cacheDict[name] = data
                break

    def updateCache(self, colName=None):
        """
        Internal method, called when self.currStartRow changed
        If colName is None all columns will be updated, if it is a single
        name or a list of names, then just the named one(s) will be update.

        Columns that aren't asked for keep whatever was last read
        for them.
        """
        rowCount = self.gdalRAT.GetRowCount()
        startRow = int(self.currStartRow)
        # a full chunk, or whatever is left of the table (never negative)
        self.length = max(0, min(self.chunkSize, rowCount - startRow))
        if self.length == 0:
            # positioned at or past the end - nothing to read
            return

        # work out which names are wanted. Take care to treat a single
        # string as one name rather than something to substring match.
        if colName is None:
            wanted = None
        elif isinstance(colName, str):
            wanted = (colName,)
        else:
            wanted = tuple(colName)

        ncols = self.gdalRAT.GetColumnCount()
        for col in range(ncols):
            name = self.gdalRAT.GetNameOfCol(col)
            if wanted is None or name in wanted:
                data = self.gdalRAT.ReadAsArray(col, startRow, self.length)
                # for some reason, with HFA this can return None
                # fake some zero data
                if data is None:
                    coltype = self.gdalRAT.GetTypeOfCol(col)
                    if coltype == gdal.GFT_Integer:
                        data = numpy.zeros(self.length, dtype=int)
                    elif coltype == gdal.GFT_Real:
                        data = numpy.zeros(self.length, dtype=float)
                    else:
                        data = numpy.zeros(self.length, dtype='S10')
                    # write back to file
                    self.gdalRAT.WriteArray(data, col, startRow)

                self.cacheDict[name] = data

    def setStartRow(self, startRow, colName=None):
        """
        Call this to set the cache to contain the new data
        If colName is None all columns will be updated
        otherwise just the named one (or ones, if a list)
        """
        # GDAL wants a plain int (not, say, a numpy or float value)
        self.currStartRow = int(startRow)
        self.updateCache(colName)

    def getValueFromCol(self, colName, row):
        """
        Return the actual value given name of col
        and a row count based on the full rat
        """
        data = self.cacheDict[colName]
        return data[row - self.currStartRow]

    def autoScrollToIncludeRow(self, row):
        """
        For calling from GUI. Qt will ask for a given row
        but we don't want to re-read every time. Most requests
        will be around a location so we only update when we have to.
        """
        if (row >= self.currStartRow and
                row < (self.currStartRow + self.chunkSize) and
                len(self.cacheDict) > 0):
            # no need - already have that data
            return

        # start of the chunk sized block that contains the row
        newStartRow = int(row / self.chunkSize) * self.chunkSize
        self.setStartRow(newStartRow)

    def updateColumn(self, colName, data, selectionArray):
        """
        New data for a column. selectionArray is the size of the file's RAT.
        data is just the subset for this cache (or a scalar to
        be used for every row).
        Updates only done where selectionArray == True (for the subset we
        are caching)
        updates cache and data in file

        Raises viewererrors.AttributeTableTypeError if data is the
        wrong length or the column can't be found and
        viewererrors.UserExpressionTypeError if data can't be converted
        to the type of the column.
        """
        if not numpy.isscalar(data) and len(data) != self.length:
            msg = 'data wrong length'
            raise viewererrors.AttributeTableTypeError(msg)

        startRow = int(self.currStartRow)
        selectionArraySubset = selectionArray[startRow:startRow + self.length]
        if not selectionArraySubset.any():
            # nothing to be updated
            return

        # need to do some massaging based on coltype
        ncols = self.gdalRAT.GetColumnCount()
        coltype = gdal.GFT_Integer
        colIdx = -1
        for col in range(ncols):
            name = self.gdalRAT.GetNameOfCol(col)
            if name == colName:
                colIdx = col
                coltype = self.gdalRAT.GetTypeOfCol(col)
                break

        if colIdx == -1:
            msg = 'unable to find column %s' % colName
            raise viewererrors.AttributeTableTypeError(msg)

        isScalar = numpy.isscalar(data)

        try:
            if not selectionArraySubset.all():
                # some need to be updated
                # keep old where selectionArray == False
                olddata = self.cacheDict[colName]
                if coltype == gdal.GFT_Integer:
                    if isScalar:
                        data = int(data)
                    else:
                        data = numpy.asarray(data).astype(int)
                elif coltype == gdal.GFT_Real:
                    if isScalar:
                        data = float(data)
                    else:
                        data = numpy.asarray(data).astype(float)
                else:
                    if isScalar:
                        data = str(data)
                        # there is a slight problem here
                        # where doesn't resize the string
                        # arrays automatically so we manually
                        # convert olddata to a large enough array
                        # for data
                        lendata = len(data)
                        if lendata > olddata.itemsize:
                            vdtype = numpy.dtype('S%d' % lendata)
                            olddata = olddata.astype(vdtype)
                    else:
                        # this converts to a string array
                        # of the right dtype to handle data
                        data = numpy.array(data, dtype=str)

                # do the masking
                # it is assumed this will do the right thing when
                # string lengths are different
                data = numpy.where(selectionArraySubset, data, olddata)

            elif isScalar:
                # all new data - expand the scalar to a whole chunk
                if coltype == gdal.GFT_Integer:
                    dataarr = numpy.empty(self.length, dtype=int)
                elif coltype == gdal.GFT_Real:
                    dataarr = numpy.empty(self.length, dtype=float)
                else:
                    # same conversion as the partial update does so
                    # non-string scalars work for string columns too
                    data = str(data)
                    vdtype = numpy.dtype('S%d' % max(len(data), 1))
                    dataarr = numpy.empty(self.length, dtype=vdtype)
                dataarr.fill(data)
                data = dataarr
            # else: all new data and already an array

        except ValueError as e:
            msg = str(e)
            raise viewererrors.UserExpressionTypeError(msg)

        # update cache
        self.cacheDict[colName] = data

        # write back to file
        self.gdalRAT.WriteArray(data, colIdx, startRow)