# Source code for ABXpy.sideop.filter_manager

# make sure the rest of the ABXpy package is accessible

import ABXpy.sideop.side_operations_manager as side_operations_manager
import ABXpy.dbfun.dbfun_compute as dbfun_compute
import ABXpy.dbfun.dbfun_lookuptable as dbfun_lookuptable
import ABXpy.dbfun.dbfun_column as dbfun_column

import numpy as np


class FilterManager(side_operations_manager.SideOperationsManager):
    """Manage the filters on attributes (on, across, by) or elements
    (A, B, X) for further processing"""

    def __init__(self, db_hierarchy, on, across, by, filters):
        side_operations_manager.SideOperationsManager.__init__(
            self, db_hierarchy, on, across, by)
        # this case is specific to filters: it applies a generic filter to
        # the database before considering A, B and X stuff.
        self.generic = []
        # associate each of the provided filters to the appropriate point
        # in the computation flow.
        # a filter can be: the name of a column of the database (possibly
        # extended), the name of a lookup file (ending in .dbfun), or a
        # script given directly as a string (anything else)
        for filter_spec in filters:
            # instantiate the appropriate dbfun for this filter
            if filter_spec in self.extended_cols:
                # column already in the database -- evaluating a full
                # context is wasteful in this case; not even necessary to
                # have a dbfun at all
                fun = dbfun_column.DBfun_Column(filter_spec, indexed=False)
            elif filter_spec.endswith('.dbfun'):
                # lookup table: ask for re-interpreted indexed outputs
                fun = dbfun_lookuptable.DBfun_LookupTable(
                    filter_spec, indexed=False)
            else:
                # on-the-fly computation from a script string
                fun = dbfun_compute.DBfun_Compute(
                    filter_spec, self.extended_cols)
            self.add(fun)

    def classify_generic(self, elements, db_fun, db_variables):
        # a filter is 'generic' when it refers only to non-extended column
        # names; only in that case is the 'generic' field of db_variables
        # instantiated and the filter applied to the whole database
        if {name for _, name in elements} == {''}:
            db_variables['generic'] = set(elements)
            self.generic.append(db_fun)
            self.generic_context['generic'].update(db_variables['generic'])
            elements = {}
        return elements, db_variables

    def by_filter(self, by_values):
        # scalar (one boolean per filter) evaluation on the 'by' values
        return singleton_filter(self.evaluate_by(by_values))

    def generic_filter(self, by_values, db):
        # keep only the database rows passing every generic filter
        surviving = vectorial_filter(
            lambda context: self.evaluate_generic(by_values, db, context),
            np.arange(len(db)))
        return db.iloc[surviving]

    def on_across_by_filter(self, on_across_by_values):
        # scalar evaluation on the (on, across, by) values
        return singleton_filter(
            self.evaluate_on_across_by(on_across_by_values))

    def A_filter(self, on_across_by_values, db, indices):
        # Caution: 'indices' contains db-related indices, but the returned
        # result contains indices with respect to 'indices'
        local_indices = np.arange(len(indices))
        return vectorial_filter(
            lambda context: self.evaluate_A(
                on_across_by_values, db, indices, context),
            local_indices)

    def B_filter(self, on_across_by_values, db, indices):
        # Caution: 'indices' contains db-related indices, but the returned
        # result contains indices with respect to 'indices'
        local_indices = np.arange(len(indices))
        return vectorial_filter(
            lambda context: self.evaluate_B(
                on_across_by_values, db, indices, context),
            local_indices)

    def X_filter(self, on_across_by_values, db, indices):
        # Caution: 'indices' contains db-related indices, but the returned
        # result contains indices with respect to 'indices'
        local_indices = np.arange(len(indices))
        return vectorial_filter(
            lambda context: self.evaluate_X(
                on_across_by_values, db, indices, context),
            local_indices)

    def ABX_filter(self, on_across_by_values, db, triplets):
        # 'triplets' contains db-related indices; the returned result
        # contains indices with respect to 'triplets'
        local_indices = np.arange(len(triplets))
        return vectorial_filter(
            lambda context: self.evaluate_ABX(
                on_across_by_values, db, triplets, context),
            local_indices)
def singleton_filter(generator):
    """Return True if every value yielded by `generator` is truthy.

    Evaluation is lazy: iteration stops at the first falsy result, so
    later filters are never evaluated once one has rejected the item.

    Parameters
    ----------
    generator : iterable
        Yields one truthiness-testable result per filter.

    Returns
    -------
    bool
        True when all results are truthy (including when the generator
        is empty), False otherwise.
    """
    # all() short-circuits on the first falsy value, exactly like the
    # previous explicit loop-flag-break implementation
    return all(generator)
def vectorial_filter(generator, indices):
    """Element-wise filtering of `indices` by successive boolean masks.

    Each mask yielded by ``generator(context)`` applies to the candidates
    still surviving, so the returned array contains the subset of
    `indices` passing every mask.

    .. note:: To allow a lazy evaluation of the filter, the context is
        filtered explicitly which acts on the generator by a side-effect
        (dict being mutable in python)
    """
    surviving = np.array(indices)
    context = {}
    for mask in generator(context):
        hits = np.where(mask)[0]
        surviving = surviving[hits]
        # keep testing only the cases that are still possibly True
        for key in context:
            context[key] = [context[key][i] for i in hits]
            # FIXME wouldn't using only numpy arrays be more performant ?
        if surviving.size == 0:
            # nothing left to test: stop without consuming the generator
            break
    return surviving