Source code for ABXpy.dbfun.dbfun_compute

# -*- coding: utf-8 -*-
"""
Created on Mon Oct 14 16:59:27 2013

@author: Thomas Schatz

Class for defining and computing efficiently functions of the columns of a
database.
Implements the DBfun API
"""

import ast
import sys

# Only solution I found for circular
# imports in both Python 2 and 3
from . import *
from . import dbfun


# FIXME remove dbfun prefix from dbfun_lookuptable and dbfun_connector ?
class DBfun_Compute(dbfun.DBfun):
    """Function of the columns of a database, defined by a Python script.

    The script is split by :meth:`parse` into import statements, an optional
    ``with '<file>' as <alias>`` statement giving access to functions stored
    in auxiliary files, some main code and a final expression whose value is
    the output of the function. :meth:`evaluate` then runs the compiled
    pieces on a dictionary of column data.

    Attributes set by the constructor and by :meth:`parse`:

    - ``columns``: set of the available database column names
    - ``script``: source text of the script
    - ``input_names``: names used in the script that are also column names
    - ``aux_files``, ``aliases``, ``aux_functions``: file names, aliases and
      ``DBfun_LookupTable`` instances found in the ``with`` statement
    - ``code_nodes``: description of the calls to auxiliary functions
    - ``import_bytecode``, ``main_bytecode``, ``final_bytecode``: compiled
      import statements, main code and final expression
    - ``n_outputs``, ``output_names``: currently always ``1`` and ``None``
    """

    def __init__(self, definition, columns):
        """Build the function from a script and the available column names.

        Parameters
        ----------
        definition : str
            Either the text of the script or, when it ends with ``'.py'``,
            the path of a file containing the script.
        columns : iterable of str
            Names of the columns of the database.
        """
        self.columns = set(columns)
        # set script (a slice shorter than 3 characters can never be equal
        # to '.py', so no explicit length check is needed)
        if definition[-3:] == '.py':
            with open(definition) as script_file:
                self.script = script_file.read()
        else:
            self.script = definition
        self.parse()
        # FIXME allow users to specify the content of n_outputs and/or
        # output_names from command-line (currently it will always be 1,
        # None...)
        self.n_outputs = 1
        self.output_names = None

    @staticmethod
    def _make_module(body):
        """Return an ast.Module holding `body` for the running Python."""
        if sys.version_info >= (3, 8):
            # ast.Module spec changed in python-3.8: type_ignores is required
            return ast.Module(body, [])
        return ast.Module(body)

    @staticmethod
    def _string_value(node):
        """Return the string of a string literal node, None for other nodes."""
        constant = getattr(ast, 'Constant', None)
        if constant is not None and isinstance(node, constant):
            value = node.value
            return value if isinstance(value, str) else None
        # before python-3.8 the parser produces ast.Str nodes; ast.Str is
        # only looked up there because it is deprecated in later versions
        if sys.version_info < (3, 8) and isinstance(node, ast.Str):
            return node.s
        return None

    @staticmethod
    def _first_with_item(node):
        """Return (context_expr, optional_vars) of the first item of a With."""
        items = getattr(node, 'items', None)
        if items is None:
            # Python 2: a With node holds exactly one context manager
            return node.context_expr, node.optional_vars
        return items[0].context_expr, items[0].optional_vars

    @staticmethod
    def _with_inner_body(node):
        """Return what a With node contains once its first item is removed.

        In Python 2 ``with a, b:`` is parsed as nested With nodes; in
        Python 3 it is a single node with several items. The remaining items
        are re-wrapped in a new With node so that both look the same.
        """
        items = getattr(node, 'items', None)
        if items is None or len(items) == 1:
            return node.body
        remaining = ast.With(items=items[1:], body=node.body)
        return [ast.copy_location(remaining, node)]

    def _is_aux_with(self, stat):
        """Tell whether `stat` is a with statement on a string literal."""
        if not isinstance(stat, ast.With):
            return False
        context_expr, _ = self._first_with_item(stat)
        return self._string_value(context_expr) is not None

    def _unwrap_aux_with(self, node):
        """Record the files and aliases of a string with statement.

        Fills ``self.aux_files`` and ``self.aliases`` and returns the list
        of statements that replace `node` in the tree.
        """
        body = [node]
        trailing = []
        while body and self._is_aux_with(body[0]):
            current = body[0]
            context_expr, alias = self._first_with_item(current)
            if not isinstance(alias, ast.Name):
                raise ValueError(
                    'The with statement for re-using auxiliary ABX files '
                    'should give a name to each file (as in '
                    "\"with 'file' as name\") in script: %s" % self.script)
            self.aux_files.append(self._string_value(context_expr))
            self.aliases.append(alias.id)
            # statements following a nested with are kept, after its content
            trailing = list(body[1:]) + trailing
            body = self._with_inner_body(current)
        return list(body) + trailing

    # slow but not likely to be critical
    def parse(self):
        """Split the script into its components and compile them.

        Sets ``import_bytecode``, ``main_ast``, ``input_names``,
        ``code_nodes``, ``final_ast``, ``main_bytecode`` and
        ``final_bytecode`` (and, through :meth:`process_with`,
        ``aux_files``, ``aliases`` and ``aux_functions``).

        Raises
        ------
        ValueError
            If the script does not finish by an expression, or if its
            ``with`` statement is not accepted by :meth:`process_with`.
        """
        # first separate the script defining the function into various
        # components (import statements, with 'h5file' statement, synopsis
        # definition, main code)
        tree = ast.parse(self.script)
        # find and extract imports
        import_types = (ast.ImportFrom, ast.Import)
        imports = [stat for stat in tree.body
                   if isinstance(stat, import_types)]
        rest = [stat for stat in tree.body
                if not isinstance(stat, import_types)]
        # store ast with import statements
        # can be executed later using: exec(self.import_bytecode)
        self.import_bytecode = compile(
            self._make_module(imports), '', mode='exec')
        tree = self._make_module(rest)
        # look for a with statement with a string, store context info and
        # remove with statement
        tree = self.process_with(tree)
        # check that last line is an expression (a script without any
        # statement left is rejected the same way)
        if not tree.body or not isinstance(tree.body[-1], ast.Expr):
            raise ValueError(
                'The following script should finish by an expression: %s'
                % self.script)
        # store what is left
        self.main_ast = tree

        # second find column names in the main code so as to determine what
        # context is used and check coherence
        # find the list of the names of the variables in the main ast
        visitor = nameVisitor()  # defined at the end of this module
        visitor.visit(self.main_ast)
        names = set(visitor.names)
        # FIXME could add a check that all names correspond either to a bound
        # variable or is in self.columns
        # need a way to get a list of unbound variable ????
        # then would raise ValueError('There are unbound variables in script
        # %s' % self.script)
        # For now: just consider that the inputs are the intersection of the
        # element of names and of self.columns
        # FIXME document that this means that using local variables with the
        # same name as db_columns in the scripts will affect the synopsis of
        # the dbfun ...
        self.input_names = list(names.intersection(self.columns))

        # third parse final expression and additional code asts for nodes
        # that involve aliases of the aux_files, get the hierarchy of calls
        # to aux_files, check that it is flat, compile the corresponding
        # partial bytecodes and collect all the info necessary for efficient
        # evaluation of the code
        if self.aux_functions:
            connector = lookuptable_connector.LookupTableConnector(
                self.script, self.aliases)
            connector.visit(self.main_ast)
            # self.code_nodes contains a list of dictionaries containing the
            # info for the various calls to function defined in auxiliary h5
            # files
            # each dictionary contains entries: 'child_asts', 'varname' and
            # 'function' and optionally 'output_cols'
            self.code_nodes = self.main_ast.code_nodes
            # clean the main ast
            del self.main_ast.code_nodes
        else:
            self.code_nodes = []
        # compile all asts, with final expr apart
        # without the .value you only get a ast.Expr instead of getting the
        # actual expr
        self.final_ast = ast.Expression(self.main_ast.body[-1].value)
        self.main_ast.body = self.main_ast.body[:-1]
        for dico in self.code_nodes:
            dico['child_bytecodes'] = [
                compile(child, '', mode='eval')
                for child in dico['child_asts']]
        self.main_bytecode = compile(self.main_ast, '', mode='exec')
        self.final_bytecode = compile(self.final_ast, '', mode='eval')

    # just an auxiliary function for parse, dealing with 'with h5file'
    # statements
    def process_with(self, tree):
        """Extract the ``with '<file>' as <alias>`` statement from `tree`.

        Only top-level with statements whose (first) context expression is
        a string literal are considered. The files and aliases found are
        stored in ``self.aux_files`` and ``self.aliases``, a
        ``DBfun_LookupTable`` is instantiated for each file in
        ``self.aux_functions``, and the with statement is replaced in the
        tree by the statements it contains.

        Parameters
        ----------
        tree : ast.Module
            The script without its import statements.

        Returns
        -------
        ast.Module
            `tree` itself when it has no such with statement, otherwise a
            new module without the with statement.

        Raises
        ------
        ValueError
            If there is more than one such with statement, or if one of the
            files is not given a plain name with ``as``.
        """
        self.aux_files = []
        self.aliases = []
        self.aux_functions = []
        kept = [position for position, stat in enumerate(tree.body)
                if self._is_aux_with(stat)]
        if len(kept) > 1:
            raise ValueError(
                'There is more than one with statement for re-using auxiliary'
                ' ABX files in script: %s' % self.script)
        if kept:
            position = kept[0]
            # find the h5 files and aliases
            replacement = self._unwrap_aux_with(tree.body[position])
            # remove with statement from ast
            stats = (list(tree.body[:position]) + replacement
                     + list(tree.body[position + 1:]))
            tree = self._make_module(stats)
        # instantiate corresponding DBfun_LookupTables:
        for f in self.aux_files:
            self.aux_functions.append(
                dbfun_lookuptable.DBfun_LookupTable(f, indexed=False))
        return tree

    # FIXME if there is any sense in having indexed outputs for dbfun_compute,
    # implement it
    def output_specs(self):
        """Return ``(n_outputs, output_names, {})``."""
        return self.n_outputs, self.output_names, {}

    # function for evaluating the column function given data for the context
    # context is a dictionary with just the right name/content associations
    def evaluate(self, context):
        """Run the script on `context` and return its final expression.

        Parameters
        ----------
        context : dict
            Name/content associations for the inputs of the script. It is
            used directly as the local namespace of the script, so the
            names imported or assigned by the script are added to it.

        Returns
        -------
        The value of the final expression of the script.
        """
        # NOTE: the script is run with exec/eval without any sandboxing, it
        # must come from a trusted source
        # set up context
        ns_local = context
        ns_global = {}
        # exec imports in that context
        exec(self.import_bytecode, ns_global, ns_local)
        # evaluate the calls to aux functions
        for node in self.code_nodes:
            # evaluate the arguments to the call
            aux_context = {}
            args = node['function'].in_names
            for code, arg in zip(node['child_bytecodes'], args):
                aux_context[arg] = eval(code, ns_global, ns_local)
            # call the aux function and assign it in the main namespace
            ns_local[node['varname']] = node['function'].evaluate(aux_context)
        # FIXME if aux files, could use the output_cols here ? and maybe
        # need to do it also in direct case for consistency ?
        # also is output format for vlen output going to work ?
        # exec main_bytecode
        exec(self.main_bytecode, ns_global, ns_local)
        return eval(self.final_bytecode, ns_global, ns_local)
# visitor class for getting the list of the names of the variables in expr # (minus the import statements)
class nameVisitor(ast.NodeVisitor):
    """AST visitor gathering the identifier of every Name node it visits.

    After ``visit(tree)``, ``names`` holds one entry per Name node, in the
    order the nodes were visited (duplicates are kept).
    """

    def __init__(self, *args, **kwargs):
        ast.NodeVisitor.__init__(self, *args, **kwargs)
        # identifiers collected so far
        self.names = list()

    def visit_Name(self, node):
        """Store the identifier of the Name node `node`."""
        identifier = node.id
        self.names.append(identifier)