"""Base classes for all shennong components"""
import abc
import collections
import inspect
from shennong.logger import get_logger
class BaseProcessor:
    """Base class for all processors in shennong

    Notes
    -----
    All processors should specify all the parameters that can be set
    at the class level in their ``__init__`` as explicit keyword
    arguments (no ``*args`` or ``**kwargs``).

    The methods :meth:`get_params` and :meth:`set_params` are adapted
    from :class:`sklearn.base.BaseEstimator`

    """
    def __init__(self):
        self._logger = get_logger(self.name, level='info')

    def __repr__(self):
        return self.__class__.__name__

    @property
    @abc.abstractmethod
    def name(self):
        """Processor name"""

    @property
    def log(self):
        """Processor logger"""
        return self._logger

    def set_logger(self, level,
                   formatter='%(levelname)s - %(name)s - %(message)s'):
        """Change level and/or format of the processor's logger

        Parameters
        ----------
        level : str
            The minimum log level handled by the logger (any message
            below this level will be ignored). Must be 'debug', 'info',
            'warning' or 'error'.
        formatter : str, optional
            A string to format the log messages, see
            https://docs.python.org/3/library/logging.html#formatter-objects.
            By default display level, name and message. Use
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s' to
            display time, level, name and message.

        """
        # the logger is replaced by a new one obtained from get_logger
        self._logger = get_logger(self.name, level=level, formatter=formatter)

    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the processor

        Returns
        -------
        names : list of str
            The sorted names of the ``__init__`` arguments, excluding
            ``self`` and any ``**kwargs`` entry.

        Raises
        ------
        RuntimeError
            If the constructor declares a ``*args`` parameter.

        """
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        if init is object.__init__:  # pragma: nocover
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model
        # parameters to represent
        init_signature = inspect.signature(init)

        # Consider the constructor parameters excluding 'self' (and
        # silently ignoring a **kwargs entry)
        parameters = [
            p for p in init_signature.parameters.values()
            if p.name != 'self' and p.kind != p.VAR_KEYWORD]

        for param in parameters:
            if param.kind == param.VAR_POSITIONAL:
                raise RuntimeError(
                    'shennong processors should always '
                    'specify their parameters in the signature '
                    'of their __init__ (no varargs). '
                    f'{cls} with constructor {init_signature} does not '
                    'follow this convention.')

        # Extract and sort argument names excluding 'self'
        return sorted(p.name for p in parameters)

    def get_params(self, deep=True):
        """Get parameters for this processor.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this processor and
            contained subobjects that are processors. Default to True.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values. Parameters of a
            nested processor are exposed as ``<name>__<sub_name>``. A
            parameter with no matching attribute is reported as None.

        """
        out = {}
        for key in self._get_param_names():
            value = getattr(self, key, None)
            # only recurse into instances: a parameter holding a class
            # also exposes get_params, but calling it unbound would fail
            if (
                    deep
                    and hasattr(value, 'get_params')
                    and not isinstance(value, type)
            ):
                deep_items = value.get_params().items()
                out.update((key + '__' + k, val) for k, val in deep_items)
            out[key] = value
        return out

    def set_params(self, **params):
        """Set the parameters of this processor.

        Parameters
        ----------
        **params : dict
            Parameter names mapped to their new values. The parameter
            of a nested processor is addressed as
            ``<name>__<sub_name>``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If any given parameter in ``params`` is invalid for the
            processor, or if the matching attribute cannot be set.

        """
        if not params:
            # Simple optimization to gain speed (inspect is slow)
            return self

        valid_params = self.get_params(deep=True)
        nested_params = collections.defaultdict(dict)  # grouped by prefix

        for key, value in params.items():
            key, delim, sub_key = key.partition('__')
            if key not in valid_params:
                raise ValueError(
                    f'invalid parameter {key} for processor {self}, '
                    'check the list of available parameters '
                    'with `processor.get_params().keys()`.')

            if delim:
                nested_params[key][sub_key] = value
            else:
                try:
                    setattr(self, key, value)
                except AttributeError as err:
                    raise ValueError(
                        f'cannot set attribute {key} for {self}') from err
                # nested parameters given in the same call must apply
                # to the newly assigned object
                valid_params[key] = value

        for key, sub_params in nested_params.items():
            valid_params[key].set_params(**sub_params)

        return self