Source code for shennong.features

"""Provides the `Features` class to manipulate speech features

A `Features` instance is designed to store the features extracted from a single
utterance. It is made of three fields:

- ``data`` is a numpy array storing the underlying features matrix with the
  shape ``(nframes, ndims)``

- ``times`` is a numpy array containing the timestamps for each frame

- ``properties`` is a dictionary containing metadata about the features, such
  as generation processor and parameters, original audio file, etc...

A `Features` alone cannot be saved to or loaded from file, it must be
encapsulated into a :class:`~shennong.features_collection.FeaturesCollection`.

Examples
--------

>>> import numpy as np
>>> from shennong import Features

Build a random Features instance with timestamps

>>> feat = Features(np.random.random((5, 2)), np.linspace(0, 4, num=5))
>>> feat.shape
(5, 2)
>>> feat.nframes
5
>>> feat.ndims
2
>>> feat.properties
{}

Copy the features and add some properties to it

>>> feat2 = Features(feat.data, feat.times, properties={'str': 'a', 'int': 0})
>>> feat2.properties
{'str': 'a', 'int': 0}
>>> feat == feat2
False
>>> feat.data == feat2.data
array([[ True,  True],
       [ True,  True],
       [ True,  True],
       [ True,  True],
       [ True,  True]])
>>> feat.times == feat2.times
array([ True,  True,  True,  True,  True])

"""


import copy
import numpy as np


from shennong.logger import get_logger
from shennong.utils import dict_equal


class Features:
    """Handles features data with attached timestamps and properties

    Parameters
    ----------
    data : numpy array, shape = [nframes, ndims]
        The features matrix, one row per frame.
    times : numpy array, shape = [nframes] or [nframes, 2]
        The timestamps attached to each frame.
    properties : dict, optional
        Metadata attached to the features. Default to an empty dictionary.
    validate : bool, optional
        When True, check the features are in a valid state at construction
        (see :meth:`validate`). Default to True.

    Raises
    ------
    ValueError
        If `validate` is True and the features are not valid.

    """

    def __init__(self, data, times, properties=None, validate=True):
        self._data = data
        self._times = times
        self._properties = {} if properties is None else properties

        # make sure the features are in a valid state
        if validate is True:
            self.validate()

    @property
    def data(self):
        """The underlying features data as a numpy matrix"""
        return self._data

    @property
    def times(self):
        """The frames timestamps on the vertical axis"""
        return self._times

    @property
    def dtype(self):
        """The type of the features data samples"""
        return self.data.dtype

    @property
    def shape(self):
        """The shape of the features data, as (nframes, ndims)"""
        return self.data.shape

    @property
    def ndims(self):
        """The number of dimensions of a features frame (feat.shape[1])"""
        return self.shape[1]

    @property
    def nframes(self):
        """The number of features frames (feat.shape[0])"""
        return self.shape[0]

    @property
    def properties(self):
        """A dictionary of properties used to build the features

        Properties are references to the features extraction pipeline,
        parameters and source audio file used to generate the features.

        """
        return self._properties

    def _to_dict(self, with_properties=True):
        """Returns the features as a dictionary

        Parameters
        ----------
        with_properties : bool, optional
            When True, include the 'properties' entry. Default to True.

        Returns
        -------
        features : dict
            A dictionary with the following keys: 'data', 'times' and
            optional 'properties'. The values are the underlying objects,
            not copies.

        """
        features = {'data': self.data, 'times': self.times}
        if with_properties:
            features['properties'] = self.properties
        return features

    @staticmethod
    def _from_dict(features, validate=True):
        """Return an instance of Features loaded from a dictionary

        Parameters
        ----------
        features : dict
            The dictionary to load the features from. Must have the
            following keys: 'data', 'times' and optional 'properties'.
        validate : bool, optional
            When True, validate the features before returning.
            Default to True

        Returns
        -------
        An instance of ``Features``

        Raises
        ------
        ValueError
            If the ``features`` don't have the requested keys or if the
            underlying features data is not valid.

        """
        requested_keys = {'data', 'times'}
        missing_keys = requested_keys - set(features.keys())
        if missing_keys:
            # sort the keys so the error message is deterministic
            raise ValueError(
                'cannot read features from dict, missing keys: {}'
                .format(', '.join(sorted(missing_keys))))

        return Features(
            features['data'],
            features['times'],
            properties=features.get('properties', {}),
            validate=validate)

    def __eq__(self, other):
        """Returns True if `self` is equal `other`, False otherwise"""
        # object identity
        if self is other:
            return True

        # let Python fall back to its default comparison instead of
        # failing on a missing attribute when `other` is not a Features
        if not isinstance(other, Features):
            return NotImplemented

        # quick tests on attributes
        if self.shape != other.shape or self.dtype != other.dtype:
            return False

        # properties equality
        if not dict_equal(self.properties, other.properties):
            return False

        # timestamps equality
        if not np.array_equal(self.times, other.times):
            return False

        # features matrices equality
        return bool(np.array_equal(self.data, other.data))

    def is_close(self, other, rtol=1e-5, atol=1e-8):
        """Returns True if `self` is approximately equal to `other`

        Shapes, properties and timestamps must be strictly equal, only the
        data is compared within the given tolerances.

        Parameters
        ----------
        other : Features
            The Features instance to be compared to this one
        rtol : float, optional
            Relative tolerance
        atol : float, optional
            Absolute tolerance

        Returns
        -------
        equal : bool
            True if these features are almost equal to the `other`

        See Also
        --------
        FeaturesCollection.is_close, numpy.allclose

        """
        if self is other:
            return True

        if self.shape != other.shape:
            return False

        if not dict_equal(self.properties, other.properties):
            return False

        if not np.array_equal(self.times, other.times):
            return False

        return bool(np.allclose(self.data, other.data, atol=atol, rtol=rtol))

    def copy(self, dtype=None, subsample=None):
        """Returns a copy of the features

        Allocates new arrays for data, times and properties

        Parameters
        ----------
        dtype : type, optional
            When specified converts the data and times arrays to the
            requested `dtype`
        subsample : int, optional
            When specified subsample the features every `subsample`
            frames. When not specified do not do subsampling.

        Raises
        ------
        ValueError
            If `subsample` is defined but is not a strictly positive
            integer.

        Returns
        -------
        features : Features
            A new instance of Features copied from this one. The copy is
            built without validation.

        """
        # by default we do not subsample
        if subsample is None:
            subsample = 1
        elif not isinstance(subsample, int) or subsample <= 0:
            raise ValueError(
                f'subsample must be a strictly positive integer, '
                f'it is: {subsample}')

        data = self.data[::subsample]
        times = self.times[::subsample]

        # compare to None explicitly: a truthiness test would silently
        # skip the conversion for dtype objects that evaluate as false
        if dtype is not None:
            data = data.astype(dtype)
            times = times.astype(dtype)
        else:
            data = data.copy()
            times = times.copy()

        return Features(
            data, times,
            properties=copy.deepcopy(self.properties),
            validate=False)

    def is_valid(self):
        """Returns True if the features are in a valid state

        Returns False otherwise. Consistency is checked for features's
        data, times and properties.

        See Also
        --------
        Features.validate

        """
        try:
            self.validate()
        except ValueError:
            return False
        return True

    def validate(self):
        """Raises a ValueError if the features are not in a valid state

        The checks are done in this order, the first failing group raises:
        types of data, times and properties; arrays dimensions and number
        of frames; times sorted in increasing order; data made of finite
        values only.

        Raises
        ------
        ValueError
            If one of the checks above fails.

        """
        # accumulate detected errors and display them at the end
        errors = []

        # basic checks on types
        if not isinstance(self.data, np.ndarray):
            errors.append('data must be a numpy array')
        if not isinstance(self.times, np.ndarray):
            errors.append('times must be a numpy array')
        if not isinstance(self.properties, dict):
            errors.append('properties must be a dictionnary')
        if errors:
            raise ValueError(
                'invalid features data types: {}'.format(', '.join(errors)))

        # check arrays dimensions
        if self.data.ndim != 2:
            errors.append(
                'data dimension must be 2 but is {}'.format(self.data.ndim))

        # 0-dimensional times are rejected as well, they have no frames
        if self.times.ndim not in (1, 2):
            errors.append(
                'times dimension must be 1 or 2 but is {}'.format(
                    self.times.ndim))

        if self.times.ndim == 2 and self.times.shape[1] != 2:
            errors.append('times shape[1] must be 2, it is {}'.format(
                self.times.shape[1]))

        # shape[0] only exists for arrays with at least one dimension
        if self.data.ndim >= 1 and self.times.ndim >= 1:
            nframes1 = self.data.shape[0]
            nframes2 = self.times.shape[0]
            if nframes1 != nframes2:
                errors.append(
                    'mismatch in number of frames: {} for data but {} '
                    'for times'.format(nframes1, nframes2))

        if errors:
            raise ValueError(
                'invalid features dimensions: {}'.format(', '.join(errors)))

        # check if time is increasing. This check comes from
        # h5features/labels.py. A stable sort is required for 1D times,
        # otherwise equal timestamps may be permuted and sorted times
        # wrongly rejected (lexsort is already stable).
        index = (
            np.argsort(self.times, kind='stable')
            if self.times.ndim == 1
            else np.lexsort(self.times.T))
        if not np.array_equal(index, np.arange(self.nframes)):
            raise ValueError('times is not sorted in increasing order')

        # check all values in array are finite (not infinity nor nan)
        if not np.all(np.isfinite(self.data)):
            raise ValueError(
                'data contains non-finite numbers (nan of infinity)')

    def concatenate(self, other, tolerance=0, log=None):
        """Returns the concatenation of this features with `other`

        Build a new Features instance made of the concatenation of this
        instance with the other instance. Their `times` must be equal.

        Parameters
        ----------
        other : Features, shape = [nframes +/- tolerance, ndim2]
            The other features to concatenate at the end of this one
        tolerance : int, optional
            If the number of frames of the two features is different,
            trim the longest one up to a frame difference of
            `tolerance`, otherwise raise a ValueError. This option is
            useful when concatenating pitch with other 'standard'
            features because pitch processing includes a downsampling
            which can alter the resulting number of frames (the same
            tolerance is applied in Kaldi, e.g. in paste-feats).
            Default to 0.
        log : logging.Logger, optional
            Where to send log messages. When not specified, default to
            ``get_logger('features', 'info')``.

        Returns
        -------
        features : Features, shape = [nframes +/- tolerance, ndim1 + ndim2]

        Raises
        ------
        ValueError
            If `other` cannot be concatenated because of
            inconsistencies: number of frames difference greater than
            tolerance, unequal times values.

        """
        # check the number of frames is within the tolerance
        need_trim = False
        diff = abs(self.nframes - other.nframes)
        if diff:
            if not tolerance:
                raise ValueError(
                    'features have a different number of frames')

            if diff > tolerance:
                raise ValueError(
                    'features differs number of frames, and '
                    'greater than tolerance: |{} - {}| > {}'.format(
                        self.nframes, other.nframes, tolerance))

            # the default logger is created only when actually needed,
            # not once at class definition
            if log is None:
                log = get_logger('features', 'info')
            log.warning(
                'features differs in number of frames, but '
                'within tolerance (|%s - %s| <= %s), trim the longest one',
                self.nframes, other.nframes, tolerance)
            need_trim = True

        # trim the longest features to the size of the shortest one
        data1, times1 = self.data, self.times
        data2, times2 = other.data, other.times
        if need_trim:
            if self.nframes > other.nframes:
                data1 = data1[:-diff]
                times1 = times1[:-diff]
            else:
                data2 = data2[:-diff]
                times2 = times2[:-diff]

        # ensures time axis is shared across the two features
        if not np.allclose(times1, times2):
            raise ValueError('times are not equal')

        # merge properties of the two features, working on deep copies so
        # the two original instances are left untouched
        properties = copy.deepcopy(self.properties)
        other_properties = copy.deepcopy(other.properties)
        properties.update(
            {k: v for k, v in other_properties.items() if k != 'pipeline'})

        if 'pipeline' not in properties:
            properties['pipeline'] = []

        # append the pipeline steps of `other`, shifting their columns by
        # the number of dimensions of this instance to match the position
        # of the data in the concatenated matrix
        for step in other_properties.get('pipeline', []):
            columns = step['columns']
            step['columns'] = [
                columns[0] + self.ndims, columns[1] + self.ndims]
            properties['pipeline'].append(step)

        return Features(
            np.hstack((data1, data2)), times1, properties=properties)