# Source code for shennong.features.processor.filterbank

"""Provides the FilterbankProcessor class to extract filterbank features

Extracts mel-filterbank features from an audio signal, using the Kaldi
implementation (see [kaldi-fbank]_).

    :class:`~shennong.audio.Audio` ---> FilterbankProcessor \
    ---> :class:`~shennong.features.features.Features`


Examples
--------

>>> from shennong.audio import Audio
>>> from shennong.features.processor.filterbank import FilterbankProcessor
>>> audio = Audio.load('./test/data/test.wav')

Initialize the filterbank processor with some options and compute the
features:

>>> processor = FilterbankProcessor(sample_rate=audio.sample_rate)
>>> processor.use_energy = False
>>> fbank = processor.process(audio)
>>> fbank.shape
(140, 23)

Using energy adds a column to the output:

>>> processor.use_energy = True
>>> fbank = processor.process(audio)
>>> fbank.shape
(140, 24)

References
----------

.. [kaldi-fbank] http://kaldi-asr.org/doc/structkaldi_1_1FbankOptions.html

"""

import kaldi.feat.fbank
import kaldi.matrix

from shennong.features.processor.base import MelFeaturesProcessor


class FilterbankProcessor(MelFeaturesProcessor):
    """Mel-filterbank features

    Wraps a ``kaldi.feat.fbank.FbankOptions`` structure. The frame and
    mel options are forwarded to :class:`MelFeaturesProcessor`; the
    filterbank-specific options (``use_energy``, ``energy_floor``,
    ``raw_energy``, ``htk_compat``, ``use_log_fbank`` and ``use_power``)
    are exposed as read/write properties that read from and write to
    the underlying Kaldi options object.

    Parameters
    ----------
    sample_rate, frame_shift, frame_length, dither, preemph_coeff, \
    remove_dc_offset, window_type, round_to_power_of_two, \
    blackman_coeff, snip_edges, num_bins, low_freq, high_freq, \
    vtln_low, vtln_high
        Forwarded unchanged to ``MelFeaturesProcessor.__init__``.
    use_energy, energy_floor, raw_energy, htk_compat, use_log_fbank, \
    use_power
        Initial values of the properties of the same name, see their
        respective documentation.

    """

    def __init__(self, sample_rate=16000, frame_shift=0.01,
                 frame_length=0.025, dither=1.0, preemph_coeff=0.97,
                 remove_dc_offset=True, window_type='povey',
                 round_to_power_of_two=True, blackman_coeff=0.42,
                 snip_edges=True, num_bins=23, low_freq=20,
                 high_freq=0, vtln_low=100, vtln_high=-500,
                 use_energy=False, energy_floor=0.0, raw_energy=True,
                 htk_compat=False, use_log_fbank=True, use_power=True):
        # Forward options to MelFeaturesProcessor
        super().__init__(
            sample_rate=sample_rate,
            frame_shift=frame_shift,
            frame_length=frame_length,
            dither=dither,
            preemph_coeff=preemph_coeff,
            remove_dc_offset=remove_dc_offset,
            window_type=window_type,
            round_to_power_of_two=round_to_power_of_two,
            blackman_coeff=blackman_coeff,
            snip_edges=snip_edges,
            num_bins=num_bins,
            low_freq=low_freq,
            high_freq=high_freq,
            vtln_low=vtln_low,
            vtln_high=vtln_high)

        # Attach the frame/mel options to the Kaldi filterbank options.
        # NOTE(review): _frame_options and _mel_options are not defined
        # in this class, presumably the base class constructor creates
        # them -- confirm.
        self._options = kaldi.feat.fbank.FbankOptions()
        self._options.frame_opts = self._frame_options
        self._options.mel_opts = self._mel_options

        # Filterbank-specific options: go through the property setters
        # so the values end up in self._options.
        self.use_energy = use_energy
        self.energy_floor = energy_floor
        self.raw_energy = raw_energy
        self.htk_compat = htk_compat
        self.use_log_fbank = use_log_fbank
        self.use_power = use_power

        # The Kaldi class itself is stored here, not an instance.
        # NOTE(review): presumably instantiated by the base class when
        # processing -- confirm.
        self._kaldi_processor = kaldi.feat.fbank.Fbank

    @property
    def name(self):
        """Name of the processor, always 'filterbank'"""
        return 'filterbank'

    @property
    def ndims(self):
        """Dimension of the output: `num_bins`, plus one if `use_energy`"""
        if self.use_energy:
            return self.num_bins + 1
        return self.num_bins

    @property
    def use_energy(self):
        """Add an extra dimension with energy to the filterbank output"""
        return self._options.use_energy

    @use_energy.setter
    def use_energy(self, value):
        self._options.use_energy = value

    @property
    def energy_floor(self):
        """Floor on energy (absolute, not relative) in filterbanks"""
        return self._options.energy_floor

    @energy_floor.setter
    def energy_floor(self, value):
        self._options.energy_floor = value

    @property
    def raw_energy(self):
        """If true, compute energy before preemphasis and windowing"""
        return self._options.raw_energy

    @raw_energy.setter
    def raw_energy(self, value):
        self._options.raw_energy = value

    @property
    def htk_compat(self):
        """If True, get closer to HTK filterbank features.

        Put energy last.

        Warning: Not sufficient to get HTK compatible features (need
        to change other parameters)

        """
        return self._options.htk_compat

    @htk_compat.setter
    def htk_compat(self, value):
        self._options.htk_compat = value

    @property
    def use_log_fbank(self):
        """If true, produce log-filterbank, else produce linear"""
        return self._options.use_log_fbank

    @use_log_fbank.setter
    def use_log_fbank(self, value):
        self._options.use_log_fbank = value

    @property
    def use_power(self):
        """If true, use power, else use magnitude"""
        return self._options.use_power

    @use_power.setter
    def use_power(self, value):
        self._options.use_power = value