# Source code for slang.featurizers

"""Featurizers. Functions to get from waveform to feature vectors"""

from numpy import hanning, kaiser, abs, diff, array, std
from numpy.fft import rfft
from functools import wraps, partial
from slang.stypes import Chunk, Chunks, Featurizer


########################################################################################################################
# FFT

# Window used by tile_fft when the caller does not supply one.
DFLT_WIN_FUNC = hanning


def tile_fft(tile, window=DFLT_WIN_FUNC, amp_function=abs):
    """Compute the windowed FFT amplitudes of a single tile.

    :param tile: The samples of one tile (chunk) of a waveform.
    :param window: Either the window weights themselves, or a callable that is
        given ``len(tile)`` and returns the weights. The tile is multiplied by
        these weights before the FFT is taken.
    :param amp_function: Function applied to the output of ``rfft`` (by default
        ``abs``, which yields the amplitude of each frequency bin).
    :return: ``amp_function(rfft(tile * window))``
    """
    weights = window(len(tile)) if callable(window) else window
    return amp_function(rfft(tile * weights))
def identity_func(x):
    """The identity (a.k.a. transparent) function: returns its input as is."""
    return x
def mk_window_func(window_func, *args, **kwargs):
    """Make a waveform preprocessor that multiplies its input by a fixed window.

    The window is computed only once, as ``window_func(*args, **kwargs)``, when
    this factory is called. The returned function then multiplies that
    precomputed window with whatever waveform it is given.

    :param window_func: Callable producing the window values.
    :param args: Positional arguments forwarded to ``window_func``.
    :param kwargs: Keyword arguments forwarded to ``window_func``.
    :return: A function ``wf -> window * wf``.
    """
    window_values = window_func(*args, **kwargs)

    def wf_preproc(wf):
        return window_values * wf

    return wf_preproc


# Shortcuts with the window function already bound; ``wraps`` makes them carry
# the name and documentation of the numpy window function they use.
mk_window_func.hanning = wraps(hanning)(partial(mk_window_func, hanning))
mk_window_func.kaiser = wraps(kaiser)(partial(mk_window_func, kaiser))
def mk_wf_to_spectr(
    preproc: callable = None, fft_func: callable = rfft, postproc: callable = abs,
):
    """Make a function that computes the spectrum of a waveform.

    By spectrum, we mean the output of the pipeline:
        `tile -> preproc -> fft -> postproc -> spectrum`

    Typically, the input waveform is preprocessed (say, multiplied by a hanning
    window) and the fft is post-processed (say, by taking the norm of the
    complex vector). If ``preproc`` is not callable (e.g. ``None``), the
    preprocessing step is skipped altogether.

    >>> import numpy as np
    >>> chk = np.ones(2048)  # the really interesting waveform we'll use as an example.
    >>> chk_size = len(chk)
    >>>
    >>> wf_to_spectr = mk_wf_to_spectr()  # default preproc is None (i.e. the waveform is not preprocessed)
    >>> s = wf_to_spectr(chk)
    >>> len(s)
    1025
    >>> assert s[1] == 0  # the second value is zero (with the hanning window, we wouldn't have that!)
    >>>
    >>> wf_to_spectr = mk_wf_to_spectr.w_hanning(chk_size)  # let's try the hanning window
    >>> s = wf_to_spectr(chk)
    >>> len(s)
    1025
    >>> assert s[1] > 0  # the second value is non-zero (without the hanning window, we wouldn't have that!)
    >>>
    >>> wf_to_spectr = mk_wf_to_spectr.w_kaiser(chk_size, beta=0.0)  # use kaiser preproc
    >>> s = wf_to_spectr(chk)
    >>> len(s)
    1025
    >>> assert s[1] == 0  # the second value is zero (because beta=0.0: with the hanning window, we wouldn't have that!)
    >>>
    """
    if not callable(preproc):
        # No usable preprocessor: feed the waveform straight to the fft.
        def wf_to_spectr(wf):
            return postproc(fft_func(wf))

        return wf_to_spectr

    def wf_to_spectr(wf):
        preprocessed = preproc(wf)
        return postproc(fft_func(preprocessed))

    return wf_to_spectr
def _mk_wf_to_spectr_w_hanning(
    window_size: int, fft_func: callable = rfft, postproc: callable = abs
):
    """Make a wf_to_spectr function whose preproc applies a hanning window.

    :param window_size: Number of points of the hanning window; passed on to
        ``mk_window_func.hanning``.
    :param fft_func: The fft function of the pipeline.
    :param postproc: Function applied to the output of ``fft_func``.
    """
    return mk_wf_to_spectr(
        preproc=mk_window_func.hanning(window_size),
        fft_func=fft_func,
        postproc=postproc,
    )


def _mk_wf_to_spectr_w_kaiser(
    window_size: int,
    beta: float = 0.0,
    fft_func: callable = rfft,
    postproc: callable = abs,
):
    """Make a wf_to_spectr function whose preproc applies a kaiser window.

    The window_size should be set to the fixed tile (chunk) size you're using.

    beta defaults to 0.0. The window is made with numpy's kaiser function, so
    look there to choose a value. Here are some tips:

    ====  =======================
    beta  Window shape
    ====  =======================
    0     Rectangular
    5     Similar to a Hamming
    6     Similar to a Hanning
    8.6   Similar to a Blackman
    ====  =======================

    """
    return mk_wf_to_spectr(
        preproc=mk_window_func.kaiser(window_size, beta),
        fft_func=fft_func,
        postproc=postproc,
    )


# Expose the windowed variants as attributes of the main factory.
mk_wf_to_spectr.w_hanning = _mk_wf_to_spectr_w_hanning
mk_wf_to_spectr.w_kaiser = _mk_wf_to_spectr_w_kaiser

# Default waveform-to-spectrum function: no preproc, rfft, then abs.
DFLT_WF_TO_SPECTR = mk_wf_to_spectr()

########################################################################################################################
# Spectral Projectors

from numpy import dot

mat_mult = dot
class NotFittedError(ValueError, AttributeError):
    """Error that can be caught either as a ValueError or as an AttributeError.

    NOTE(review): presumably meant to be raised when an object is used before
    being fitted; nothing in the visible part of this module raises it.
    """
# Featurizer
class SpectralProjector:
    """Featurizer based on multiplying spectra by a fixed ``scalings_`` matrix.

    :param scalings_: Matrix the spectra are multiplied with. Its ``shape[1]``
        is reported as the feature vector length.
    :param chk_to_spectr: Function computing the spectrum of a single chunk.
    """

    def __init__(self, scalings_, chk_to_spectr=DFLT_WF_TO_SPECTR):
        self.scalings_ = scalings_
        self.chk_to_spectr = chk_to_spectr

    @property
    def fv_length(self):
        """Length of the feature vectors: the second dimension of ``scalings_``."""
        return self.scalings_.shape[1]

    def spectr_mat(self, chks: Chunks):
        """Array whose rows are the spectra of the given chunks."""
        return array([self.chk_to_spectr(chk) for chk in chks])

    def transform(self, chks: Chunks):
        """Matrix product of the spectra of ``chks`` with ``scalings_``."""
        return mat_mult(self.spectr_mat(chks), self.scalings_)

    def fv_of_chk(self, chk):
        """Product of the spectrum of a single chunk with ``scalings_``."""
        return mat_mult(self.chk_to_spectr(chk), self.scalings_)

    def __call__(self, chk: Chunk):
        # NOTE(review): this returns the raw spectrum as a list and does not use
        # scalings_ (unlike fv_of_chk). Kept as is, since callers may rely on
        # it -- confirm whether the projection was intended here.
        return list(self.chk_to_spectr(chk))


########################################################################################################################
# Some silly featurizer to play with

from bisect import bisect_left


def zcr(chk):
    """Zero crossing rate of a chunk.

    Counts how many times the ``> 0`` status changes between consecutive
    samples, and divides that count by ``len(chk) - 1``.
    """
    return sum(diff(array(chk) > 0).astype(int)) / (len(chk) - 1)


def rms_zcr(chk):
    """Return the pair ``(std(chk), zcr(chk))``.

    Note that the first element is the standard deviation of the chunk.
    """
    return std(chk), zcr(chk)


# Each of the two features is quantized into _n_levels levels, delimited by
# increasing powers of two (zcr: 2**-9 .. 2**-1, rms: 4 .. 1024).
_n_levels = 10
_zcr_level_dividers = [2 ** (-(x + 1)) for x in range(0, _n_levels - 1)][::-1]
_rms_level_dividers = [2048 * 2 ** (-(x + 1)) for x in range(0, _n_levels - 1)][::-1]


def _rms_zcr_to_levels(rms, zcr):
    """Map an (rms, zcr) pair to its pair of level indices (bisection on the dividers)."""
    return (
        bisect_left(_rms_level_dividers, rms),
        bisect_left(_zcr_level_dividers, zcr),
    )


def rms_zcr_quantizer(fv):
    """Quantize an ``(rms, zcr)`` feature vector into a single integer.

    The result is ``rms_level * _n_levels + zcr_level``.
    """
    rms, zcr = fv
    rms_level, zero_crossing_level = _rms_zcr_to_levels(rms, zcr)
    return rms_level * _n_levels + zero_crossing_level


DFLT_FEATURIZER = rms_zcr
DFLT_QUANTIZER = rms_zcr_quantizer

import numpy as np
from typing import Union, Iterable


def mk_spectral_moment_featurizer(
    n_moments=100,
    preproc: callable = None,
    fft_func: callable = rfft,
    postproc: callable = abs,
):
    """Make a featurizer that describes the spectrum of a waveform by its moments.

    The spectrum is computed by ``mk_wf_to_spectr(preproc, fft_func, postproc)``.
    The returned function maps a waveform to an array of ``n_moments`` values:

    - the first is the mean of the spectrum,
    - the second is its standard deviation,
    - the k-th (for k >= 3) is the k-th moment about the mean, divided by the
      standard deviation raised to the power k.

    :param n_moments: Number of features to compute.
    :param preproc: Preprocessing function of the spectrum pipeline (or None).
    :param fft_func: The fft function of the spectrum pipeline.
    :param postproc: Function applied to the output of ``fft_func``.
    :return: A function ``wf -> array of features``.
    :raises ValueError: If ``n_moments`` does not lead to at least one feature.
    """
    wf_to_spectr = mk_wf_to_spectr(preproc, fft_func, postproc)

    moments = np.arange(1, n_moments + 1)
    # Kept separate from n_moments so that the error message below can report
    # the value the caller actually passed.
    n_features = len(moments)
    std_exponents = np.arange(1, n_features + 1)

    if n_features > 2:

        def moment_featurizer(wf):
            a = wf_to_spectr(wf)
            a_std = np.std(a)
            m = moment(a, moments) / (a_std ** std_exponents)
            # The first two entries are replaced by the mean and the std.
            m[0] = np.mean(a)
            m[1] = a_std
            return m

    elif n_features == 2:

        def moment_featurizer(wf):
            a = wf_to_spectr(wf)
            return np.array([np.mean(a), np.std(a)])

    elif n_features == 1:

        def moment_featurizer(wf):
            a = wf_to_spectr(wf)
            return np.array([np.mean(a)])

    else:
        raise ValueError(
            f'n_moments should be a positive integer. Instead, was {n_moments}'
        )

    return moment_featurizer


def _chk_asarray(a, axis):
    """Return ``a`` as an array along with the axis to operate on.

    With ``axis=None`` the input is flattened and the axis becomes 0. A 0-d
    input is promoted to a 1-d array.
    """
    if axis is None:
        a = np.ravel(a)
        outaxis = 0
    else:
        a = np.asarray(a)
        outaxis = axis

    if a.ndim == 0:
        a = np.atleast_1d(a)

    return a, outaxis
def moment(a, moment: Union[int, Iterable] = 1, axis=0):
    r"""Calculate the nth moment about the mean for a sample.

    Taken from scipy.stats

    ``moment`` is either a single order, in which case a single result is
    returned, or an iterable of orders, in which case an array with one result
    per order is returned. An empty input yields nan(s) of matching shape.

    Examples
    --------
    >>> moment([1, 2, 3, 4, 5], moment=1)
    0.0
    >>> moment([1, 2, 3, 4, 5], moment=2)
    2.0

    """
    a, axis = _chk_asarray(a, axis)
    single_order = np.isscalar(moment)

    if a.size == 0:
        # Empty input: nothing to compute, answer with nan(s) shaped like `moment`.
        if single_order:
            return np.nan
        return np.full(np.asarray(moment).shape, np.nan, dtype=np.float64)

    if single_order:
        return _moment(a, moment, axis)

    # Array-like `moment`: compute one value per requested order.
    return np.array([_moment(a, order, axis) for order in moment])
def _moment(a, moment, axis): if np.abs(moment - np.round(moment)) > 0: raise ValueError('All moment parameters must be integers') if moment == 0: # When moment equals 0, the result is 1, by definition. shape = list(a.shape) del shape[axis] if shape: # return an actual array of the appropriate shape return np.ones(shape, dtype=float) else: # the input was 1D, so return a scalar instead of a rank-0 array return 1.0 elif moment == 1: # By definition the first moment about the mean is 0. shape = list(a.shape) del shape[axis] if shape: # return an actual array of the appropriate shape return np.zeros(shape, dtype=float) else: # the input was 1D, so return a scalar instead of a rank-0 array return np.float64(0.0) else: # Exponentiation by squares: form exponent sequence n_list = [moment] current_n = moment while current_n > 2: if current_n % 2: current_n = (current_n - 1) / 2 else: current_n /= 2 n_list.append(current_n) # Starting point for exponentiation by squares a_zero_mean = a - np.expand_dims(np.mean(a, axis), axis) if n_list[-1] == 1: s = a_zero_mean.copy() else: s = a_zero_mean ** 2 # Perform multiplications for n in n_list[-2::-1]: s = s ** 2 if n % 2: s *= a_zero_mean return np.mean(s, axis)