"""Core functionality"""
# from sklearn.cluster import KMeans
# from functools import partial
from warnings import warn
from typing import Callable, Any, Optional, Mapping
from slang.stypes import (
Waveform,
Chunk,
Chunker,
Featurizer,
Quantizer,
Snip,
Snips,
FVs,
)
from slang.chunkers import DFLT_CHUNKER
from slang.featurizers import DFLT_FEATURIZER, DFLT_QUANTIZER
WfCallback = Optional[Callable[[Waveform], Any]]
class TagWfStore:
def __init__(self, wf_store, key_to_tag=None, key_filt=None):
self.wf_store = wf_store
self.key_to_tag = key_to_tag
self.key_filt = key_filt
def key_wf_gen(self):
pass
def wfs(self):
pass
def tag_wfs(self):
pass
from py2store.util import lazyprop
class WfSource:
def __init__(self, wfs, key_filt=None):
self.wfs = wfs
self.key_filt = key_filt
@lazyprop
def keys(self):
return tuple(filter(self.key_filt, self.wfs))
def wf_gen(self):
for k in self.keys:
yield self.wfs[k]
def key_wf_gen(self):
for k in self.keys:
yield k, self.wfs[k]
class AnnotedWfSource(WfSource):
def __init__(self, wfs, annots=None, key_filt=None):
super().__init__(wfs, key_filt)
self.annots = annots
@lazyprop
def keys(self):
annots_keys = set(self.annots)
return tuple([k for k in super().keys() if k in annots_keys])
def annots_gen(self):
for k in self.keys:
yield self.annots[k]
def annot_wf_gen(self):
for k in self.keys:
yield self.annots[k], self.wfs[k]
def key_annot_wf_gen(self):
for k in self.keys:
yield self.annots[k], self.wfs[k]
# Notes:
"""
- key_to_tag not general enough because tag not general enough. Sometimes it's not a categorical.
Sometimes it's multiple. Sometimes we have context information that needs to be associated with the annot.
"""
class KvDataSource:
def __init__(self, kv_store, key_to_tag=None, key_filt=None):
self.kv_store = kv_store
self.key_to_tag = key_to_tag
self.key_filt = key_filt
def key_wf_gen(self):
kv_store = self.kv_store
for k in filter(self.key_filt, self.kv_store):
yield k, kv_store[k]
def key_tag_wf_gen(
self, wf_callback: WfCallback = None, iterate_over_wf_callaback_output=False
):
assert (
self.key_to_tag is not None
), 'You need to have a key_to_tag function to do that!'
if wf_callback is None:
for k, wf in self.key_wf_gen():
yield k, self.key_to_tag(k), wf
else:
assert callable(wf_callback), 'wf_callback needs to be callable.'
if iterate_over_wf_callaback_output:
for k, wf in self.key_wf_gen():
tag = self.key_to_tag(k)
for x in wf_callback(wf):
yield k, tag, x
else:
for k, wf in self.key_wf_gen():
yield k, self.key_to_tag(k), wf_callback(wf)
def key_tag_chks_gen(self, wf_to_chk):
yield from self.key_tag_wf_gen(
wf_callback=wf_to_chk, iterate_over_wf_callaback_output=True
)
def key_tag_fvs_gen(self, wf_to_chk, chk_to_fv):
wf_to_fv = lambda wf: list(map(chk_to_fv, wf_to_chk(wf)))
yield from self.key_tag_wf_gen(
wf_callback=wf_to_fv, iterate_over_wf_callaback_output=True
)
def key_tag_snips_gen(self, wf_to_chk, chk_to_fv, fv_to_snip):
def wf_to_snips(wf):
return list(fv_to_snip(chk_to_fv(chk)) for chk in wf_to_chk(wf))
yield from self.key_tag_wf_gen(
wf_callback=wf_to_snips, iterate_over_wf_callaback_output=True
)
def key_chks_gen(self, wf_to_chk):
for k, wf in self.key_wf_gen():
for chk in wf_to_chk(wf):
yield k, chk
def key_fvs_gen(self, wf_to_chk, chk_to_fv):
for k, chk in self.key_chks_gen(wf_to_chk):
yield k, chk_to_fv(chk)
def key_snips_gen(self, wf_to_chk, chk_to_fv, fv_to_snip):
for k, fv in self.key_fvs_gen(wf_to_chk, chk_to_fv):
yield k, fv_to_snip(fv)
def chk_tag_pairs(self, wf_to_chks):
return ((chk, tag) for _, tag, chk in self.key_tag_chks_gen(wf_to_chks))
def fv_tag_pairs(self, wf_to_chks, chk_to_fv):
return ((fv, tag) for _, tag, fv in self.key_tag_fvs_gen(wf_to_chks, chk_to_fv))
[docs]class Snipper:
"""A base class that implements the wf->chk->fv->snip pipeline.
Default functions for wf_to_chk (a.k.a. chunker), chk_to_fv (a.k.a. featurizer) and fv_to_snip (a.k.a. nipper)
are given, but usually the user should specify these, and usually these are learned from data.
"""
def __init__(
self,
wf_to_chks: Chunker = DFLT_CHUNKER,
chk_to_fv: Featurizer = DFLT_FEATURIZER,
fv_to_snip: Quantizer = DFLT_QUANTIZER,
):
self.wf_to_chks = wf_to_chks
self.chk_to_fv = chk_to_fv
self.fv_to_snip = fv_to_snip
def wf_to_fvs(self, wf: Waveform) -> FVs:
for chk in self.wf_to_chks(wf):
yield self.chk_to_fv(chk)
def chk_to_snip(self, chk: Chunk) -> Snip:
return self.fv_to_snip(self.chk_to_fv(chk))
def wf_to_snips(self, wf: Waveform) -> Snips:
for chk in self.wf_to_chks(wf):
fv = self.chk_to_fv(chk)
yield self.fv_to_snip(fv)
snips_of_wf = wf_to_snips # alias for back-compatibility
# Delegations ##################
# TODO: Get delegated attrs to show up as full fledged attr (tab complete, etc.)
_delegations = dict(
stats_of_snip=('fv_to_snip', 'stats_of_snip'),
fv_of_snip=('fv_to_snip', 'fv_of_snip'),
alphabet_size=('fv_to_snip', 'alphabet_size'),
)
def __getattr__(self, attr):
"""Delegate method to wrapped store if not part of wrapper store methods"""
attr_spec = self._delegations.get(attr, None)
if attr_spec is not None:
a = self
for aa in attr_spec:
a = getattr(a, aa)
return a
else:
raise AttributeError(f'Unknown attribute: {attr}')
# TODO: Revise approach here (see above)
# lazyprop or property?
# getattr or self.attr (with try/catch or not)
# self.metric[snip] or self.all_metrics[snip][metric] or self.all_metrics[metric][snip] or self.all.metric[snip]
# TODO: Delegate multiple at once?
# @lazyprop
# def stats_of_snip(self) -> Mapping:
# """(Lazy) property that holds the dict of snip stats"""
# return getattr(self.fv_to_snip, 'stats_of_snip', dict()) # look for them in fv_to_snip or return empty dict
#
# @property
# def fv_of_snip(self) -> Mapping:
# """property that holds the dict of snip stats"""
# return getattr(self.fv_to_snip, 'fv_of_snip', dict()) # look for them in fv_to_snip or return empty dict
@property
def alphabet_size(self) -> int:
return self.fv_to_snip.alphabet_size
def __call__(self, wf: Waveform) -> Snips:
return self.wf_to_snips(wf)