Usage with other libraries

In this tutorial, we show how to compute the within-speaker triphone ABX error rate of the 11th layer of HuBERT base on the dev-clean subset of LibriSpeech. Along the way, this shows how to use fastabx with various feature-extraction libraries. The only part to adapt is the feature extraction; everything else is handled by zerospeech_abx.

In the following examples, the wav files are in the dev-clean directory and the item file is triphone-dev-clean.item.

With torchaudio

import torch
import torchaudio
from fastabx import zerospeech_abx

# 1-based index of the HuBERT transformer layer whose output is evaluated.
layer = 11
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The pretrained bundle is exposed as the upper-case constant HUBERT_BASE;
# `HUBERT_base` does not exist and raises AttributeError.
bundle = torchaudio.pipelines.HUBERT_BASE
# Switch to evaluation mode explicitly, as the S3PRL example does.
model = bundle.get_model().eval().to(device)


def maker(path: str) -> torch.Tensor:
    """Extract the features of HuBERT layer ``layer`` for one audio file.

    Args:
        path: Path to the wav file to load.

    Returns:
        The output of transformer layer ``layer`` with the leading batch
        dimension removed (assumes mono audio, so the batch size is 1).

    Raises:
        ValueError: If the file's sample rate differs from the one the
            pretrained bundle expects.
    """
    x, sr = torchaudio.load(str(path))
    if sr != bundle.sample_rate:
        msg = f"{path}: expected sample rate {bundle.sample_rate}, got {sr}"
        raise ValueError(msg)
    # Feature extraction only: skip autograd bookkeeping to save memory.
    with torch.inference_mode():
        features, _ = model.extract_features(x.to(device))
    # `features` holds one tensor per transformer layer, hence the 0-based
    # index `layer - 1`. Drop the batch dimension so a single sequence of
    # frames is returned.
    return features[layer - 1].squeeze(0)


# Run the ABX evaluation: the first argument is the item file, the second the
# directory containing the wav files. `maker` is used to turn each ".wav"
# file into features.
abx = zerospeech_abx(
    "./triphone-dev-clean.item",
    "./dev-clean",
    feature_maker=maker,
    extension=".wav",
)
# Display the resulting ABX error rate.
print(abx)

With S3PRL

import torch
import torchaudio
from fastabx import zerospeech_abx
from s3prl.nn import S3PRLUpstream


# Index of the hidden state to evaluate, and the sample rate (in Hz) that the
# wav files are required to have.
layer = 11
sample_rate = 16_000
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = S3PRLUpstream("hubert").eval().to(device)
# Sanity check: the selected layer must produce 50 feature frames per second.
assert sample_rate // model.downsample_rates[layer] == 50


def maker(path: str) -> torch.Tensor:
    """Extract hidden state ``layer`` of the S3PRL HuBERT upstream for one file.

    Args:
        path: Path to the wav file to load.

    Returns:
        Hidden state number ``layer`` with the leading batch dimension
        removed (assumes mono audio, so the batch size is 1).

    Raises:
        ValueError: If the file's sample rate differs from ``sample_rate``.
    """
    x, sr = torchaudio.load(str(path))
    if sr != sample_rate:
        msg = f"{path}: expected sample rate {sample_rate}, got {sr}"
        raise ValueError(msg)
    # S3PRLUpstream.forward requires the length (in samples) of every
    # waveform in the batch alongside the waveforms themselves.
    wavs_len = torch.tensor([x.size(1)], device=device)
    # Feature extraction only: skip autograd bookkeeping to save memory.
    with torch.inference_mode():
        # The upstream returns (hidden states, their lengths); only the
        # hidden states are needed here.
        hidden_states, _ = model(x.to(device), wavs_len)
    return hidden_states[layer].squeeze(0)


# Run the ABX evaluation: the first argument is the item file, the second the
# directory containing the wav files. `maker` is used to turn each ".wav"
# file into features.
abx = zerospeech_abx(
    "./triphone-dev-clean.item",
    "./dev-clean",
    feature_maker=maker,
    extension=".wav",
)
# Display the resulting ABX error rate.
print(abx)