Source code for speechbrain.augment.preparation

"""Library for Downloading and Preparing Datasets for Data Augmentation,
This library provides functions for downloading datasets from the web and
preparing the necessary CSV data manifest files for use by data augmenters.

Authors:
* Mirco Ravanelli 2023

"""

import os
import logging
import torchaudio
from speechbrain.utils.data_utils import download_file
from speechbrain.utils.data_utils import get_all_files

# Logger init
logger = logging.getLogger(__name__)


def prepare_dataset_from_URL(URL, dest_folder, ext, csv_file, max_length=None):
    """Download a dataset of recordings and build its CSV manifest.

    The archive is fetched to ``<dest_folder>/data.zip``. Unpacking is only
    requested when ``dest_folder`` does not exist yet. The CSV manifest is
    generated only if ``csv_file`` is not already present.

    Arguments
    ---------
    URL : str
        The URL of the dataset to download.
    dest_folder : str
        The local folder where the dataset will be downloaded.
    ext : str
        File extension to search for within the downloaded dataset, given
        without the leading dot (a "." is prepended before matching).
    csv_file : str
        The path to store the prepared CSV file.
    max_length : float
        The maximum length in seconds. Recordings longer than this will be
        automatically cut into pieces.
    """
    archive_path = os.path.join(dest_folder, "data.zip")

    # Unpacking is requested only on the first run, i.e. when the
    # destination folder is still missing.
    if os.path.isdir(dest_folder):
        download_file(URL, archive_path)
    else:
        download_file(URL, archive_path, unpack=True)

    # An existing manifest is left untouched.
    if os.path.isfile(csv_file):
        return

    matching_files = get_all_files(dest_folder, match_and=["." + ext])
    prepare_csv(matching_files, csv_file, max_length)
def prepare_csv(filelist, csv_file, max_length=None):
    """Write the CSV manifest for a list of audio files, cleaning up on error.

    This is a best-effort wrapper around ``write_csv``: any exception is
    logged (not re-raised) and a partially written CSV file is deleted.

    Arguments
    ---------
    filelist : list of str
        A list containing the paths of files of interest.
    csv_file : str
        The path to store the prepared CSV file.
    max_length : float
        The maximum length in seconds. Recordings longer than this will be
        automatically cut into pieces.
    """
    try:
        write_csv(filelist, csv_file, max_length)
    except Exception as err:
        # Report the failure together with its traceback.
        logger.error("Exception:", exc_info=err)

        # Do not leave a half-written manifest behind.
        if not os.path.exists(csv_file):
            return
        os.remove(csv_file)
def write_csv(filelist, csv_file, max_length=None):
    """Create the CSV manifest describing a list of audio files.

    The file at ``csv_file`` is (re)created, a header line is written, and
    every entry of ``filelist`` is handed to ``_write_csv_row`` together with
    its position in the list.

    Arguments
    ---------
    filelist : list of str
        A list containing the paths of audio files of interest.
    csv_file : str
        The path where to store the prepared CSV file.
    max_length : float (optional)
        The maximum recording length in seconds. Recordings longer than this
        will be automatically cut into pieces.
    """
    header = "ID,duration,wav,wav_format,wav_opts\n"
    with open(csv_file, "w") as manifest:
        manifest.write(header)
        for position, audio_path in enumerate(filelist):
            _write_csv_row(manifest, audio_path, position, max_length)
def _write_csv_row(w, filename, index, max_length):
    """Write the CSV row(s) describing a single audio file.

    The file is loaded, reduced to one channel if necessary, and then either
    written as a single row or, when it is longer than ``max_length``, cut
    into pieces with one row per piece.

    Arguments
    ---------
    w : file
        The open CSV file for writing.
    filename : str
        The path to the audio file.
    index : int
        The index of the audio file in the list.
    max_length : float (optional)
        The maximum recording length in seconds. ``None`` disables cutting.
    """
    signal, rate = torchaudio.load(filename)
    signal = _ensure_single_channel(signal, filename, rate)

    # splitext only splits on the last dot, so basenames with several dots
    # (e.g. "noise.v1.wav") or without any extension no longer raise.
    ID, suffix = os.path.splitext(os.path.basename(filename))
    ext = suffix[1:]
    duration = signal.shape[1] / rate

    if max_length is not None and duration > max_length:
        _handle_long_waveform(
            w, filename, ID, ext, signal, rate, duration, max_length, index
        )
    else:
        _write_short_waveform_csv(w, ID, ext, duration, filename, index)


def _ensure_single_channel(signal, filename, rate):
    """Ensure that the audio signal has only one channel.

    When the signal has more than one channel, only the first one is kept and
    the file at ``filename`` is overwritten with the single-channel version.

    Arguments
    ---------
    signal : torch.Tensor
        The audio signal, indexed as (channel, sample).
    filename : str
        The path to the audio file.
    rate : int
        The sampling frequency of the signal.

    Returns
    -------
    torch.Tensor
        The audio signal with a single channel.
    """
    if signal.shape[0] > 1:
        signal = signal[0].unsqueeze(0)
        torchaudio.save(filename, signal, rate)
    return signal


def _chunk_filename(filename, chunk_index):
    """Return the path for piece ``chunk_index`` of ``filename``.

    ``_<chunk_index>`` is inserted right before the file extension; dots in
    directory names or earlier in the basename are left untouched.
    """
    root, suffix = os.path.splitext(filename)
    return f"{root}_{chunk_index}{suffix}"


def _handle_long_waveform(
    w, filename, ID, ext, signal, rate, duration, max_length, index
):
    """Cut a long waveform into pieces and write one CSV row per piece.

    Each piece is saved next to the original file as ``<name>_<j>.<ext>``.
    The original file is deleted once all pieces have been written.

    Arguments
    ---------
    w : file
        The open CSV file for writing.
    filename : str
        The path to the audio file.
    ID : str
        The unique identifier for the audio.
    ext : str
        The audio file extension (without the leading dot).
    signal : torch.Tensor
        The audio signal.
    rate : int
        The audio sample rate.
    duration : float
        The duration of the audio in seconds.
    max_length : float
        The maximum recording length in seconds.
    index : int
        The index of the audio file in the list.
    """
    # NOTE(review): only full pieces of max_length seconds are produced; audio
    # after the last full piece is not written anywhere. Confirm this is the
    # intended behavior.
    for j in range(int(duration / max_length)):
        start = int(max_length * j * rate)
        stop = int(min(max_length * (j + 1), duration) * rate)
        new_filename = _chunk_filename(filename, j)
        torchaudio.save(new_filename, signal[:, start:stop], rate)
        csv_row = (
            f"{ID}_{index}_{j}",
            str((stop - start) / rate),
            new_filename,
            ext,
            "\n",
        )
        w.write(",".join(csv_row))

    # Remove the original only after every piece was saved, so a failure
    # while saving does not lose the recording.
    os.remove(filename)


def _write_short_waveform_csv(w, ID, ext, duration, filename, index):
    """Write a CSV row for an audio file that does not need cutting.

    Arguments
    ---------
    w : file
        The open CSV file for writing.
    ID : str
        The unique identifier for the audio.
    ext : str
        The audio file extension.
    duration : float
        The duration of the audio in seconds.
    filename : str
        The path to the audio file.
    index : int
        The index of the audio file in the list.
    """
    # The trailing "\n" element leaves the last column (wav_opts) empty and
    # terminates the row.
    w.write(",".join((f"{ID}_{index}", str(duration), filename, ext, "\n")))