# Source code for speechbrain.utils.recipe_tests

"""Library for running recipe tests.

Authors
 * Mirco Ravanelli 2022
 * Andreas Nautsch 2022
"""
import os
import re
import csv
import subprocess as sp
from hyperpyyaml import load_hyperpyyaml


def check_row_for_test(row, filter_fields, filters, test_field):
    """Decides whether a row of the csv recipe file describes a test to run.

    Arguments
    ---------
    row: dict
        Line of the csv file (in dict form).
    filter_fields: list
        Keys of `row` whose values are inspected. Used together with
        `filters` to run only some tests. For instance,
        filter_fields=['Task'] and filters=['ASR'] will only run tests
        for ASR recipes.
    filters: list
        Strings that must all be contained in every inspected field value.
    test_field: string
        Key of the input dictionary that contains the test flags.

    Returns
    ---------
    test: bool
        True if the line must be tested, False otherwise.
    """
    # Every filter is probed against every listed field (containment test).
    # The list is fully built before being reduced, so each field is looked up
    # even when an earlier comparison has already failed.
    matches = [
        filt in row[field] for field in filter_fields for filt in filters
    ]
    if not all(matches):
        return False

    # A row that only contains whitespace in its test field has nothing to run
    return len(row[test_field].strip()) != 0


def prepare_test(
    recipe_csvfile="tests/recipes.csv",
    script_field="Script_file",
    hparam_field="Hparam_file",
    recipe_id_field="RecipeID",
    test_field="test_debug_flags",
    check_field="test_debug_checks",
    filters_fields=[],
    filters=[],
):
    """Collects, from the csv recipe file, what is needed to run the tests.

    Arguments
    ---------
    recipe_csvfile: path
        Path of the csv recipe file summarizing all the recipes in the repo.
    script_field: str
        Field of the csv recipe file containing the path of the script to run.
    hparam_field: str
        Field of the csv recipe file containing the path of the hparam file.
    recipe_id_field: str
        Field of the csv recipe file containing the unique recipe ID.
    test_field: string
        Field of the csv recipe file containing the test flags.
    check_field: string
        Field of the csv recipe file containing the checks to perform.
    filters_fields: list
        This can be used with the "filters" variable to run only some tests.
        For instance, filters_fields=['Task'] and filters=['ASR'] will only
        run tests for ASR recipes.
    filters: list
        See above.

    Returns
    ---------
    test_script: dict
        A dictionary containing recipe IDs as keys and test scripts as values.
    test_hparam: dict
        A dictionary containing recipe IDs as keys and hparams as values.
    test_flag: dict
        A dictionary containing recipe IDs as keys and the test flags as values.
    test_check: dict
        A dictionary containing recipe IDs as keys and the checks as values.
    """
    scripts, hparams, flags, checks = {}, {}, {}, {}

    # Each output dictionary is fed from one column of the csv file
    column_of = (
        (scripts, script_field),
        (hparams, hparam_field),
        (flags, test_field),
        (checks, check_field),
    )

    with open(recipe_csvfile, newline="") as csv_handle:
        rows = csv.DictReader(csv_handle, delimiter=",", skipinitialspace=True)
        for row in rows:
            # Only keep the rows that pass the filters and carry test flags
            if check_row_for_test(row, filters_fields, filters, test_field):
                key = row[recipe_id_field].strip()
                for target, column in column_of:
                    target[key] = row[column].strip()

    return scripts, hparams, flags, checks


def check_files(
    check_str, output_folder, recipe_id, pattern=r"file_exists=\[(.*?)\]"
):
    """Checks if the output folder created by the test has the expected files.

    Arguments
    ---------
    check_str: str
        String summarizing the checks to perform.
    output_folder: path
        The path where to check the files.
    recipe_id: str
        Unique ID of the recipe.
    pattern: str
        The pattern used to extract the list of files to check from check_str.

    Returns
    ---------
    check: bool
        True if all the files are found (or if check_str does not request
        any file check), False otherwise.
    """
    listed = re.search(pattern, check_str)
    if listed is None:
        # No file check was requested in check_str: nothing can be missing.
        # (Same convention as check_performance for an absent entry.)
        return True

    check = True
    for file_to_check in listed.group(1).split(","):
        # Tolerate spaces after the commas and ignore empty entries
        file_to_check = file_to_check.strip()
        if not file_to_check:
            continue
        check_path = os.path.join(output_folder, file_to_check)
        if not os.path.exists(check_path):
            print(
                "\tERROR: The recipe %s does not contain the expected file %s"
                % (recipe_id, check_path)
            )
            check = False
    return check


def check_performance(
    check_str, output_folder, recipe_id, pattern=r"performance_check=\[(.*?)\]"
):
    """Checks if the performance achieved by the recipe matches with the
    expectations. This is done by adding a performance_check entry in the recipe
    check field of the csv recipe file.
    For instance: performance_check=[train_log.txt, train loss, <=15, epoch: 2]
    will check the variable "train loss" in the lines of train_log.txt that
    contain "epoch: 2". It will report an error if the train loss is >15.

    Arguments
    ---------
    check_str: str
        String summarizing the checks to perform.
    output_folder: path
        The path where the recipe files are stored.
    recipe_id: str
        Unique ID of the recipe.
    pattern: str
        The pattern used to extract the performance_check entry from check_str.

    Returns
    ---------
    check: bool
        True if check_str has no performance_check entry or if the requested
        constraint holds on every selected line. False if the entry is
        malformed, the file is missing, no line is selected, the variable
        cannot be read, or the threshold is violated.
    """
    performance_to_check = re.search(pattern, check_str)
    if performance_to_check is None:
        return True

    # Getting the needed information from the "performance_check" entry
    fields = [
        field.strip() for field in performance_to_check.group(1).split(",")
    ]
    if len(fields) < 4:
        print(
            "\tERROR: Malformed performance_check entry for recipe %s "
            "(expected [file, variable, threshold, epoch])" % (recipe_id)
        )
        return False
    filename, variable, threshold, epoch = fields[:4]
    filename = os.path.join(output_folder, filename)

    if not os.path.exists(filename):
        print(
            "\tERROR: The file %s of recipe %s does not exist (needed for performance checks)"
            % (filename, recipe_id)
        )
        return False

    # Select the lines of interest. When the marker ends with a digit, a digit
    # right after it means a different number (e.g. "epoch: 2" vs "epoch: 20"),
    # so such lines are rejected.
    if epoch and epoch[-1].isdigit():
        epoch_regex = re.compile(re.escape(epoch) + r"(?!\d)")

        def is_selected(line):
            return epoch_regex.search(line) is not None

    else:

        def is_selected(line):
            return epoch in line

    with open(filename) as file:
        lines_filt = [line for line in file if is_selected(line)]

    # Reporting an error if there are no lines after applying the filter
    if len(lines_filt) == 0:
        print(
            "\tERROR: No entries %s in %s (recipe %s). See performance_check entry."
            % (epoch, filename, recipe_id)
        )
        return False

    # The value is the token following "<variable>:", up to the next blank or
    # comma. The variable name is escaped so it is matched literally.
    value_regex = re.compile(re.escape(variable) + r":\s*([^\s,]+)")

    check = True
    for line in lines_filt:
        var_value = value_regex.search(line)
        if var_value is None:
            print(
                "\tERROR: The file %s of recipe %s does not contain the variable %s (needed for performance checks)"
                % (filename, recipe_id, variable)
            )
            return False
        try:
            var_value = float(var_value.group(1))
        except ValueError:
            print(
                "\tERROR: The variable %s of file %s (recipe %s) is not a number (%s)"
                % (variable, filename, recipe_id, var_value.group(1))
            )
            return False

        check = check_threshold(threshold, var_value)
        if not check:
            print(
                "\tERROR: The variable %s of file %s (recipe %s) violated the specified threshold (%s %s)"
                % (variable, filename, recipe_id, var_value, threshold)
            )
            break
    return check


def check_threshold(threshold, value):
    """Tells whether a value satisfies a constraint given as a string.

    Arguments
    ---------
    threshold: str
        String that contains the constraint. E.g, ">=10" or "==15" or "<5".
    value: float
        Float corresponding to the value to test.

    Returns
    ---------
    bool
        True if the constraint is satisfied, False otherwise (including when
        the string contains no comparison symbol).
    """
    # Drop every comparison character to isolate the numeric bound
    symbols_removed = threshold.strip().translate(str.maketrans("", "", "=><"))
    bound = float(symbols_removed)

    # Probing order matters: ">=" and "<=" must be tried before ">" and "<",
    # which they contain.
    comparators = (
        ("==", lambda candidate: candidate == bound),
        (">=", lambda candidate: candidate >= bound),
        (">", lambda candidate: candidate > bound),
        ("<=", lambda candidate: candidate <= bound),
        ("<", lambda candidate: candidate < bound),
    )
    for symbol, is_satisfied in comparators:
        if symbol in threshold:
            return is_satisfied(value)
    return False


def run_test_cmd(cmd, stdout_file, stderr_file):
    """Runs the command corresponding to a recipe test. The standard output and
    the standard error are saved in the specified paths.

    Arguments
    ---------
    cmd: str
        String corresponding to the command to run.
    stdout_file: path
        File where standard output is stored.
    stderr_file: path
        File where standard error is stored.

    Returns
    ---------
    rc: int
        The return code obtained after running the command.
        If 0, the test is run without errors.
        If different from 0, the execution failed.
    """
    # The context manager closes both files even if launching the command raises
    with open(stdout_file, "w") as f_stdout, open(stderr_file, "w") as f_stderr:
        # cmd is a full command line, hence shell=True with the string itself
        # (not wrapped in a list, which is only equivalent on POSIX).
        completed = sp.run(cmd, stdout=f_stdout, stderr=f_stderr, shell=True)
    return completed.returncode


def run_recipe_tests(
    recipe_csvfile="tests/recipes.csv",
    script_field="Script_file",
    hparam_field="Hparam_file",
    recipe_id_field="RecipeID",
    test_field="test_debug_flags",
    check_field="test_debug_checks",
    run_opts="--device=cpu",
    output_folder="tests/tmp/recipes/",
    filters_fields=[],
    filters=[],
    do_checks=True,
):
    """Runs the recipes tests.

    Arguments
    ---------
    recipe_csvfile: path
        Path of the csv recipe file summarizing all the recipes in the repo.
    script_field: str
        Field of the csv recipe file containing the path of the script to run.
    hparam_field: str
        Field of the csv recipe file containing the path of the hparam file.
    recipe_id_field: str
        Field of the csv recipe file containing the unique recipe ID.
    test_field: string
        Field of the csv recipe file containing the test flags.
    check_field: string
        Field of the csv recipe file containing the checks to perform.
    run_opts: string
        Additional flags to add for the tests (see run_opts of speechbrain/core.py).
    output_folder: string
        Folder where the output of the tests are saved.
    filters_fields: list
        This can be used with the "filters" variable to run only some tests.
        For instance, filters_fields=['Task'] and filters=['ASR'] will only
        run tests for ASR recipes.
    filters: list
        See above.
    do_checks: bool
        If True performs the checks on the output folder (when the check_field
        is not empty).

    Returns
    ---------
    check: bool
        True if all the recipe tests pass, False otherwise.
    """
    # Create the output folder (where the tests results will be saved)
    os.makedirs(output_folder, exist_ok=True)
    print("Test ouputs will be put in %s" % (output_folder))

    # Read the csv recipe file and detect which tests we have to run.
    # All the field names are forwarded, so that custom recipe_id_field,
    # test_field and check_field values are actually honoured.
    test_script, test_hparam, test_flag, test_check = prepare_test(
        recipe_csvfile,
        script_field,
        hparam_field,
        recipe_id_field=recipe_id_field,
        test_field=test_field,
        check_field=check_field,
        filters_fields=filters_fields,
        filters=filters,
    )

    check = True
    n_tests = len(test_script)
    for i, recipe_id in enumerate(test_script):
        print("(%i/%i) Running test for %s..." % (i + 1, n_tests, recipe_id))

        # Each recipe gets its own folder, with the captured stdout/stderr
        output_fold = os.path.join(output_folder, recipe_id)
        os.makedirs(output_fold, exist_ok=True)
        stdout_file = os.path.join(output_fold, "stdout.txt")
        stderr_file = os.path.join(output_fold, "stderr.txt")

        # Composing command to run
        cmd = " ".join(
            [
                "python",
                test_script[recipe_id],
                test_hparam[recipe_id],
                "--output_folder=" + output_fold,
                test_flag[recipe_id],
                run_opts,
            ]
        )

        # Running the test
        return_code = run_test_cmd(cmd, stdout_file, stderr_file)

        # Check return code
        if return_code != 0:
            print(
                "\tERROR: Error in %s. Check %s and %s for more info."
                % (recipe_id, stderr_file, stdout_file)
            )
            check = False

        # Checks on the produced output (both are always evaluated, so that
        # every problem gets reported)
        check_str = test_check[recipe_id].strip()
        if do_checks and len(check_str) > 0:
            if not check_files(check_str, output_fold, recipe_id):
                check = False
            if not check_performance(check_str, output_fold, recipe_id):
                check = False
    return check


def load_yaml_test(
    recipe_csvfile="tests/recipes.csv",
    script_field="Script_file",
    hparam_field="Hparam_file",
    test_field="Hparam_file",
    filters_fields=[],
    filters=[],
    avoid_list=[
        "templates/hyperparameter_optimization_speaker_id/train.yaml",
        "templates/speaker_id/train.yaml",
        # recipes creating errors if NVIDIA driver is not on one's system
        "recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml",
        "recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml",
        "recipes/timers-and-such/direct/hparams/train.yaml",
        "recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml",
        "recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml",
        "recipes/fluent-speech-commands/direct/hparams/train.yaml",
        "recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml",
        "recipes/SLURP/direct/hparams/train.yaml",
    ],
    rir_folder="tests/tmp/rir",
    data_folder="tests/tmp/yaml",
    output_folder="tests/tmp/yaml",
):
    """Tests if the yaml files can be loaded without errors.

    Arguments
    ---------
    recipe_csvfile: path
        Path of the csv recipe file summarizing all the recipes in the repo.
    script_field: str
        Field of the csv recipe file containing the path of the script to run.
    hparam_field: str
        Field of the csv recipe file containing the path of the hparam file.
    test_field: string
        Field of the csv recipe file containing the test flags.
    filters_fields: list
        This can be used with the "filters" variable to run only some tests.
        For instance, filters_fields=['Task'] and filters=['ASR'] will only
        run tests for ASR recipes.
    filters: list
        See above.
    avoid_list: list
        List of hparam files not to check.
    rir_folder: path
        This overrides the rir_folder, rir_path, and openrir_folder usually
        specified in the hparam files.
    data_folder: path
        This overrides the data_folder usually specified in the hparam files.
    output_folder: path
        This overrides the output_folder usually specified in the hparam files.

    Returns
    ---------
    check: bool
        True if all the hparam files are loaded correctly, False otherwise.
    """
    # Get current working directory (restored before returning)
    cwd = os.getcwd()

    # Set data_folder, output_folder and rir_folder relative to it
    data_folder = os.path.join(cwd, data_folder)
    output_folder = os.path.join(cwd, output_folder)
    rir_folder = os.path.join(cwd, rir_folder)

    # Additional overrides (only applied to the hparam files declaring the key)
    add_overrides = {
        "manual_annot_folder": data_folder,
        "musan_folder": data_folder,
        "tea_models_dir": data_folder,
        "wsj_root": data_folder,
        "tokenizer_file": data_folder,
        "commonlanguage_folder": data_folder,
        "tea_infer_dir": data_folder,
        "original_data_folder": data_folder,
        "pretrain_st_dir": data_folder,
        # RIR folder specifications -> all point to the same zip file: one download destination
        "rir_path": rir_folder,
        "rir_folder": rir_folder,
        "openrir_folder": rir_folder,
        "open_rir_folder": rir_folder,
        "data_folder_rirs": rir_folder,
    }

    # Read the csv recipe file and detect which tests we have to run
    test_script, test_hparam, _, _ = prepare_test(
        recipe_csvfile,
        script_field,
        hparam_field,
        test_field=test_field,
        filters_fields=filters_fields,
        filters=filters,
    )

    check = True
    n_tests = len(test_script)
    try:
        for i, recipe_id in enumerate(test_script):
            hparam_file = test_hparam[recipe_id]
            script_file = test_script[recipe_id]

            # Changing working folder to recipe folder
            recipe_folder = os.path.join(cwd, os.path.dirname(script_file))
            os.chdir(recipe_folder)

            # Avoid files listed in avoid_list
            if hparam_file in avoid_list:
                continue

            print("(%i/%i) Checking %s..." % (i + 1, n_tests, hparam_file))

            # Get absolute path to the hparam file
            hparam_file = os.path.join(cwd, hparam_file)

            # Load hyperparameters file with command-line overrides
            overrides = {
                "data_folder": data_folder,
                "output_folder": output_folder,
            }

            # Append additional overrides when needed: a key is overridden
            # only if a line of the hparam file starts with "<key>:"
            with open(hparam_file) as f:
                for line in f:
                    for key, value in add_overrides.items():
                        if line.startswith(key + ":"):
                            overrides[key] = value

            with open(hparam_file) as fin:
                try:
                    _ = load_hyperpyyaml(fin, overrides)
                except Exception as e:
                    # Any loading failure is reported and fails the test,
                    # but the remaining files are still checked.
                    print("\t" + str(e))
                    check = False
                    print("\tERROR: cannot load %s" % (hparam_file))
    finally:
        # Do not leak the recipe folder as working directory to the caller
        os.chdir(cwd)
    return check