Source code for preprocess

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Utilities module for pre-processing audio stimuli

Run on the command line, e.g.: ::

    $ python preprocess.py

.. note:: This module has dependencies not required by the CAQE web application. To install these dependencies, run
 ``pip install -r analysis_requirements.txt``.
"""
import argparse
import os

import numpy as np
import librosa


[docs]def rms_normalize(directory=None, file_list=None, suffix=None, target_rms=None): """ This utility performs rms normalization on a directory or list of files. Note files must be WAV files. Parameters ---------- directory : str Input directory of audio files to process. Either this or `file_list` must be defined. Default is None. file_list : list of str List of audio files to process. Either this or `directory` must be defined. Default is None. suffix : str The suffix to append to the output filenames. If `None`, then the input files will be overwritten. Default is None. target_rms : float The target RMS to which we normalize. If `None`, then calculate the minimum RMS of the peak normalized files and normalize to that. Returns ------- output_file_list : list of str pre_norm_values : list of float post_norm_values : list of float """ if file_list is None: if directory is None: raise Exception('Arguments `file_list` or `directory` must be defined') file_list = [] for path, _dirs, files in os.walk(directory): file_list.extend([os.path.join(path, f) for f in files if os.path.splitext(f)[1] == ".wav" or os.path.splitext(f)[1] == ".WAV"]) pre_norm_values = np.asarray([_rms_of_file(file_name) for file_name in file_list]) if target_rms is None: target_rms = min(pre_norm_values[pre_norm_values.nonzero()]) if suffix is not None: output_file_list = [] for f in file_list: head, tail = os.path.splitext(f) output_file_list.append(head + suffix + tail) else: output_file_list = file_list post_norm_values = np.asarray([_normalize_file(file_list[i], output_file_list[i], target_rms) for i in range(len(file_list))]) return file_list, pre_norm_values, post_norm_values
def _rms_of_file(file_path, min_val=0.001, normalize=True): """ Calculate the RMS of a file. Parameters ---------- file_path : str File path of the input file. min_val : float When calculating the RMS, the signal will be bounded by the active region that is above this `min_val` threshold. Default is 0.001. normalize : bool Peak normalize before calculating the RMS. Default is True. Returns ------- rms : float """ x, _ = librosa.load(file_path, sr=None, mono=True) if normalize: divisor = np.max(np.abs(x)) if divisor == 0: return 0 x /= divisor idx = np.where(np.abs(x) > min_val)[0] x = np.power(x[min(idx):max(idx)], 2) return np.sqrt(np.mean(x)) def _normalize_file(input_file_path, output_file_path, target_rms): """ Normalize the audio file at `input_file_path` to root mean square `target_rms` and save at `output_file_path` as a .WAV file. Parameters ---------- input_file_path : str Input audio file path of audio to be read and normalized output_file_path : str Output audio file path to where the output audio file should be saved target_rms : float The target RMS value of the audio file, i.e. we normalize to this value. Returns ------- post_norm_rms : float The RMS value after normalizing """ x, sr = librosa.load(input_file_path, sr=None, mono=True) x_rms = np.sqrt(np.mean(np.power(x, 2))) y = x * (target_rms / x_rms) librosa.output.write_wav(output_file_path, y, sr, norm=False) return np.sqrt(np.mean(np.power(y, 2)))
[docs]def generate_source_separation_anchors(directory=None, file_list=None): """ Generate the PEASS-style anchors for use in a source separation evaluation. "The distorted target anchor is created by low-pass filtering the target source signal to a 3.5 kHz cut-off frequency and by randomly setting 20% of the remaining timefrequency coefficients to zero." "The artifacts anchor is ... created by randomly setting 99% of the time-frequency coefficients of the target to zero and by adjusting the loudness of the resulting signal to that of the target." - Note that we simply used RMS instead of the ISO 352B loudness model as discussed in the paper. Parameters ---------- directory : str Input directory of audio files to process. Either this or `file_list` must be defined. Default is None. file_list : list of str List of audio files to process. Either this or `directory` must be defined. Default is None. Returns ------- None References ---------- .. [1] Emiya, V., et al. Subjective and Objective Quality Assessment of Audio Source Separation. IEEE Transactions on Audio, Speech, and Language Processing, 19(7): 2046-2057, 2011. """ if file_list is None: if directory is None: raise Exception('Arguments `file_list` or `directory` must be defined') file_list = [] for path, _dirs, files in os.walk(directory): file_list.extend([os.path.join(path, f) for f in files if os.path.splitext(f)[1] == ".wav" or os.path.splitext(f)[1] == ".WAV"]) for input_file_name in file_list: x, sr = librosa.load(input_file_name, sr=None, mono=False) if x.ndim == 1: x = x.reshape(1, -1) n_fft = int(2**np.round(np.log2(sr * 0.046))) # distorted target anchor X = [librosa.stft(x[i], n_fft=n_fft, hop_length=n_fft/2) for i in range(x.shape[0])] cutoff = int(np.ceil((3500.0/sr) * n_fft)) for _X in X: _X[cutoff:, :] = 0.0 _X[:, np.random.random(_X.shape[1]) <= 0.2] = 0.0 x_anch1 = np.array([librosa.istft(_X, hop_length=n_fft/2) for _X in X]) if x_anch1.shape[0] == 1: x_anch1 = x_anch1.reshape(-1) output_file_name = os.path.splitext(input_file_name)[0] + '_anchorDistTarget' + '.wav' librosa.output.write_wav(output_file_name, x_anch1, sr=sr) # artificial noise anchor x_mono = np.mean(x, axis=0) X = librosa.stft(x_mono, n_fft=n_fft, hop_length=n_fft/2) X[np.random.random(X.shape) <= 0.99] = 0.0 artifacts = librosa.istft(X, hop_length=n_fft/2) artifacts_rms = np.sqrt(np.mean(np.power(artifacts, 2))) x_rms = np.sqrt(np.mean(np.power(x, 2))) artifacts *= (x_rms / artifacts_rms) if x.shape[0] == 1: artifacts = artifacts.reshape(1, -1) elif x.shape[0] == 2: artifacts = np.array([artifacts, artifacts]) else: raise Exception('More than 2 channels.') artifacts_pad = np.zeros_like(x) artifacts_pad[:artifacts.shape[0], :artifacts.shape[1]] = artifacts x_anch2 = x + artifacts_pad if x_anch2.shape[0] == 1: x_anch2 = x_anch2.reshape(-1) output_file_name = os.path.splitext(input_file_name)[0] + '_anchorArtif' + '.wav' librosa.output.write_wav(output_file_name, x_anch2, sr=sr)
if __name__ == '__main__': parser = argparse.ArgumentParser(description='Pre-process audio stimuli.') sp = parser.add_subparsers(dest='command') ch = sp.add_parser('rms-normalize', help='RMS normalize .wav files in a directory.') ch.add_argument('input_directory', type=str, help='Path to input directory') ch.add_argument('--suffix', type=str, help='The suffix to append to the normalized files. If none is given, the ' 'input files will be overwritten.', default=None) ch.add_argument('--target-rms', type=float, help='The target rms value. If none is given, it will calculate the ' 'max possible without clipping.', default=None) ch = sp.add_parser('generate-ss-anchors', help='Generate anchors for source separation given a directory of ' '.wav files.') ch.add_argument('input_directory', type=str, help='Path to the input directory') args = parser.parse_args() if args.command == 'rms-normalize': rms_normalize(args.input_directory, suffix=args.suffix, target_rms=args.target_rms) elif args.command == 'generate-ss-anchors': generate_source_separation_anchors(args.input_directory)