In [3]:
import IPython.core.display as di

# On page load, if the page is not the live notebook app (no "body.notebook_app"
# element, i.e. an HTML export), toggle the code inputs and prompts so they start hidden.
di.display_html(
    '<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>',
    raw=True,
)

# Emit a "Toggle code" button that shows/hides the code inputs and prompts,
# intended for readers of the HTML export.
di.display_html(
    '''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''',
    raw=True,
)

VocalSet Technique Demo

Below we show a waveform, a mel spectrogram, playable audio, and a label for each of the ten singing techniques used to train our vocal technique and singer identification models.

In [5]:
# Render the demo for every sample in file_list. Both all_demos and file_list are
# defined in code cells further down this notebook, so those cells must be run first.
all_demos(file_list)
Vibrato
Straight
Breathy
Vocal Fry
Lip Trill
Trill
Trillo
Inhaled
Belt
Spoken
In [1]:
# Load libraries
# -*- coding: utf-8 -*-
import torch
import numpy as np
from torch.autograd import Variable
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import h5py
import matplotlib.pyplot as plt
import torch.optim as optim
import random
import tqdm
from tqdm import trange
import os
import librosa
from librosa import display
%matplotlib inline
import IPython.display as ipd
from audio_embed import utilities
In [2]:
# Paths of the sample recordings to demonstrate (one entry per title below).
file_list = [
    "samples/f2_scales_vibrato_a(1).wav",
    "samples/f2_scales_straight_e.wav",
    "samples/f2_scales_breathy_e.wav",
    "samples/f2_scales_vocal_fry_o.wav",
    "samples/f2_scales_lip_trill_a.wav",
    "samples/f2_long_trill_a.wav",
    "samples/f2_long_trillo_a.wav",
    "samples/f2_long_inhaled_a.wav",
    "samples/f2_scales_belt_a.wav",
    "samples/f2_row_spoken.wav",
]

# Display labels, kept in the same order as file_list: title_list[i] labels file_list[i].
title_list = [
    "Vibrato",
    "Straight",
    "Breathy",
    "Vocal Fry",
    "Lip Trill",
    "Trill",
    "Trillo",
    "Inhaled",
    "Belt",
    "Spoken",
]
In [3]:
# Plot the waveform and mel spectrogram of one sample file, with its audio made available via utilities.audio.

def single_demo(f, num, sr=44100, duration=5):
    """Print the label of one sample and draw its waveform and mel spectrogram side by side.

    Parameters
    ----------
    f : str
        Path of the audio file, passed to ``librosa.load``.
    num : int
        Index into the notebook-level ``title_list``; selects the label used
        for the printed line and for both subplot titles.
    sr : int, optional
        Sample rate requested from ``librosa.load`` (default 44100).
    duration : float, optional
        Maximum number of seconds of audio to load (default 5).

    Notes
    -----
    Relies on the notebook globals ``title_list``, ``utilities``, ``librosa``,
    ``plt`` and ``np``.
    """
    title = title_list[num]
    print(title)

    # One row, two columns: waveform on the left, mel spectrogram on the right.
    # The axes are used explicitly instead of re-selecting them with
    # plt.subplot(2, 2, k), which laid a 2x2 grid over this 1x2 figure.
    fig, (ax_wave, ax_spec) = plt.subplots(nrows=1, ncols=2, figsize=(20, 6))

    y, sr = librosa.load(f, duration=duration, sr=sr)
    # Project helper; presumably renders the playable audio widget -- confirm in audio_embed.utilities.
    utilities.audio(y, sr)

    ax_wave.set_title(title, fontsize=30)
    # Newer librosa releases provide waveshow and no longer ship waveplot;
    # fall back to waveplot on older installations.
    draw_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
    draw_wave(y, sr=sr, ax=ax_wave)

    ax_spec.set_title(title, fontsize=30)
    spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    # sr must be forwarded so the time axis is computed from the real sample
    # rate rather than specshow's 22050 Hz default.
    librosa.display.specshow(
        librosa.power_to_db(spec, ref=np.max),
        sr=sr,
        y_axis='mel',
        fmax=8000,
        x_axis='time',
        ax=ax_spec,
    )

    plt.show()
     
# single_demo(file_list[0], 0)
In [4]:
def all_demos(file_list):
    """Call ``single_demo`` once per entry of ``file_list``, in order.

    Each file is passed together with its zero-based position in the list,
    which ``single_demo`` receives as its ``num`` argument.
    """
    for position, path in enumerate(file_list):
        single_demo(path, position)
        
# Call the project-local helper from audio_embed.utilities; presumably it sets up
# the notebook/plot styling -- its implementation is not visible here, confirm there.
utilities.apply_style()