The following 13 code examples, extracted from Python open source projects, illustrate how to use features.mfcc().
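As a starting point, here is a minimal sketch of the basic call, assuming the python_speech_features package (older releases of which were imported as features) and a placeholder file name sample.wav:

import scipy.io.wavfile as wav
from python_speech_features import mfcc  # older releases: from features import mfcc

# Read the signal and its sampling rate, then compute 13 MFCCs per frame.
rate, sig = wav.read('sample.wav')  # 'sample.wav' is a placeholder path
mfcc_feat = mfcc(sig, rate)         # numpy array of shape (num_frames, 13)
print(mfcc_feat.shape)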
def wav_to_input(wav_file_name):
    input_data, f_s = sf.read(wav_file_name)
    # mfcc_feat = MFCC_input(mfcc(input_data,f_s))
    mfcc_feat = mfcc(input_data, f_s)
    # Delta features
    delta_feat = mfcc_feat[:-1] - mfcc_feat[1:]
    # Delta-Delta features
    deltadelta_feat = delta_feat[:-1] - delta_feat[1:]
    # Removing the first two frames
    mfcc_feat = mfcc_feat[2:]
    delta_feat = delta_feat[1:]
    # Concatenating mfcc, delta and delta-delta features
    full_input = np.concatenate((mfcc_feat, delta_feat, deltadelta_feat), axis=1)
    return full_input
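The example above approximates delta and delta-delta features with one-frame differences, trimming frames so the arrays line up. As an alternative sketch, python_speech_features also provides a delta() helper that uses a symmetric regression window and preserves the frame count; the function name wav_to_input_lib and the window width N=2 are assumptions, not part of the original project:

import numpy as np
import soundfile as sf
from python_speech_features import mfcc, delta

def wav_to_input_lib(wav_file_name):  # hypothetical helper name
    input_data, f_s = sf.read(wav_file_name)
    mfcc_feat = mfcc(input_data, f_s)
    delta_feat = delta(mfcc_feat, 2)        # regression over +/-2 frames
    deltadelta_feat = delta(delta_feat, 2)
    # All three arrays keep the same frame count, so no trimming is needed.
    return np.concatenate((mfcc_feat, delta_feat, deltadelta_feat), axis=1)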
def get_data(rootdir=TIMIT_main_dir):
    inputs = []
    targets = []
    for dir_path, sub_dirs, files in os.walk(rootdir):
        for file in files:
            if (os.path.join(dir_path, file)).endswith('.wav'):
                wav_file_name = os.path.join(dir_path, file)
                input_data, f_s = sf.read(wav_file_name)
                # mfcc_feat = MFCC_input(mfcc(input_data,f_s))
                mfcc_feat = mfcc(input_data, f_s)
                inputs.append(mfcc_feat)  # Rakeshvar wants one frame along each column but I am using Lasagne
                text_file_name = wav_file_name[:-4] + '.txt'
                with open(text_file_name) as target_data_file:
                    # Python 3 str.translate takes a table; the Python 2 form was translate(None, '!:,".;?')
                    target_data = target_data_file.read().lower().translate(str.maketrans('', '', '!:,".;?'))
                target_data = target_data[8:-1]  # No '.' in lexfree dictionary
                targets.append(target_data)
    return inputs, targets
def get_data(rootdir=TIMIT_main_dir):
    inputs = []
    targets = []
    for dir_path, sub_dirs, files in os.walk(rootdir):
        for file in files:
            if (os.path.join(dir_path, file)).endswith('.wav'):
                wav_file_name = os.path.join(dir_path, file)
                input_data, f_s = sf.read(wav_file_name)
                # mfcc_feat = MFCC_input(mfcc(input_data,f_s))
                mfcc_feat = mfcc(input_data, f_s)
                # Delta features
                delta_feat = mfcc_feat[:-1] - mfcc_feat[1:]
                # Delta-Delta features
                deltadelta_feat = delta_feat[:-1] - delta_feat[1:]
                # Removing the first two frames
                mfcc_feat = mfcc_feat[2:]
                delta_feat = delta_feat[1:]
                # Concatenating mfcc, delta and delta-delta features
                full_input = np.concatenate((mfcc_feat, delta_feat, deltadelta_feat), axis=1)
                inputs.append(np.asarray(full_input, dtype=theano.config.floatX))  # Rakeshvar wants one frame along each column but I am using Lasagne
                text_file_name = wav_file_name[:-4] + '.txt'
                with open(text_file_name) as target_data_file:
                    # Python 3 str.translate takes a table; the Python 2 form was translate(None, '!:,".;?')
                    target_data = target_data_file.read().lower().translate(str.maketrans('', '', '!:,".;?'))
                target_data = target_data[8:-1]  # No '.' in lexfree dictionary
                targets.append(target_data)
    return inputs, targets
def read_wavs_trng(emotions, trng_path, pickle_path, use_pickle=False):
    """
    Utility function to read wav files, convert them into MFCC vectors
    and store them in a pickle file (the pickle file is useful if you
    re-train on the same data while changing hyperparameters).
    """
    trng_data = {}
    if use_pickle and os.path.isfile(pickle_path):
        write_pickle = False
        trng_data = pickle.load(open(pickle_path, "rb"))
    else:
        write_pickle = True
        for emo in emotions:
            mfccs = []
            for wavfile in glob.glob(trng_path + '/' + emo + '/*.wav'):
                rate, sig = wvf.read(wavfile)
                mfcc_feat = mfcc(sig, rate)
                mfccs.append(mfcc_feat)
            trng_data[emo] = mfccs
    if write_pickle:
        pickle.dump(trng_data, open(pickle_path, "wb"))
    return trng_data
def test_emo(test_file, gmms):
    """
    NOTE: Use only after training.
    Test a given file and predict an emotion for it.
    """
    rate, sig = wvf.read(test_file)
    mfcc_feat = mfcc(sig, rate)
    pred = {}
    for emo in gmms:
        pred[emo] = gmms[emo].score(mfcc_feat)
    return emotions_nbest(pred, 2), pred
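The gmms argument above maps each emotion to a fitted mixture model whose score() returns a log-likelihood. A minimal sketch of producing it from the trng_data dict returned by read_wavs_trng(), assuming scikit-learn's GaussianMixture (the original project may have used an older GMM API, and n_components=8 is an arbitrary choice):

import numpy as np
from sklearn.mixture import GaussianMixture

def train_gmms(trng_data, n_components=8):  # hypothetical helper name
    gmms = {}
    for emo, mfccs in trng_data.items():
        X = np.vstack(mfccs)  # stack each utterance's (num_frames, 13) array
        gmms[emo] = GaussianMixture(n_components=n_components).fit(X)
    return gmms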
def run_tests(test_files):
    # Classify input data
    for test_file in test_files:
        # Read input file
        sampling_freq, signal = wavfile.read(test_file)
        # Extract MFCC features
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            features_mfcc = mfcc(signal, sampling_freq)
        # Define variables
        max_score = -float('inf')
        predicted_label = None
        # Run the current feature vector through all the HMM
        # models and pick the one with the highest score
        for item in speech_models:
            model, label = item
            score = model.compute_score(features_mfcc)
            if score > max_score:
                max_score = score
                predicted_label = label
        # Print the predicted output
        start_index = test_file.find('/') + 1
        end_index = test_file.rfind('/')
        original_label = test_file[start_index:end_index]
        print('\nOriginal: ', original_label)
        print('Predicted:', predicted_label)
def make_split_audio_array(folder, num_splits=5):
    lst = []
    for filename in os.listdir(folder):
        if filename.endswith('wav'):
            normed_sig = make_standard_length(filename)
            chunk = normed_sig.shape[0] // num_splits  # integer division so slice indices stay ints
            for i in range(num_splits - 1):
                lst.append(normed_sig[i*chunk:(i+2)*chunk])
    lst = np.array(lst)
    lst = lst.reshape(lst.shape[0], -1)
    return lst

# for input wav file outputs (13, 2999) mfcc np array
def make_normed_mfcc(filename, outrate=8000):
    normed_sig = make_standard_length(filename)
    normed_mfcc_feat = mfcc(normed_sig, outrate)
    normed_mfcc_feat = normed_mfcc_feat.T
    return normed_mfcc_feat

# make mfcc np array from wav file using librosa package
def make_librosa_mfcc(filename):
    y, sr = librosa.load(filename)
    mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return mfcc_feat

# make mfcc np array from wav file using speech features package
def make_mfcc(filename):
    (rate, sig) = wav.read(filename)
    mfcc_feat = mfcc(sig, rate)
    mfcc_feat = mfcc_feat.T
    return mfcc_feat

# for folder containing wav files, output numpy array of normed mfcc
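Note that the two packages use opposite axis conventions: librosa.feature.mfcc() returns an array of shape (n_mfcc, num_frames), while python_speech_features' mfcc() returns (num_frames, 13), which is why make_mfcc() and make_normed_mfcc() transpose their results. A quick comparison, with a placeholder file name (the frame counts will differ unless the window and hop settings of the two packages are matched):

feat_librosa = make_librosa_mfcc('sample.wav')  # shape (13, num_frames)
feat_psf = make_mfcc('sample.wav')              # shape (13, num_frames) after .T
print(feat_librosa.shape, feat_psf.shape)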
def make_mean_mfcc(filename):
    try:
        (rate, sig) = wav.read(filename)
        mfcc_feat = mfcc(sig, rate)
        avg_mfcc = np.mean(mfcc_feat, axis=0)
        return avg_mfcc
    except Exception:
        # Unreadable files yield None instead of raising.
        pass

# write new csv corresponding to dataframe of given language and gender
def make_mean_mfcc_df(folder):
    norms = []
    for filename in os.listdir(folder):
        # os.listdir yields bare names, so join them with the folder path
        (rate, sig) = wav.read(os.path.join(folder, filename))
        mfcc_feat = mfcc(sig, rate)
        mean_mfcc = np.mean(mfcc_feat, axis=0)
        # mean_mfcc = np.reshape(mean_mfcc, (1,13))
        norms.append(mean_mfcc)
    flat = [a.ravel() for a in norms]
    stacked = np.vstack(flat)
    df = pd.DataFrame(stacked)
    return df
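The '# write new csv ...' comment above refers to a step not included in the extract; a minimal sketch of what it might look like, with placeholder folder and file names:

df = make_mean_mfcc_df('wavs/english_female')  # placeholder folder of wav files
df.to_csv('english_female_mfcc.csv', index=False)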
def build_models(input_folder):
    # Initialize the variable to store all the models
    speech_models = []
    # Parse the input directory
    for dirname in os.listdir(input_folder):
        # Get the name of the subfolder
        subfolder = os.path.join(input_folder, dirname)
        if not os.path.isdir(subfolder):
            continue
        # Extract the label
        label = subfolder[subfolder.rfind('/') + 1:]
        # Initialize the variables
        X = np.array([])
        # Create a list of files to be used for training
        # We will leave one file per folder for testing
        training_files = [x for x in os.listdir(subfolder) if x.endswith('.wav')][:-1]
        # Iterate through the training files and build the models
        for filename in training_files:
            # Extract the current filepath
            filepath = os.path.join(subfolder, filename)
            # Read the audio signal from the input file
            sampling_freq, signal = wavfile.read(filepath)
            # Extract the MFCC features
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                features_mfcc = mfcc(signal, sampling_freq)
            # Append to the variable X
            if len(X) == 0:
                X = features_mfcc
            else:
                X = np.append(X, features_mfcc, axis=0)
        # Create the HMM model
        model = ModelHMM()
        # Train the HMM
        model.train(X)
        # Save the model for the current word
        speech_models.append((model, label))
        # Reset the variable
        model = None
    return speech_models

# Define a function to run tests on input files
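build_models() pairs with the run_tests() example earlier on this page, which reads speech_models as a global. A minimal driver sketch; the 'data' folder layout (one subfolder of .wav files per word) and the test-file selection rule are assumptions that mirror the [:-1] slice above:

import os

speech_models = build_models('data')  # 'data' is a placeholder root folder
# Collect the .wav file that build_models() left out of each subfolder
# (the last one in os.listdir order, which is not guaranteed to be stable).
test_files = []
for dirname in os.listdir('data'):
    subfolder = os.path.join('data', dirname)
    if not os.path.isdir(subfolder):
        continue
    wavs = [x for x in os.listdir(subfolder) if x.endswith('.wav')]
    if wavs:
        test_files.append(os.path.join(subfolder, wavs[-1]))
run_tests(test_files)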