Python librosa module: stft() example source code

We have extracted the following 43 code examples from open-source Python projects to illustrate how to use librosa.stft().

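Before the project listings, here is a minimal round-trip sketch of the core API these snippets build on (assuming a recent librosa, >= 0.8, where stft arguments after y are keyword-only; librosa.example downloads a short demo clip on first use):

import librosa
import numpy as np

y, sr = librosa.load(librosa.example('trumpet'))     # any mono float signal works
D = librosa.stft(y, n_fft=2048, hop_length=512)      # complex, shape (1 + n_fft//2, n_frames)
mag, phase = librosa.magphase(D)                     # |D| and exp(1j * angle(D))
y_hat = librosa.istft(mag * phase, hop_length=512)   # near-perfect reconstruction
n = min(len(y), len(y_hat))
print(np.allclose(y[:n], y_hat[:n], atol=1e-5))
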
Project: Mendelssohn    Author: diggerdu    | Project source | File source
def stft(wav, n_fft=1024, overlap=4, dt=tf.int32, absp=False):
    assert (wav.shape[0] > n_fft)
    X = tf.placeholder(dtype=dt,shape=wav.shape)
    X = tf.cast(X,tf.float32)
    hop = n_fft // overlap  # integer division: hop is used as a range() step below

    ## prepare constant variables
    Pi = tf.constant(np.pi, dtype=tf.float32)
    W = tf.constant(np.hanning(n_fft), dtype=tf.float32)  # scipy.hanning was removed; np.hanning is identical
    S = tf.stack([tf.fft(tf.cast(tf.multiply(W, X[i:i + n_fft]),
            tf.complex64)) for i in range(1, wav.shape[0] - n_fft, hop)])  # tf.pack became tf.stack in TF 1.0
    abs_S = tf.abs(S)  # tf.complex_abs was folded into tf.abs
    sess = tf.Session()
    if absp:
        return sess.run(abs_S, feed_dict={X:wav})
    else:
        return sess.run(S, feed_dict={X:wav})
Project: magenta    Author: tensorflow    | Project source | File source
def griffin_lim(mag, phase_angle, n_fft, hop, num_iters):
  """Iterative algorithm for phase retrival from a magnitude spectrogram.

  Args:
    mag: Magnitude spectrogram.
    phase_angle: Initial condition for phase.
    n_fft: Size of the FFT.
    hop: Stride of the FFT, in samples (commonly n_fft/2).
    num_iters: Griffin-Lim iterations to perform.

  Returns:
    audio: 1-D array of float32 sound samples.
  """
  fft_config = dict(n_fft=n_fft, win_length=n_fft, hop_length=hop, center=True)
  ifft_config = dict(win_length=n_fft, hop_length=hop, center=True)
  complex_specgram = inv_magphase(mag, phase_angle)
  for i in range(num_iters):
    audio = librosa.istft(complex_specgram, **ifft_config)
    if i != num_iters - 1:
      complex_specgram = librosa.stft(audio, **fft_config)
      _, phase = librosa.magphase(complex_specgram)
      phase_angle = np.angle(phase)
      complex_specgram = inv_magphase(mag, phase_angle)
  return audio
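
inv_magphase is defined elsewhere in the magenta codebase; given how it is used above, a minimal stand-in (an assumption, not the project's exact code) just recombines a magnitude and a phase angle into a complex spectrogram:

import numpy as np

def inv_magphase(mag, phase_angle):
    # magnitude times unit phasor -> complex spectrogram
    return mag * np.exp(1j * phase_angle)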
Project: speech-enhancement-WGAN    Author: jerrygood0703    | Project source | File source
def make_spectrum(self, filename, use_normalize):
        sr, y = wav.read(filename)
        if sr != 16000:
            raise ValueError('Sampling rate is expected to be 16kHz!')
        if y.dtype != 'float32':
            y = np.float32(y / 32767.)

        D = librosa.stft(y, n_fft=512, hop_length=256, win_length=512, window=scipy.signal.hamming)
        Sxx = np.log10(abs(D)**2)
        if use_normalize:
            mean = np.mean(Sxx, axis=1).reshape((257,1))
            std = np.std(Sxx, axis=1).reshape((257,1))+1e-12
            Sxx = (Sxx-mean)/std  
        slices = []
        for i in range(0, Sxx.shape[1]-self.FRAMELENGTH, self.OVERLAP):
            slices.append(Sxx[:,i:i+self.FRAMELENGTH])
        return np.array(slices)
Project: odin    Author: imito    | Project source | File source
def test_stft_istft(self):
        try:
            import librosa
            ds = F.load_digit_wav()
            name = list(ds.keys())[0]  # dict views are not indexable in Python 3
            path = ds[name]

            y, _ = speech.read(path, pcm=True)
            hop_length = int(0.01 * 8000)
            stft = signal.stft(y, n_fft=256, hop_length=hop_length, window='hann')
            stft_ = librosa.stft(y, n_fft=256, hop_length=hop_length, window='hann')
            self.assertTrue(np.allclose(stft, stft_.T))

            y1 = signal.istft(stft, hop_length=hop_length, window='hann')
            y2 = librosa.istft(stft_, hop_length=hop_length, window='hann')
            self.assertTrue(np.allclose(y1, y2))
        except ImportError:
            print("test_stft_istft require librosa.")
Project: Tacotron    Author: barronalex    | Project source | File source
def griffinlim(spectrogram, n_iter=50, window='hann', n_fft=2048, win_length=2048, hop_length=-1, verbose=False):
    if hop_length == -1:
        hop_length = n_fft // 4

    angles = np.exp(2j * np.pi * np.random.rand(*spectrogram.shape))

    t = tqdm(range(n_iter), ncols=100, mininterval=2.0, disable=not verbose)
    for i in t:
        full = np.abs(spectrogram).astype(complex) * angles  # the np.complex alias was removed in NumPy 1.24
        inverse = librosa.istft(full, hop_length=hop_length, win_length=win_length, window=window)
        rebuilt = librosa.stft(inverse, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window)
        angles = np.exp(1j * np.angle(rebuilt))

        if verbose:
            diff = np.abs(spectrogram) - np.abs(rebuilt)
            t.set_postfix(loss=np.linalg.norm(diff, 'fro'))

    full = np.abs(spectrogram).astype(complex) * angles
    inverse = librosa.istft(full, hop_length=hop_length, win_length=win_length, window=window)

    return inverse
Project: audio    Author: willfrey    | Project source | File source
def __call__(self, y):
        """Short-time Fourier transform (STFT).

        Returns a complex-valued matrix D such that
            `np.abs(D[f, t])` is the magnitude of frequency bin `f`
            at frame `t`

            `np.angle(D[f, t])` is the phase of frequency bin `f`
            at frame `t`

        Parameters
        ----------
        y : np.ndarray [shape=(n,)], real-valued
            the input signal (audio time series)

        Returns
        -------
        D : np.ndarray [shape=(1 + n_fft/2, t), dtype=dtype]
            STFT matrix

        """
        return librosa.stft(y, **self.__dict__)
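
The **self.__dict__ trick above only works if every instance attribute is a valid librosa.stft keyword argument. A minimal wrapper consistent with that pattern (this __init__ is an illustrative assumption, not the project's code):

import librosa

class STFT(object):
    def __init__(self, n_fft=2048, hop_length=512, win_length=None, window='hann'):
        # every attribute set here is forwarded verbatim to librosa.stft
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window

    def __call__(self, y):
        return librosa.stft(y, **self.__dict__)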
Project: Sound-classification-on-Raspberry-Pi-with-Tensorflow    Author: GianlucaPaolocci    | Project source | File source
def extract_features(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.array(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=8).T)
    chroma = np.array(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T)
    mel = np.array(librosa.feature.melspectrogram(X, sr=sample_rate).T)
    contrast = np.array(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T)
    tonnetz = np.array(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T)
    return mfccs,chroma,mel,contrast,tonnetz
Project: Sound-classification-on-Raspberry-Pi-with-Tensorflow    Author: GianlucaPaolocci    | Project source | File source
def extract_features():
    X = sounddevice.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)  # duration and sample_rate are module-level globals
    sounddevice.wait()
    X= np.squeeze(X)
    stft = np.abs(librosa.stft(X))
    mfccs = np.array(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=8).T)
    chroma = np.array(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T)
    mel = np.array(librosa.feature.melspectrogram(X, sr=sample_rate).T)
    contrast = np.array(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T)
    tonnetz = np.array(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T)
    ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
    # the original stacked onto an undefined `features` variable (NameError);
    # return the stacked feature vector for this recording directly
    return ext_features
Project: speechless    Author: JuliusKunze    | Project source | File source
def _complex_spectrogram(self) -> ndarray:
        return librosa.stft(y=self.get_raw_audio(), n_fft=self.fourier_window_length, hop_length=self.hop_length)
Project: Tacotron_pytorch    Author: root20    | Project source | File source
def _griffin_lim(S, n_fft, win_length, hop_length, num_iters):
    angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
    S_complex = np.abs(S).astype(complex)  # the np.complex alias was removed in NumPy 1.24
    for i in range(num_iters):
        if i > 0:
            angles = np.exp(1j * np.angle(librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)))
        y = librosa.istft(S_complex * angles, hop_length=hop_length, win_length=win_length)
    return y
Project: source_separation_ml_jeju    Author: hjkwon0609    | Project source | File source
def create_spectrogram_from_audio(data):
    global setting
    spectrogram = librosa.stft(data, n_fft=Config.n_fft, hop_length=Config.hop_length).transpose()

    # split each element into its real and imaginary components and
    # concatenate the matrix of real parts with the matrix of imaginary parts
    # (saving complex numbers in TFRecords raises DataCorruptionError)

    # concatenated = np.concatenate([np.real(spectrogram), np.imag(spectrogram)], axis=1)
    return spectrogram # [num_time_frames, num_freq_bins]
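
The commented-out concatenation shows how the complex matrix would be made TFRecord-safe; a sketch of the split and its inverse (helper names here are illustrative, not from the project):

import numpy as np

def complex_to_real_pair(spec):
    # [T, F] complex -> [T, 2F] float: real parts first, then imaginary parts
    return np.concatenate([np.real(spec), np.imag(spec)], axis=1)

def real_pair_to_complex(pair):
    # inverse: split the last axis in half and recombine
    n_freq = pair.shape[1] // 2
    return pair[:, :n_freq] + 1j * pair[:, n_freq:]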
Project: TensorFlow_AudioSet_Example    Author: DantesLegacy    | Project source | File source
def plot_log_power_specgram(sound_names,raw_sounds):
    i = 1
    fig = plt.figure(figsize=(25,60), dpi = 900)
    for n,f in zip(sound_names,raw_sounds):
        plt.subplot(10,1,i)
        D = librosa.power_to_db(np.abs(librosa.stft(f))**2, ref=np.max)  # librosa.logamplitude was removed in 0.6; power_to_db replaces it
        librosa.display.specshow(D,x_axis='time' ,y_axis='log')
        plt.title(n.title())
        i += 1
    plt.suptitle('Figure 3: Log power spectrogram',x=0.5, y=0.915,fontsize=18)
    plt.show()
Project: TensorFlow_AudioSet_Example    Author: DantesLegacy    | Project source | File source
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz
Project: tacotron    Author: jinfagang    | Project source | File source
def get_spectrograms(sound_file):
    '''Extracts melspectrogram and log magnitude from given `sound_file`.
    Args:
      sound_file: A string. Full path of a sound file.

    Returns:
      Transposed S: A 2d array. A transposed melspectrogram with shape (T, n_mels).
      Transposed magnitude: A 2d array with shape (T, 1+hp.n_fft//2).
    '''
    # Loading sound file
    y, sr = librosa.load(sound_file, sr=None)  # or set sr to hp.sr.

    # stft. D: (1+n_fft//2, T)
    D = librosa.stft(y=y,
                     n_fft=hp.n_fft,
                     hop_length=hp.hop_length,
                     win_length=hp.win_length)

    # magnitude spectrogram
    magnitude = np.abs(D)  # (1+n_fft/2, T)

    # power spectrogram
    power = magnitude ** 2  # (1+n_fft/2, T)

    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels)  # (n_mels, T)

    return np.transpose(S.astype(np.float32)), np.transpose(magnitude.astype(np.float32))  # (T, n_mels), (T, 1+n_fft/2)
Project: tacotron    Author: jinfagang    | Project source | File source
def spectrogram2wav(spectrogram):
    '''
    spectrogram: [t, f], i.e. [t, nfft // 2 + 1]
    '''
    spectrogram = spectrogram.T  # [f, t]
    X_best = copy.deepcopy(spectrogram)  # [f, t]
    for i in range(hp.n_iter):
        X_t = invert_spectrogram(X_best)
        est = librosa.stft(X_t, hp.n_fft, hp.hop_length, win_length=hp.win_length)  # [f, t]
        phase = est / np.maximum(1e-8, np.abs(est))  # [f, t]
        X_best = spectrogram * phase  # [f, t]
    X_t = invert_spectrogram(X_best)

    return np.real(X_t)
Project: DDAE    Author: jerrygood0703    | Project source | File source
def make_spectrum_phase(y, FRAMESIZE, OVERLAP, FFTSIZE):
    D = librosa.stft(y, n_fft=FRAMESIZE, hop_length=OVERLAP, win_length=FFTSIZE, window=scipy.signal.hamming)
    Sxx = np.log10(abs(D)**2)
    phase = np.exp(1j * np.angle(D))
    mean = np.mean(Sxx, axis=1).reshape((257, 1))  # 257 = FRAMESIZE//2 + 1, assuming FRAMESIZE == 512
    std = np.std(Sxx, axis=1).reshape((257, 1)) + 1e-12
    Sxx = (Sxx - mean) / std
    return Sxx, phase, mean, std
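
A plausible inverse for reconstruction (an assumption matching the normalization above, not shown in this file): undo the z-scoring, convert the log-power back to magnitude, and reattach the phase with a matching window:

import numpy as np
import scipy.signal
import librosa

def recons_spec_phase(Sxx, phase, mean, std, OVERLAP, FFTSIZE):
    Sxx = Sxx * std + mean     # undo per-bin normalization
    mag = np.sqrt(10 ** Sxx)   # Sxx = log10(|D|**2)  =>  |D| = 10**(Sxx/2)
    return librosa.istft(mag * phase, hop_length=OVERLAP, win_length=FFTSIZE,
                         window=scipy.signal.hamming)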
Project: gcForest    Author: kingfengji    | Project source | File source
def get_feature_aqibsaeed_1(X, sr, au_path=None):
    """
    http://aqibsaeed.github.io/2016-09-03-urban-sound-classification-part-1/
    """
    import librosa
    if au_path is not None:
        X, sr = librosa.load(au_path)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sr, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sr).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sr).T,axis=0)
    feature = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
    return feature
Project: pumpp    Author: bmcfee    | Project source | File source
def transform_audio(self, y):
        '''Compute the STFT magnitude and phase.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT magnitude

            data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        D = stft(y, hop_length=self.hop_length,
                 n_fft=self.n_fft)

        D = fix_length(D, n_frames)

        mag, phase = magphase(D)
        if self.log:
            mag = amplitude_to_db(mag, ref=np.max)

        return {'mag': mag.T[self.idx].astype(np.float32),
                'phase': np.angle(phase.T)[self.idx].astype(np.float32)}
Project: tartarus    Author: sergiooramas    | Project source | File source
def compute_spec(audio_file,spectro_file):
    # Get actual audio
    audio, sr = librosa.load(audio_file, sr=config['resample_sr'])
    # Compute spectrogram
    if config['spectrogram_type']=='cqt':
        spec = librosa.cqt(audio, sr=sr, hop_length=config['hop'], n_bins=config['cqt_bins'], real=False)
    elif config['spectrogram_type']=='mel':
        spec = librosa.feature.melspectrogram(y=audio, sr=sr, hop_length=config['hop'],n_fft=config['n_fft'],n_mels=config['n_mels'])
    elif config['spectrogram_type']=='stft':
        spec = librosa.stft(y=audio,n_fft=config['n_fft'])
    # Write results:
    with open(spectro_file, "wb") as f:  # pickle requires binary mode
        pickle.dump(spec, f, protocol=-1)  # spec shape: MxN
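
Reading the pickle back requires binary mode as well; a quick check reusing the same spectro_file path:

import pickle

with open(spectro_file, "rb") as f:  # "rb" to match the binary write above
    spec = pickle.load(f)
print(spec.shape, spec.dtype)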
Project: Mendelssohn    Author: diggerdu    | Project source | File source
def expand(self, audio):
        ori_len = audio.shape[0]
        tmp = resample(audio, r=0.5, type='sinc_best')
        down_len = tmp.shape[0]
        tmp = resample(tmp, r=(ori_len+1) / float(down_len), type='sinc_best')
        tmp = librosa.stft(audio, 1024)
        phase = np.divide(tmp, np.abs(tmp))
        spec_input = np.abs(librosa.stft(audio, 1024))[0:n_input, ::]
        spec_input = spec_input[::, 0:spec_input.shape[1]//n_len*n_len]
        spec_input = np.split(spec_input,
                              spec_input.shape[1]//n_len, axis=1)
        spec_input = np.asarray(spec_input)
        spec_input = np.expand_dims(spec_input, axis=-1)
        feed_dict = {self.input_op: np.log1p(spec_input) / 12.0}
        debug = self.sess.run(self.debug_op, feed_dict=feed_dict)
        np.save('debug.npy', debug)
        S = self.sess.run(self.eva_op, feed_dict=feed_dict)
        S[S >= 5e3] = 5e3
        S[S <= 0] = 0
        print('mean', np.mean(S))
        print(np.sum(np.isinf(S)))
        S = np.squeeze(np.concatenate(np.split(S, S.shape[0]), axis=2),
                       axis=(0, -1))
        phase = phase[..., :S.shape[1]]
        print(phase.shape)
        print(S.shape)
        print(np.sum(np.isinf(np.multiply(S, phase))))

        X = librosa.istft(np.multiply(S, phase))
        return X
Project: tacotron    Author: Kyubyong    | Project source | File source
def get_spectrograms(sound_file): 
    '''Extracts melspectrogram and log magnitude from given `sound_file`.
    Args:
      sound_file: A string. Full path of a sound file.

    Returns:
      Transposed S: A 2d array. A transposed melspectrogram with shape (T, n_mels).
      Transposed magnitude: A 2d array with shape (T, 1+hp.n_fft//2).
    '''
    # Loading sound file
    y, sr = librosa.load(sound_file, sr=hp.sr) # or set sr to hp.sr.

    # stft. D: (1+n_fft//2, T)
    D = librosa.stft(y=y,
                     n_fft=hp.n_fft, 
                     hop_length=hp.hop_length, 
                     win_length=hp.win_length) 

    # magnitude spectrogram
    magnitude = np.abs(D) #(1+n_fft/2, T)

    # power spectrogram
    power = magnitude**2 #(1+n_fft/2, T) 

    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels) #(n_mels, T)

    return np.transpose(S.astype(np.float32)), np.transpose(magnitude.astype(np.float32)) # (T, n_mels), (T, 1+n_fft/2)
Project: tacotron    Author: Kyubyong    | Project source | File source
def spectrogram2wav(spectrogram):
    '''
    spectrogram: [t, f], i.e. [t, nfft // 2 + 1]
    '''
    spectrogram = spectrogram.T  # [f, t]
    X_best = copy.deepcopy(spectrogram)  # [f, t]
    for i in range(hp.n_iter):
        X_t = invert_spectrogram(X_best)
        est = librosa.stft(X_t, hp.n_fft, hp.hop_length, win_length=hp.win_length)  # [f, t]
        phase = est / np.maximum(1e-8, np.abs(est))  # [f, t]
        X_best = spectrogram * phase  # [f, t]
    X_t = invert_spectrogram(X_best)

    return np.real(X_t)
Project: apicultor    Author: sonidosmutantes    | Project source | File source
def source_separation(self, x):
    if not Duration()(x) > 10:
        stftx = librosa.stft(x)
        real = stftx.real
        imag = stftx.imag
        ssp = find_sparse_source_points(real, imag)  # find sparsity in the signal
        cos_dist = cosine_distance(ssp)  # cosine distance from sparse data
        sources = find_number_of_sources(cos_dist)  # find possible number of sources
        if sources in (0, 1):  # x is an instrumental track with at most one source
            print("There's only one visible source")
            return x
        else:
            print("Separating sources")
            xs = NMF(stftx, sources)
            return xs[0]  # take the bass part  # TODO: correct NMF to return noiseless reconstruction
    else:
        stftx = librosa.stft(x[:441000])  # take 10 seconds of signal data to find sources
        print("It can take some time to find any source in this signal")
        real = stftx.real
        imag = stftx.imag
        ssp = find_sparse_source_points(real, imag)  # find sparsity in the signal
        cos_dist = cosine_distance(ssp)  # cosine distance from sparse data
        sources = find_number_of_sources(cos_dist)  # find possible number of sources
        if sources in (0, 1):  # x is an instrumental track with at most one source
            print("There's only one visible source")
            return x
        else:
            print("Separating sources")
            xs = NMF(librosa.stft(x), sources)
            return xs[0]  # take the bass part  # TODO: correct NMF to return noiseless reconstruction
Project: apicultor    Author: sonidosmutantes    | Project source | File source
def sad_music_remix(self, neg_arous_dir, files, decisions, harmonic=None):
    for subdirs, dirs, sounds in os.walk(neg_arous_dir):
        fx = random.choice(sounds[::-1])
        fy = random.choice(sounds[:])
    x = MonoLoader(filename=neg_arous_dir + '/' + fx)()
    y = MonoLoader(filename=neg_arous_dir + '/' + fy)()
    fx = fx.split('.')[0]
    fy = fy.split('.')[0]
    fx = np.where(files == fx)[0][0]
    fy = np.where(files == fy)[0][0]
    if harmonic is False or harmonic is None:  # original `harmonic is False or None` never took the None branch
        dec_x = get_coordinate(fx, 1, decisions)
        dec_y = get_coordinate(fy, 1, decisions)
    else:
        dec_x = get_coordinate(fx, 2, decisions)
        dec_y = get_coordinate(fy, 2, decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    x = x[np.nonzero(x)]
    y = scratch_music(y, dec_y)
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    negative_arousal_samples = [i / i.max() for i in (x, y)]
    negative_arousal_x = np.array(negative_arousal_samples).sum(axis=0)
    negative_arousal_x = 0.5 * negative_arousal_x / negative_arousal_x.max()
    if harmonic is True:
        return librosa.decompose.hpss(librosa.stft(negative_arousal_x), margin=(1.0, 5.0))[0]
    if harmonic is False or harmonic is None:
        onsets = hfc_onsets(np.float32(negative_arousal_x))
        interv = seconds_to_indices(onsets)
        steps = overlapped_intervals(interv)
        output = librosa.effects.remix(negative_arousal_x, steps[::-1], align_zeros=False)
        output = librosa.effects.pitch_shift(output, sr=44100, n_steps=3)
        remix_filename = 'data/emotions/remixes/sad/' + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg'
        MonoWriter(filename=remix_filename, format='ogg', sampleRate=44100)(np.float32(output))
        subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
Project: apicultor    Author: sonidosmutantes    | Project source | File source
def happy_music_remix(self, pos_arous_dir, files, decisions, harmonic=None):
    for subdirs, dirs, sounds in os.walk(pos_arous_dir):
        fx = random.choice(sounds[::-1])
        fy = random.choice(sounds[:])
    x = MonoLoader(filename=pos_arous_dir + '/' + fx)()
    y = MonoLoader(filename=pos_arous_dir + '/' + fy)()
    fx = fx.split('.')[0]
    fy = fy.split('.')[0]
    fx = np.where(files == fx)[0][0]
    fy = np.where(files == fy)[0][0]
    if harmonic is False or harmonic is None:  # same `or None` bug as in sad_music_remix
        dec_x = get_coordinate(fx, 3, decisions)
        dec_y = get_coordinate(fy, 3, decisions)
    else:
        dec_x = get_coordinate(fx, 0, decisions)
        dec_y = get_coordinate(fy, 0, decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    y = scratch_music(y, dec_y)
    x = x[np.nonzero(x)]
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    positive_arousal_samples = [i / i.max() for i in (x, y)]
    positive_arousal_x = np.float32(positive_arousal_samples).sum(axis=0)
    positive_arousal_x = 0.5 * positive_arousal_x / positive_arousal_x.max()
    if harmonic is True:
        return librosa.decompose.hpss(librosa.stft(positive_arousal_x), margin=(1.0, 5.0))[0]
    if harmonic is False or harmonic is None:
        interv = RhythmExtractor2013()(positive_arousal_x)[1] * 44100
        steps = overlapped_intervals(interv)
        output = librosa.effects.remix(positive_arousal_x, steps, align_zeros=False)
        output = librosa.effects.pitch_shift(output, sr=44100, n_steps=4)
        remix_filename = 'data/emotions/remixes/happy/' + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg'
        MonoWriter(filename=remix_filename, format='ogg', sampleRate=44100)(np.float32(output))
        subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
Project: apicultor    Author: sonidosmutantes    | Project source | File source
def not_angry_music_remix(self, neg_arous_dir, files, decisions):
    sounds = []
    for i in range(len(neg_arous_dir)):
        for subdirs, dirs, s in os.walk(neg_arous_dir[i]):
            sounds.append(subdirs + '/' + random.choice(s))
    fx = random.choice(sounds[::-1])
    fy = random.choice(sounds[:])
    x = MonoLoader(filename=fx)()
    y = MonoLoader(filename=fy)()
    fx = fx.split('/')[1].split('.')[0]
    fy = fy.split('/')[1].split('.')[0]
    fx = np.where(files == fx)[0]
    fy = np.where(files == fy)[0]
    dec_x = get_coordinate(fx, choice(range(1, 3)), decisions)
    dec_y = get_coordinate(fy, choice(range(1, 3)), decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    y = scratch_music(y, dec_y)
    x = x[np.nonzero(x)]
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    morph = stft.morph(x1=x, x2=y, fs=44100, w1=np.hanning(1025), N1=2048, w2=np.hanning(1025), N2=2048, H1=512, smoothf=0.1, balancef=0.7)
    onsets = hfc_onsets(np.float32(morph))
    interv = seconds_to_indices(onsets)
    steps = overlapped_intervals(interv)
    output = librosa.effects.remix(morph, steps[::-1], align_zeros=False)
    output = librosa.effects.pitch_shift(output, sr=44100, n_steps=4)
    remix_filename = 'data/emotions/remixes/not angry/' + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg'
    MonoWriter(filename=remix_filename, sampleRate=44100, format='ogg')(np.float32(output))
    subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
Project: apicultor    Author: sonidosmutantes    | Project source | File source
def not_relaxed_music_remix(self, pos_arous_dir, files, decisions):
    sounds = []
    for i in range(len(pos_arous_dir)):
        for subdirs, dirs, s in os.walk(pos_arous_dir[i]):
            sounds.append(subdirs + '/' + random.choice(s))
    fx = random.choice(sounds[::-1])
    fy = random.choice(sounds[:])
    x = MonoLoader(filename=fx)()
    y = MonoLoader(filename=fy)()
    fx = fx.split('/')[1].split('.')[0]
    fy = fy.split('/')[1].split('.')[0]
    fx = np.where(files == fx)[0]
    fy = np.where(files == fy)[0]
    dec_x = get_coordinate(fx, choice([0, 1, 3]), decisions)
    dec_y = get_coordinate(fy, choice([0, 1, 3]), decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    y = scratch_music(y, dec_y)
    x = x[np.nonzero(x)]
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    morph = stft.morph(x1=x, x2=y, fs=44100, w1=np.hanning(1025), N1=2048, w2=np.hanning(1025), N2=2048, H1=512, smoothf=0.01, balancef=0.7)
    interv = RhythmExtractor2013()(np.float32(morph))[1] * 44100
    steps = overlapped_intervals(interv)
    output = librosa.effects.remix(morph, steps[::-1], align_zeros=False)
    output = librosa.effects.pitch_shift(output, sr=44100, n_steps=3)
    remix_filename = 'data/emotions/remixes/not relaxed/' + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg'
    MonoWriter(filename=remix_filename, sampleRate=44100, format='ogg')(np.float32(output))
    subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
Project: deepspeech.pytorch    Author: SeanNaren    | Project source | File source
def parse_audio(self, audio_path):
        if self.augment:
            y = load_randomly_augmented_audio(audio_path, self.sample_rate)
        else:
            y = load_audio(audio_path)
        if self.noiseInjector:
            add_noise = np.random.binomial(1, self.noise_prob)
            if add_noise:
                y = self.noiseInjector.inject_noise(y)
        n_fft = int(self.sample_rate * self.window_size)
        win_length = n_fft
        hop_length = int(self.sample_rate * self.window_stride)
        # STFT
        D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                         win_length=win_length, window=self.window)
        spect, phase = librosa.magphase(D)
        # S = log(S+1)
        spect = np.log1p(spect)
        spect = torch.FloatTensor(spect)
        if self.normalize:
            mean = spect.mean()
            std = spect.std()
            spect.add_(-mean)
            spect.div_(std)

        return spect
Project: genrec    Author: kkanellis    | Project source | File source
def __init__(self, tex_wnd, fft_len=512, sr=22050):
        self.tex_wnd = tex_wnd
        self.an_wnd_len = fft_len
        self.sr = sr

        # calc signal spectrum
        self.fft_tex_wnds = np.abs(
            librosa.stft(
                y=tex_wnd,
                n_fft=fft_len,
                hop_length=fft_len,
            )
        )
Project: gcforest    Author: w821881341    | Project source | File source
def get_feature_aqibsaeed_1(X, sr, au_path=None):
    """
    http://aqibsaeed.github.io/2016-09-03-urban-sound-classification-part-1/
    """
    import librosa
    if au_path is not None:
        X, sr = librosa.load(au_path)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sr, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sr).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sr).T,axis=0)
    feature = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
    return feature
Project: urnn    Author: stwisdom    | Project source | File source
def stft_mc(x, N=1024, hop=None, window='hann'):
    if hop is None:
        hop = N // 2  # integer division: hop is used for array sizing below
    S = x.shape
    if len(S) == 1:
        nch = 1
        nsampl = len(x)
        x = np.reshape(x, (1, nsampl))
    else:
        nch = S[0]
        nsampl = S[1]
    xdtype = x.dtype
    nfram = int(np.ceil(float(nsampl) / float(hop)))  # scipy.ceil was removed; np.ceil is equivalent
    npad = int(nfram) * hop - nsampl
    pad = np.zeros((nch, npad)).astype(xdtype)
    x = np.concatenate((x, pad), axis=1)
    # pad the edges to avoid window taper effects
    pad = np.zeros((nch, N)).astype(xdtype)
    x = np.concatenate((pad, x, pad), axis=1)
    for ich in range(0, nch):
        x0 = x[ich, :]
        if not x0.flags.c_contiguous:
            x0 = x0.copy(order='C')
        X0 = librosa.core.stft(x0, n_fft=N, hop_length=hop, window=window, center=False, dtype=np.complex64)
        if ich == 0:
            X = np.zeros((N // 2 + 1, X0.shape[1], nch)).astype(np.complex64)  # N/2 would be a float index in Python 3
            X[:, :, 0] = X0
        else:
            X[:, :, ich] = X0
    return X
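
A quick shape check with a synthetic stereo signal clarifies the layout stft_mc returns, frequency bins x frames x channels (assuming a librosa version that still accepts the dtype argument used above):

import numpy as np

x = np.random.randn(2, 16000).astype(np.float32)  # 2 channels, 1 s at 16 kHz
X = stft_mc(x, N=1024)
print(X.shape, X.dtype)  # (513, n_frames, 2) complex64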
Project: Tacotron    Author: barronalex    | Project source | File source
def process_audio(fname, n_fft=2048, win_length=1200, hop_length=300, sr=16000):
    wave, sr = librosa.load(fname, mono=True, sr=sr)

    # trim initial silence
    wave, _ = librosa.effects.trim(wave)

    # first pad the audio to the maximum length
    # we ensure it is a multiple of 4r so it works with max frames
    assert math.ceil(maximum_audio_length / hop_length) % (4 * r) == 0  # parentheses fix the original precedence bug: % binds tighter than *
    if wave.shape[0] <= maximum_audio_length: 
        wave = np.pad(wave,
                (0,maximum_audio_length - wave.shape[0]), 'constant', constant_values=0)
    else:
        return None, None

    pre_emphasis = 0.97
    wave = np.append(wave[0], wave[1:] - pre_emphasis * wave[:-1])

    stft = librosa.stft(wave, n_fft=n_fft, win_length=win_length, hop_length=hop_length)
    mel = librosa.feature.melspectrogram(S=stft, n_mels=80)

    stft = np.log(np.abs(stft) + 1e-8)
    mel = np.log(np.abs(mel) + 1e-8)

    stft = reshape_frames(stft)
    mel = reshape_frames(mel)

    return mel, stft
Project: bird-species-classification    Author: johnmartinsson    | Project source | File source
def stft(x, fs, framesz, hop):
    framesamp = int(framesz*fs)
    hopsamp = int(hop*fs)
    w = np.hanning(framesamp)  # scipy.hanning/scipy.array/scipy.fft were removed; NumPy equivalents
    X = np.array([np.fft.fft(w*x[i:i+framesamp])
                  for i in range(0, len(x)-framesamp, hopsamp)])
    return X
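
For reference, the hand-rolled loop above is close to an uncentered librosa call; librosa keeps only the 1 + framesamp//2 non-negative frequency bins and uses a periodic rather than symmetric Hann window, so the values match only approximately:

import numpy as np
import librosa

def stft_librosa(x, fs, framesz, hop):
    framesamp = int(framesz * fs)
    hopsamp = int(hop * fs)
    # rows of the loop version correspond to columns here, one-sided spectrum
    return librosa.stft(np.asarray(x, dtype=np.float32), n_fft=framesamp,
                        hop_length=hopsamp, window='hann', center=False).T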
Project: bird-species-classification    Author: johnmartinsson    | Project source | File source
def wave_to_complex_spectrogram(wave, fs):
    return librosa.stft(wave, n_fft=512, hop_length=128, win_length=512)
Project: tacotron    Author: keithito    | Project source | File source
def _griffin_lim_tensorflow(S):
  '''TensorFlow implementation of Griffin-Lim
  Based on https://github.com/Kyubyong/tensorflow-exercises/blob/master/Audio_Processing.ipynb
  '''
  with tf.variable_scope('griffinlim'):
    # TensorFlow's stft and istft operate on a batch of spectrograms; create batch of size 1
    S = tf.expand_dims(S, 0)
    S_complex = tf.identity(tf.cast(S, dtype=tf.complex64))
    y = _istft_tensorflow(S_complex)
    for i in range(hparams.griffin_lim_iters):
      est = _stft_tensorflow(y)
      angles = est / tf.cast(tf.maximum(1e-8, tf.abs(est)), tf.complex64)
      y = _istft_tensorflow(S_complex * angles)
    return tf.squeeze(y, 0)
Project: tacotron    Author: keithito    | Project source | File source
def _stft(y):
  n_fft, hop_length, win_length = _stft_parameters()
  return librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
Project: tacotron    Author: keithito    | Project source | File source
def _stft_tensorflow(signals):
  n_fft, hop_length, win_length = _stft_parameters()
  return tf.contrib.signal.stft(signals, win_length, hop_length, n_fft, pad_end=False)
Project: tacotron_asr    Author: Kyubyong    | Project source | File source
def get_spectrogram(sound_fpath):
    '''Extracts a melspectrogram from the given `sound_fpath`.
    Args:
      sound_fpath: A string. Full path of a sound file.

    Returns:
      Transposed S: A 2d array. A transposed melspectrogram with shape (T, n_mels).
        (The magnitude spectrogram is computed internally but not returned.)
    '''
    # Loading sound file
    y, sr = librosa.load(sound_fpath, sr=None) # or set sr to hp.sr.

    # stft. D: (1+n_fft//2, T)
    D = librosa.stft(y=y,
                     n_fft=hp.n_fft, 
                     hop_length=hp.hop_length, 
                     win_length=hp.win_length) 

    # magnitude spectrogram
    magnitude = np.abs(D) #(1+n_fft/2, T)

    # power spectrogram
    power = magnitude**2

    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels) #(n_mels, T)

    return np.transpose(S.astype(np.float32))
Project: make_dataset    Author: hyzhan    | Project source | File source
def parse_audio(self, audio_path):
        if self.augment:
            y = load_randomly_augmented_audio(audio_path)
        else:
            y = load_audio(audio_path)
        if self.noiseInjector:
            add_noise = np.random.binomial(1, self.noise_prob)
            if add_noise:
                y = self.noiseInjector.inject_noise(y)
        n_fft = int(self.sample_rate * self.window_size)
        win_length = n_fft
        hop_length = int(self.sample_rate * self.window_stride)
        # STFT
        D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                         win_length=win_length, window=self.window)
        spect, phase = librosa.magphase(D)
        # S = log(S+1)
        spect = np.log1p(spect)
        spect = torch.FloatTensor(spect)
        if self.normalize:
            mean = spect.mean()
            std = spect.std()
            spect.add_(-mean)
            spect.div_(std)

        return spect
Project: aurora-sdk-win    Author: nanoleaf    | Project source | File source
def process_music_data(data_in, is_fft, is_energy, n_output_bins, n_fft, is_visual):
    # length is len(data_in)/4
    data_np = np.frombuffer(data_in, dtype=np.float32)  # np.fromstring is deprecated for binary input

    # visualizer
    if is_visual:
        visualizer(data_np)

    # energy
    if is_energy:
        energy = np.abs(data_np) ** 2
        energy = energy.sum()
        energy *= 2**5
        energy_output = energy.astype(np.uint16)
    else:
        energy_output = np.zeros(2).astype(np.uint16)

    # fft
    if is_fft:
        global sample_rate

        # down-sample by 4, with filtering, energy not scaled
        data_np = librosa.resample(data_np,
                                   sample_rate,
                                   sample_rate/4,
                                   res_type='kaiser_fast')

        # short time fft over n_fft samples
        fft_data = librosa.stft(data_np, n_fft,
                                hop_length=n_fft,
                                center=False)

        fft_data_mag = np.abs(fft_data[0:n_fft//2]) ** 2

        # magnitude scaling
        fft_data_mag *= 2**3
        fft_output = get_output_fft_bins(fft_data_mag, n_output_bins)
        fft_output = fft_output.astype(np.uint8)
    else:
        fft_output = np.zeros(n_output_bins).astype(np.uint8)

    return fft_output, energy_output
Project: dcase2016_task4    Author: pafoster    | Project source | File source
def feature_extraction(y=None, fs=None, statistics=True, include_mfcc0=True, include_delta=True, include_acceleration=True, mfcc_params=None, delta_params=None, acceleration_params=None):
    # Extract features, Mel Frequency Cepstral Coefficients
    eps = numpy.spacing(1)

    # Windowing function
    if mfcc_params['window'] == 'hamming_asymmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=False)
    elif mfcc_params['window'] == 'hamming_symmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=True)
    elif mfcc_params['window'] == 'hann_asymmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=False)
    elif mfcc_params['window'] == 'hann_symmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=True)
    else:
        window = None

    # Calculate Static Coefficients
    magnitude_spectrogram = numpy.abs(librosa.stft(y + eps, n_fft=mfcc_params['n_fft'], win_length=mfcc_params['win_length'], hop_length=mfcc_params['hop_length'], window=window))**2
    mel_basis = librosa.filters.mel(sr=fs, n_fft=mfcc_params['n_fft'], n_mels=mfcc_params['n_mels'], fmin=mfcc_params['fmin'], fmax=mfcc_params['fmax'], htk=mfcc_params['htk'])
    mel_spectrum = numpy.dot(mel_basis, magnitude_spectrogram)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel_spectrum))  # librosa.logamplitude was removed in 0.6; power_to_db replaces it

    # Collect the feature matrix
    feature_matrix = mfcc
    if include_delta:
        # Delta coefficients
        mfcc_delta = librosa.feature.delta(mfcc, **delta_params)

        # Add Delta Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta))

    if include_acceleration:
        # Acceleration coefficients (aka delta-delta)
        mfcc_delta2 = librosa.feature.delta(mfcc, order=2, **acceleration_params)

        # Add Acceleration Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta2))


    if not include_mfcc0:
        # Omit mfcc0
        feature_matrix = feature_matrix[1:, :]

    feature_matrix = feature_matrix.T

    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }
    else:
        return {
            'feat': feature_matrix}
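
The S1/S2 raw-moment sums exist so that per-file statistics can be pooled into one global normalizer without re-reading the features; a sketch of the pooling (standard moment algebra, not part of this project):

import numpy

def pool_stats(stats):
    # stats: a list of the 'stat' dicts returned above
    N = sum(s['N'] for s in stats)
    S1 = sum(s['S1'] for s in stats)
    S2 = sum(s['S2'] for s in stats)
    mean = S1 / N
    std = numpy.sqrt(S2 / N - mean ** 2)  # population std from raw moments
    return mean, std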
Project: aurora-sdk-mac    Author: nanoleaf    | Project source | File source
def process_music_data(data_in, is_fft, is_energy, n_output_bins, n_fft, is_visual):
    # length is len(data_in)/4
    data_np = np.frombuffer(data_in, dtype=np.float32)  # np.fromstring is deprecated for binary input

    # visualizer
    if is_visual:
        visualizer(data_np)

    # energy
    if is_energy:
        energy = np.abs(data_np) ** 2
        energy = energy.sum()
        energy *= 2**5
        energy_output = energy.astype(np.uint16)
    else:
        energy_output = np.zeros(2).astype(np.uint16)

    # fft
    if is_fft:
        global sample_rate

        # down-sample by 4, with filtering, energy not scaled
        data_np = librosa.resample(data_np,
                                   sample_rate,
                                   sample_rate/4,
                                   res_type='kaiser_fast')

        # short time fft over n_fft samples
        fft_data = librosa.stft(data_np, n_fft,
                                hop_length=n_fft,
                                center=False)

        fft_data_mag = np.abs(fft_data[0:n_fft//2]) ** 2

        # magnitude scaling
        fft_data_mag *= 2**3
        fft_output = get_output_fft_bins(fft_data_mag, n_output_bins)
        fft_output = fft_output.astype(np.uint8)
    else:
        fft_output = np.zeros(n_output_bins).astype(np.uint8)

    return fft_output, energy_output