"""Frame-wise audio feature extraction built on Essentia.

Exposes ``extractFeatures``, which slices a signal into overlapping frames,
computes time-domain, spectral and cepstral descriptors for every frame,
dumps them to a JSON file and returns them as a ``pandas.DataFrame``.

Importing this module switches off Essentia's info-level logging and prints
a short coloured banner.
"""
from functools import lru_cache

import essentia
import numpy as np
import pandas as pd
from essentia.standard import (MonoLoader, Windowing, Spectrum, MFCC,
                               ZeroCrossingRate, SpectralCentroidTime,
                               RollOff, Flux, Envelope, FlatnessSFX,
                               LogAttackTime, StrongDecay, FlatnessDB, HFC,
                               SpectralComplexity, Energy, FrameGenerator,
                               YamlOutput)

# Disable annoying info level logging
essentia.log.infoActive = False

# Analysis frame length and hop, in samples (50 % overlap between frames).
_FRAME_SIZE = 6144
_HOP_SIZE = 3072
# Number of bins in the magnitude spectrum of a _FRAME_SIZE-sample frame.
_SPECTRUM_SIZE = _FRAME_SIZE // 2 + 1


class bcolors:
    """ANSI escape sequences used to colour terminal output."""
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    ENDC = '\033[0m'


def extractFeatures(audio, outputPath, sampleRate):
    """Compute per-frame audio descriptors, save them and return them.

    Args:
        audio: Either the path of an audio file (``str``), which is loaded
            as mono at ``sampleRate``, or an already loaded signal that
            ``FrameGenerator`` can consume.
        outputPath: Path of the JSON file the descriptor pool is written to.
        sampleRate: Sampling rate in Hz, forwarded to every algorithm that
            needs it.

    Returns:
        A ``pandas.DataFrame`` with one row per frame and one column per
        descriptor name found in the pool.
    """
    if isinstance(audio, str):
        # Loads the audio file specified
        loader = MonoLoader(filename=audio, sampleRate=sampleRate)
        audio = loader()

    # Sets up the functions that will be used
    # TODO check if zero phase windowing is something we might want
    window = Windowing(normalized=False, size=_FRAME_SIZE, type='hamming',
                       zeroPhase=False)
    spectrum = Spectrum()
    # MFCC expects the size of the *spectrum*, not of the time-domain frame.
    # Passing the matching size avoids a filter-bank rebuild on first use.
    mfcc = MFCC(inputSize=_SPECTRUM_SIZE, sampleRate=sampleRate)
    zcr = ZeroCrossingRate()
    sc = SpectralCentroidTime(sampleRate=sampleRate)
    sr = RollOff(sampleRate=sampleRate)
    sf = Flux()
    env = Envelope(attackTime=2, releaseTime=300, sampleRate=sampleRate)
    flat = FlatnessSFX()
    logAtt = LogAttackTime(sampleRate=sampleRate)
    strDec = StrongDecay(sampleRate=sampleRate)
    flatDB = FlatnessDB()
    hfc = HFC(sampleRate=sampleRate)
    spcComp = SpectralComplexity(sampleRate=sampleRate, magnitudeThreshold=2)
    energy = Energy()

    # Creates a pool to collect the values of the features
    pool = essentia.Pool()

    # Slices the signal into frames
    for frame in FrameGenerator(audio, frameSize=_FRAME_SIZE,
                                hopSize=_HOP_SIZE, startFromZero=True,
                                validFrameThresholdRatio=0.7):
        # Applies a window function to the frame
        windowedFrame = window(frame)

        # NOTE(review): the order and number of calls below is kept exactly
        # as it was. Some algorithms (presumably Envelope and Flux) may keep
        # state between calls, so merging the repeated env(...) calls or
        # reordering them could change the feature values -- confirm before
        # refactoring.

        # Computes time domain features
        frameZCR = zcr(windowedFrame)
        frameSC = sc(windowedFrame)
        frameEFlatness = flat(env(windowedFrame))
        # NOTE(review): index 1 selects the second output of LogAttackTime,
        # not the first -- confirm this is the intended value for 'LAtt'.
        frameLogAtt = logAtt(env(windowedFrame))[1]
        frameStrDec = strDec(windowedFrame)

        # Computes spectral features
        frameSpectrum = spectrum(windowedFrame)
        frameSR = sr(frameSpectrum)
        frameSF = sf(frameSpectrum)
        frameSEFlatness = flat(env(frameSpectrum))
        frameSLogAtt = logAtt(env(frameSpectrum))[1]
        frameSStrDec = strDec(frameSpectrum)
        frameSFlat = flatDB(frameSpectrum)
        frameHFC = hfc(frameSpectrum)
        frameSComp = spcComp(frameSpectrum)

        # Computes cepstral features
        # The mel band energies are kept as well: they feed the 4 Hz
        # modulation feature below.
        melBandEnergies, mfcc_coeffs = mfcc(frameSpectrum)
        fHzMod = _4HzModulation(melBandEnergies, energy(frameSpectrum),
                                sampleRate)

        # Adds the values to the pool
        pool.add('ZCR', frameZCR)
        pool.add('SC', frameSC)
        pool.add('Flat', frameEFlatness)
        pool.add('LAtt', frameLogAtt)
        pool.add('SDec', frameStrDec)
        pool.add('SR', frameSR)
        pool.add('SF', frameSF)
        pool.add('SEFlat', frameSEFlatness)
        pool.add('SFlat', frameSFlat)
        pool.add('SLAtt', frameSLogAtt)
        pool.add('SSDec', frameSStrDec)
        pool.add('HFC', frameHFC)
        pool.add('SComp', frameSComp)
        for index, coef in enumerate(mfcc_coeffs):
            pool.add('mfcc' + str(index), coef)
        pool.add('4HzMod', fHzMod)

    YamlOutput(filename=outputPath, format='json', writeVersion=False)(pool)

    # Each pool entry holds one value per frame; transposing turns the
    # (descriptor, frame) matrix into one row per frame.
    names = pool.descriptorNames()
    return pd.DataFrame(np.array([pool[name] for name in names]).T,
                        columns=names)


@lru_cache(maxsize=8)
def _bandpassSos(sampleRate):
    """Design (once per sample rate) the 3-5 Hz band-pass filter.

    Returns the second-order sections of a Butterworth band-pass (N = 2)
    whose cut-offs are 3 Hz and 5 Hz normalised by the Nyquist frequency
    derived from ``sampleRate``. The result is cached because the design is
    identical for every frame of a signal.
    """
    # Imported lazily, as in the original helper, so scipy is only needed
    # once features are actually computed.
    from scipy.signal import butter

    nyquist = 0.5 * sampleRate
    lowCut = 3 / nyquist
    highCut = 5 / nyquist
    return butter(N=2, Wn=[lowCut, highCut], analog=False, btype='band',
                  output='sos')


def _4HzModulation(melEnergies, frameEnergy, sampleRate):
    """Return the band-pass filtered mel energy relative to the frame energy.

    Args:
        melEnergies: Mel band energies of a single frame.
        frameEnergy: Energy of the same frame, used as normaliser.
        sampleRate: Sampling rate used to normalise the filter cut-offs.

    Returns:
        The sum of the filtered mel energies divided by ``frameEnergy``, or
        ``0.0`` when ``frameEnergy`` is zero (silent frame), so that no
        NaN/inf value reaches the pool, the JSON file or the DataFrame.
    """
    from scipy.signal import sosfilt

    # A silent frame has no energy to normalise by; dividing would yield
    # NaN (0 / 0) and a RuntimeWarning.
    if frameEnergy == 0:
        return 0.0

    # NOTE(review): the filter runs along the mel-band vector of one frame,
    # not along time -- confirm this is the intended modulation measure.
    filtered = sosfilt(sos=_bandpassSos(sampleRate), x=melEnergies)
    energySum = sum(filtered)
    return energySum / frameEnergy


# Prints a nice message to let the user know the module was imported
print(bcolors.BLUE + 'feature_extractor loaded' + bcolors.ENDC)

# Enables executing the module as a standalone script
if __name__ == "__main__":
    import sys

    # Fail with a readable usage line instead of a bare IndexError.
    if len(sys.argv) < 4:
        sys.exit('usage: ' + sys.argv[0]
                 + ' <audio file> <output json> <sample rate>')
    extractFeatures(sys.argv[1], sys.argv[2], int(sys.argv[3]))