You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
98 lines
3.3 KiB
98 lines
3.3 KiB
# import essentia.standard
|
|
import essentia
|
|
from essentia.standard import (MonoLoader, Windowing, Spectrum, MFCC,
|
|
ZeroCrossingRate, SpectralCentroidTime, RollOff, Flux, Envelope,
|
|
FlatnessSFX, LogAttackTime, StrongDecay, FlatnessDB, HFC, LPC,
|
|
SpectralComplexity, FrameGenerator, YamlOutput)
|
|
|
|
# Disable annoying info level logging.
# Set once when the module is loaded; only the info-level flag is changed here.
|
|
essentia.log.infoActive = False
|
|
|
|
def extractFeatures(audioPath, outputPath, sampleRate, frameSize=6144,
                    hopSize=3072):
    """Extract frame-wise audio features from a file and write them as JSON.

    The file is loaded as mono audio, sliced into overlapping frames, each
    frame is Hamming-windowed, and a set of time-domain, spectral and
    cepstral descriptors is computed per frame. Every descriptor is appended
    to an essentia Pool under its own key, and the pool is written to
    ``outputPath`` once all frames have been processed.

    Pool keys written per frame:
        time domain: 'ZCR', 'SC', 'Flat', 'LAtt', 'SDec',
                     'LPC1'..'LPCn' (first LPC coefficient skipped),
                     'REFL0'..'REFLm'
        spectral:    'SR', 'SF', 'SEFlat', 'SFlat', 'SLAtt', 'SSDec',
                     'HFC', 'SComp'
        cepstral:    'mfcc0'..'mfcck'

    Args:
        audioPath: path of the audio file to analyse.
        outputPath: path of the JSON file to write.
        sampleRate: sample rate passed to the loader and to every
            algorithm that takes one.
        frameSize: number of samples per analysis frame (default 6144,
            the value that used to be hard-coded).
        hopSize: number of samples between the starts of consecutive
            frames (default 3072, the value that used to be hard-coded).
    """
    # Loads the audio file specified
    audio = MonoLoader(filename=audioPath, sampleRate=sampleRate)()

    # Sets up the functions that will be used
    # TODO check if zero phase windowing is something we might want
    window = Windowing(normalized=False, size=frameSize, type='hamming',
                       zeroPhase=False)
    # The size is only a hint for creating the FFT up front
    spectrum = Spectrum(size=frameSize)
    # MFCC's inputSize is the length of the magnitude spectrum it receives
    # (frameSize // 2 + 1), not the length of the audio frame. It used to be
    # given the frame length, which never matched the actual spectrum size.
    mfcc = MFCC(inputSize=frameSize // 2 + 1, sampleRate=sampleRate)
    zcr = ZeroCrossingRate()
    sc = SpectralCentroidTime(sampleRate=sampleRate)
    sr = RollOff(sampleRate=sampleRate)
    sf = Flux()
    env = Envelope(attackTime=2, releaseTime=300, sampleRate=sampleRate)
    flat = FlatnessSFX()
    logAtt = LogAttackTime(sampleRate=sampleRate)
    strDec = StrongDecay(sampleRate=sampleRate)
    flatDB = FlatnessDB()
    hfc = HFC(sampleRate=sampleRate)
    lpc = LPC(sampleRate=sampleRate)
    spcComp = SpectralComplexity(sampleRate=sampleRate, magnitudeThreshold=2)

    # Creates a pool to collect the values of the features
    pool = essentia.Pool()

    # Slices the signal into frames
    frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize,
                            startFromZero=True, validFrameThresholdRatio=0.7)
    for frame in frames:
        # Applies a window function to the frame
        windowedFrame = window(frame)

        # Computes time domain features.
        # The envelope is deliberately computed once per consumer, in the
        # original order, instead of being shared between them.
        # NOTE(review): the algorithms may keep state between calls, so
        # merging or reordering these calls could change the values - confirm
        # before refactoring.
        frameZCR = zcr(windowedFrame)
        frameSC = sc(windowedFrame)
        frameEFlatness = flat(env(windowedFrame))
        # NOTE(review): element [1] of the LogAttackTime output is kept as
        # it was. If the outputs are (logAttackTime, attackStart, attackStop)
        # this stores attackStart, not the log attack time - confirm intent
        # before changing, since consumers may rely on the current values.
        frameLogAtt = logAtt(env(windowedFrame))[1]
        frameStrDec = strDec(windowedFrame)
        frameLPC, frameReflection = lpc(windowedFrame)

        # Computes spectral features
        frameSpectrum = spectrum(windowedFrame)
        frameSR = sr(frameSpectrum)
        frameSF = sf(frameSpectrum)
        frameSEFlatness = flat(env(frameSpectrum))
        # Same [1] indexing as for the time-domain value above
        frameSLogAtt = logAtt(env(frameSpectrum))[1]
        frameSStrDec = strDec(frameSpectrum)
        frameSFlat = flatDB(frameSpectrum)
        frameHFC = hfc(frameSpectrum)
        frameSComp = spcComp(frameSpectrum)

        # Computes cepstral features
        # Discards the bands
        mfcc_coeffs = mfcc(frameSpectrum)[1]

        # Adds the values to the pool
        pool.add('ZCR', frameZCR)
        pool.add('SC', frameSC)
        pool.add('Flat', frameEFlatness)
        pool.add('LAtt', frameLogAtt)
        pool.add('SDec', frameStrDec)
        # The first LPC coefficient is skipped, so keys start at 'LPC1'
        for index, coef in enumerate(frameLPC[1:], start=1):
            pool.add('LPC' + str(index), coef)
        for index, coef in enumerate(frameReflection):
            pool.add('REFL' + str(index), coef)

        pool.add('SR', frameSR)
        pool.add('SF', frameSF)
        pool.add('SEFlat', frameSEFlatness)
        pool.add('SFlat', frameSFlat)
        pool.add('SLAtt', frameSLogAtt)
        pool.add('SSDec', frameSStrDec)
        pool.add('HFC', frameHFC)
        pool.add('SComp', frameSComp)

        for index, coef in enumerate(mfcc_coeffs):
            pool.add('mfcc' + str(index), coef)

    # Writes everything that was collected, once all frames are processed
    YamlOutput(filename=outputPath, format='json', writeVersion=False)(pool)
|
|
|
|
# Prints a nice message to let the user know the module was imported.
# This is a module-level statement, so it also runs when the file is
# executed directly as a script.
|
|
print('feature_extractor loaded')
|
|
|
|
# Enables executing the module as a standalone script:
#   <script> <audioPath> <outputPath> <sampleRate>
if __name__ == "__main__":
    import sys

    # Fail with a readable usage message instead of a bare IndexError when
    # arguments are missing; extra arguments are still ignored as before.
    if len(sys.argv) < 4:
        sys.exit('usage: %s <audioPath> <outputPath> <sampleRate>'
                 % sys.argv[0])

    extractFeatures(sys.argv[1], sys.argv[2], int(sys.argv[3]))
|