Speech/Music classification of audio files using machine learning techniques.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

98 lines
3.3 KiB

# import essentia.standard
import essentia
from essentia.standard import (MonoLoader, Windowing, Spectrum, MFCC,
ZeroCrossingRate, SpectralCentroidTime, RollOff, Flux, Envelope,
FlatnessSFX, LogAttackTime, StrongDecay, FlatnessDB, HFC, LPC,
SpectralComplexity, FrameGenerator, YamlOutput)
# Turns off Essentia's INFO-level log messages. This is a module-level
# statement, so it runs as soon as the module is imported.
essentia.log.infoActive = False
def extractFeatures(audioPath, outputPath, sampleRate, frameSize=6144,
                    hopSize=3072):
    """Extract frame-wise audio features and write them to a JSON file.

    The audio file is loaded with ``MonoLoader``, sliced into frames, and
    for every frame a set of time-domain, spectral and cepstral descriptors
    is computed with Essentia and appended to a pool. Once all frames have
    been processed the pool is written to ``outputPath`` in JSON format.

    Pool keys written per frame: ``ZCR``, ``SC``, ``Flat``, ``LAtt``,
    ``SDec``, ``LPC1..LPCn``, ``REFL0..REFLn``, ``SR``, ``SF``, ``SEFlat``,
    ``SFlat``, ``SLAtt``, ``SSDec``, ``HFC``, ``SComp`` and
    ``mfcc0..mfccn``.

    Args:
        audioPath: Path of the audio file to analyse.
        outputPath: Path of the JSON file to write.
        sampleRate: Sampling rate handed to the loader and to every
            algorithm that accepts one.
        frameSize: Number of samples per analysis frame. Defaults to 6144,
            the value that used to be hard-coded.
        hopSize: Number of samples between consecutive frame starts.
            Defaults to 3072, the value that used to be hard-coded.
    """
    # Loads the audio file specified
    audio = MonoLoader(filename=audioPath, sampleRate=sampleRate)()

    # MFCC's inputSize is the size of the spectrum it receives, not of the
    # time-domain frame: an N-sample frame yields N/2 + 1 spectrum bins.
    spectrumSize = frameSize // 2 + 1

    # Sets up the functions that will be used
    # TODO check if zero phase windowing is something we might want
    window = Windowing(normalized=False, size=frameSize, type='hamming',
                       zeroPhase=False)
    spectrum = Spectrum()
    mfcc = MFCC(inputSize=spectrumSize, sampleRate=sampleRate)
    zcr = ZeroCrossingRate()
    sc = SpectralCentroidTime(sampleRate=sampleRate)
    sr = RollOff(sampleRate=sampleRate)
    sf = Flux()
    env = Envelope(attackTime=2, releaseTime=300, sampleRate=sampleRate)
    flat = FlatnessSFX()
    logAtt = LogAttackTime(sampleRate=sampleRate)
    strDec = StrongDecay(sampleRate=sampleRate)
    flatDB = FlatnessDB()
    hfc = HFC(sampleRate=sampleRate)
    lpc = LPC(sampleRate=sampleRate)
    spcComp = SpectralComplexity(sampleRate=sampleRate, magnitudeThreshold=2)

    # Creates a pool to collect the values of the features
    pool = essentia.Pool()

    # Slices the signal into frames
    frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize,
                            startFromZero=True, validFrameThresholdRatio=0.7)
    for frame in frames:
        # Applies a window function to the frame
        windowedFrame = window(frame)

        # Computes time domain features.
        # NOTE(review): env() is called once per feature instead of being
        # cached. The call sequence is kept exactly as it was in case the
        # Envelope instance carries state between calls - confirm before
        # deduplicating.
        frameZCR = zcr(windowedFrame)
        frameSC = sc(windowedFrame)
        frameEFlatness = flat(env(windowedFrame))
        # NOTE(review): index 1 is kept from the original. If LogAttackTime
        # returns (logAttackTime, attackStart, attackStop), this selects
        # attackStart rather than the log attack time - confirm intent.
        frameLogAtt = logAtt(env(windowedFrame))[1]
        frameStrDec = strDec(windowedFrame)
        frameLPC, frameReflection = lpc(windowedFrame)

        # Computes spectral features
        frameSpectrum = spectrum(windowedFrame)
        frameSR = sr(frameSpectrum)
        frameSF = sf(frameSpectrum)
        frameSEFlatness = flat(env(frameSpectrum))
        frameSLogAtt = logAtt(env(frameSpectrum))[1]
        frameSStrDec = strDec(frameSpectrum)
        frameSFlat = flatDB(frameSpectrum)
        frameHFC = hfc(frameSpectrum)
        frameSComp = spcComp(frameSpectrum)

        # Computes cepstral features
        # Discards the bands
        mfcc_coeffs = mfcc(frameSpectrum)[1]

        # Adds the values to the pool
        pool.add('ZCR', frameZCR)
        pool.add('SC', frameSC)
        pool.add('Flat', frameEFlatness)
        pool.add('LAtt', frameLogAtt)
        pool.add('SDec', frameStrDec)
        # The first LPC coefficient is skipped; the rest are stored as
        # LPC1, LPC2, ...
        for index, coef in enumerate(frameLPC[1:], 1):
            pool.add('LPC' + str(index), coef)
        for index, coef in enumerate(frameReflection):
            pool.add('REFL' + str(index), coef)
        pool.add('SR', frameSR)
        pool.add('SF', frameSF)
        pool.add('SEFlat', frameSEFlatness)
        pool.add('SFlat', frameSFlat)
        pool.add('SLAtt', frameSLogAtt)
        pool.add('SSDec', frameSStrDec)
        pool.add('HFC', frameHFC)
        pool.add('SComp', frameSComp)
        for index, coef in enumerate(mfcc_coeffs):
            pool.add('mfcc' + str(index), coef)

    # Writes the collected features once every frame has been processed
    YamlOutput(filename=outputPath, format='json', writeVersion=False)(pool)
# Prints a short message confirming the module was loaded. This is a
# module-level statement, so it runs on import as well as when the file is
# executed as a script.
print('feature_extractor loaded')
# Enables executing the module as a standalone script:
#   python <this file> audioPath outputPath sampleRate
if __name__ == "__main__":
    import sys

    # Exit with a usage message instead of a bare IndexError when arguments
    # are missing. Extra arguments are still ignored, as before.
    if len(sys.argv) < 4:
        sys.exit('usage: ' + sys.argv[0] + ' audioPath outputPath sampleRate')
    extractFeatures(sys.argv[1], sys.argv[2], int(sys.argv[3]))