Speech/Music classification of audio files using machine learning techniques.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

59 lines
1.9 KiB

import essentia
import essentia.standard
from essentia.standard import *
import essentia.streaming
from pylab import plot, show, figure, imshow
import matplotlib.pyplot as plt
def extractFeatures(audioPath, outputPath, sampleRate, frameSize=6144,
                    hopSize=3072):
    """Extract frame-level audio features and save them as JSON.

    The audio file is loaded as mono at ``sampleRate``, sliced into
    overlapping frames and, for every frame, the following values are added
    to an Essentia pool under these keys:

    * ``'ZCR'``  - zero crossing rate of the windowed frame
    * ``'SC'``   - time-domain spectral centroid of the windowed frame
    * ``'SR'``   - roll-off of the magnitude spectrum
    * ``'SF'``   - flux of the magnitude spectrum
    * ``'mfcc'`` - MFCC coefficients (the mel band energies are discarded)

    The pool is then written to ``outputPath`` in JSON format.

    Args:
        audioPath: Path of the audio file to analyse.
        outputPath: Path of the JSON file to write.
        sampleRate: Sample rate used for loading and by the sample-rate
            dependent algorithms.
        frameSize: Number of samples per analysis frame.
        hopSize: Number of samples between the starts of two consecutive
            frames.

    Returns:
        None. The result is the file written at ``outputPath``.
    """
    # Loads the audio file specified
    loader = essentia.standard.MonoLoader(filename=audioPath,
                                          sampleRate=sampleRate)
    audio = loader()

    # Sets up the functions that will be used
    # TODO check if zero phase windowing is something we might want
    window = Windowing(normalized=False, size=frameSize, type='hamming',
                       zeroPhase=False)
    spectrum = Spectrum()
    # MFCC's inputSize is the size of the spectrum it receives, not of the
    # audio frame; the magnitude spectrum of a frameSize-sample frame has
    # frameSize / 2 + 1 bins.
    mfcc = MFCC(inputSize=frameSize // 2 + 1, sampleRate=sampleRate)
    zcr = ZeroCrossingRate()
    sc = SpectralCentroidTime(sampleRate=sampleRate)
    sr = RollOff(sampleRate=sampleRate)
    sf = Flux()

    # Creates a pool to collect the values of the features
    pool = essentia.Pool()

    # Slices the signal into frames
    # NOTE(review): validFrameThresholdRatio presumably controls which
    # partially filled trailing frames are kept - confirm against the
    # FrameGenerator documentation.
    for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize,
                                startFromZero=True,
                                validFrameThresholdRatio=0.7):
        # Applies a window function to the frame
        windowedFrame = window(frame)

        # Computes time domain features (on the windowed frame)
        frameZCR = zcr(windowedFrame)
        frameSC = sc(windowedFrame)

        # Computes spectral features
        frameSpectrum = spectrum(windowedFrame)
        frameSR = sr(frameSpectrum)
        frameSF = sf(frameSpectrum)
        # Discards the bands
        mfcc_coeffs = mfcc(frameSpectrum)[1]

        # Adds the values to the pool
        pool.add('ZCR', frameZCR)
        pool.add('SC', frameSC)
        pool.add('SR', frameSR)
        pool.add('SF', frameSF)
        pool.add('mfcc', mfcc_coeffs)

    # Writes every collected value to disk once all frames are processed
    YamlOutput(filename=outputPath, format='json', writeVersion=False)(pool)
# Prints a nice message to let the user know the module was imported
print('feature_extractor loaded')

# Enables executing the module as a standalone script
if __name__ == "__main__":
    import sys

    # Three positional arguments are required: the input audio path, the
    # output path and the sample rate. Fail with a usage message rather than
    # an IndexError when any of them is missing.
    if len(sys.argv) < 4:
        sys.exit('usage: ' + sys.argv[0]
                 + ' <audioPath> <outputPath> <sampleRate>')

    # The sample rate arrives as text and is converted to an integer
    extractFeatures(sys.argv[1], sys.argv[2], int(sys.argv[3]))