Init feature_extractor

7 years ago · bff1dad95c
3 changed files with 75 additions and 1 deletions
--- a/classifier/README.md
+++ b/classifier/README.md
@ -0,0 +1,16 @@
 # Feature extraction
 The file `feature_extractor.py` is a Python module that uses the open-source library [Essentia](http://essentia.upf.edu/documentation/index.html) to extract audio features from the file at the path given as the first parameter and save the features' values to a JSON file at the path given as the second parameter. The third parameter is the sample rate used when loading the audio.
 **Dependencies:**
 - essentia
 - numpy
 - scipy
 - matplotlib
 All dependencies are available for both Python 2 and Python 3 and can be installed with `pip install <package_name>` or `pip3 install <package_name>`, respectively.
 The module can be imported from other Python code, or executed as a script using one of the following commands:
 `python feature_extractor.py <audio_file_path> <extracted_features_file_path> <audio_file_sample_rate>`
 or
 `python3 feature_extractor.py <audio_file_path> <extracted_features_file_path> <audio_file_sample_rate>`
--- a/classifier/feature_extractor.py
+++ b/classifier/feature_extractor.py
@ -0,0 +1,59 @@
 import essentia
 import essentia.standard
 from essentia.standard import *
 import essentia.streaming
 from pylab import plot, show, figure, imshow
 import matplotlib.pyplot as plt
 def extractFeatures(audioPath, outputPath, sampleRate, frameSize = 6144, hopSize = 3072):
 	"""Extracts frame-wise audio features from a file and saves them as JSON.

 	The audio is loaded as mono, sliced into overlapping frames and, for each
 	frame, the following values are added to a pool under these keys:
 	'ZCR' (zero crossing rate), 'SC' (spectral centroid computed in the time
 	domain), 'SR' (spectral roll-off), 'SF' (spectral flux) and 'mfcc' (the
 	MFCC coefficients). The pool is then written to outputPath.

 	Args:
 		audioPath: path of the audio file to analyse.
 		outputPath: path of the JSON file the features are written to.
 		sampleRate: sample rate passed to the loader and to the algorithms
 			that depend on it.
 		frameSize: number of samples per analysis frame (default 6144).
 		hopSize: number of samples between the starts of consecutive frames
 			(default 3072).
 	"""
 	# Loads the audio file specified
 	loader = essentia.standard.MonoLoader(filename = audioPath, sampleRate = sampleRate)
 	audio = loader()

 	# Spectrum outputs frameSize / 2 + 1 bins for a frame of frameSize samples;
 	# this, not the frame length, is the input size MFCC has to be told about
 	spectrumSize = frameSize // 2 + 1

 	# Sets up the functions that will be used
 	# TODO check if zero phase windowing is something we might want
 	window = Windowing(normalized = False, size = frameSize, type = 'hamming',
 		zeroPhase = False)
 	spectrum = Spectrum()
 	mfcc = MFCC(inputSize = spectrumSize, sampleRate = sampleRate)
 	zcr = ZeroCrossingRate()
 	sc = SpectralCentroidTime(sampleRate = sampleRate)
 	sr = RollOff(sampleRate = sampleRate)
 	sf = Flux()

 	# Creates a pool to collect the values of the features
 	pool = essentia.Pool()

 	# Slices the signal into frames
 	for frame in FrameGenerator(audio, frameSize = frameSize, hopSize = hopSize,
 		startFromZero = True , validFrameThresholdRatio = 0.7):
 		# Applies a window function to the frame
 		windowedFrame = window(frame)

 		# Computes time domain features
 		frameZCR = zcr(windowedFrame)
 		frameSC = sc(windowedFrame)

 		# Computes spectral features
 		frameSpectrum = spectrum(windowedFrame)
 		frameSR = sr(frameSpectrum)
 		frameSF = sf(frameSpectrum)
 		# MFCC returns (bands, coefficients); discards the bands
 		mfcc_coeffs = mfcc(frameSpectrum)[1]

 		# Adds the values to the pool
 		pool.add('ZCR', frameZCR)
 		pool.add('SC', frameSC)
 		pool.add('SR', frameSR)
 		pool.add('SF', frameSF)
 		pool.add('mfcc', mfcc_coeffs)

 	# Writes every collected value to outputPath in JSON format
 	YamlOutput(filename = outputPath, format = 'json', writeVersion = False)(pool)
 # Prints a nice message to let the user know the module was imported
 print('feature_extractor loaded')
 # Enables executing the module as a standalone script
 if __name__ == "__main__":
 	import sys
 	# Exits with a usage message instead of an IndexError traceback when one
 	# of the three required arguments is missing
 	if len(sys.argv) < 4:
 		sys.exit('usage: python feature_extractor.py <audio_file_path> '
 			'<extracted_features_file_path> <audio_file_sample_rate>')
 	extractFeatures(sys.argv[1], sys.argv[2], int(sys.argv[3]))
--- a/classifier/hello
+++ b/classifier/hello
@ -1 +0,0 @@
 hello world