Init feature_extractor

6 years ago · bff1dad95c
3 changed files with 75 additions and 1 deletions
--- a/classifier/README.md
+++ b/classifier/README.md
@ -0,0 +1,16 @@
+# Feature extraction
+
+The file `feature_extractor.py` is a Python module that uses the open-source library [Essentia](http://essentia.upf.edu/documentation/index.html) to extract audio features from the file at the path given as the first parameter and save the feature values to a JSON file at the path given as the second parameter. The third parameter is the sample rate used to load the audio file.
+
+**Dependencies:**
+- essentia
+- numpy
+- scipy
+- matplotlib
+
+All dependencies are available for both Python 2 and Python 3 and can be installed with `pip install <package_name>` (Python 2) or `pip3 install <package_name>` (Python 3).
+
+The module can be imported, or executed as a script using one of the following commands:
+`python feature_extractor.py <audio_file_path> <extracted_features_file_path> <audio_file_sample_rate>`
+or
+`python3 feature_extractor.py <audio_file_path> <extracted_features_file_path> <audio_file_sample_rate>`
--- a/classifier/feature_extractor.py
+++ b/classifier/feature_extractor.py
@ -0,0 +1,59 @@
+import essentia
+import essentia.standard
+from essentia.standard import *
+import essentia.streaming
+from pylab import plot, show, figure, imshow
+import matplotlib.pyplot as plt
+
+def extractFeatures(audioPath, outputPath, sampleRate):
+	# Loads the audio file specified
+	loader = essentia.standard.MonoLoader(filename = audioPath, sampleRate = sampleRate)
+	audio = loader()
+
+	# Sets up the functions that will be used
+	# TODO check if zero phase windowing is something we might want
+	window = Windowing(normalized = False, size = 6144, type = 'hamming',
+		zeroPhase = False)
+	spectrum = Spectrum()
+	mfcc = MFCC(inputSize = 6144, sampleRate = sampleRate)
+	zcr = ZeroCrossingRate()
+	sc = SpectralCentroidTime(sampleRate = sampleRate)
+	sr = RollOff(sampleRate = sampleRate)
+	sf = Flux()
+
+	# Creates a pool to collect the values of the features
+	pool = essentia.Pool()
+
+	# Slices the signal into frames
+	for frame in FrameGenerator(audio, frameSize = 6144, hopSize = 3072,
+		startFromZero = True , validFrameThresholdRatio = 0.7):
+		# Applies a window function to the frame
+		windowedFrame = window(frame)
+
+		# Computes time domain features
+		frameZCR = zcr(windowedFrame)
+		frameSC = sc(windowedFrame)
+
+		# Computes spectral features
+		frameSpectrum = spectrum(windowedFrame)
+		frameSR = sr(frameSpectrum)
+		frameSF = sf(frameSpectrum)
+		# Discards the bands
+		mfcc_coeffs = mfcc(frameSpectrum)[1]
+
+		# Adds the values to the pool
+		pool.add('ZCR', frameZCR)
+		pool.add('SC', frameSC)
+		pool.add('SR', frameSR)
+		pool.add('SF', frameSF)
+		pool.add('mfcc', mfcc_coeffs)
+
+	YamlOutput(filename = outputPath, format = 'json', writeVersion = False)(pool)
+
+# Prints a nice message to let the user know the module was imported
+print('feature_extractor loaded')
+
+# Enables executing the module as a standalone script
+if __name__ == "__main__":
+	import sys
+	extractFeatures(sys.argv[1], sys.argv[2], int(sys.argv[3]))
--- a/classifier/hello
+++ b/classifier/hello
@ -1 +0,0 @@
-hello world