"""Extract frame-level audio features with Essentia and save them as JSON.

This module uses the open-source library Essentia
(http://essentia.upf.edu/documentation/index.html) to load an audio file,
slice it into overlapping frames and compute, for every frame, the
zero-crossing rate, the spectral centroid, the spectral roll-off, the
spectral flux and the MFCC coefficients.  The collected values are written
to the output path as a JSON document.

Dependencies (all installable with ``pip install`` / ``pip3 install`` and
available for both python2 and python3):

- essentia
- numpy
- scipy
- matplotlib

The module can be imported, or executed as a script:

    python feature_extractor.py <audioPath> <outputPath> [sampleRate]
    python3 feature_extractor.py <audioPath> <outputPath> [sampleRate]
"""
import essentia
import essentia.standard
from essentia.standard import *
import essentia.streaming
from pylab import plot, show, figure, imshow
import matplotlib.pyplot as plt


def extractFeatures(audioPath, outputPath, sampleRate=44100, frameSize=6144,
                    hopSize=3072):
    """Compute per-frame audio features and write them to a JSON file.

    Parameters:
        audioPath:  path of the audio file to analyse.
        outputPath: path of the JSON file that receives the features.
        sampleRate: rate the audio is loaded at, also handed to the
                    sample-rate dependent algorithms (default 44100).
        frameSize:  number of samples per analysis frame (default 6144).
        hopSize:    number of samples between the starts of consecutive
                    frames (default 3072, i.e. half of the default frame).

    The output holds one entry per frame under each of the keys
    'ZCR', 'SC', 'SR', 'SF' and 'mfcc'.
    """
    # Loads the audio file specified
    loader = essentia.standard.MonoLoader(filename=audioPath,
                                          sampleRate=sampleRate)
    audio = loader()

    # Sets up the functions that will be used
    # TODO check if zero phase windowing is something we might want
    window = Windowing(normalized=False, size=frameSize, type='hamming',
                       zeroPhase=False)
    spectrum = Spectrum()
    # MFCC's inputSize is the length of the spectrum it receives, not the
    # length of the audio frame: a frame of frameSize samples produces a
    # magnitude spectrum of frameSize // 2 + 1 bins.
    mfcc = MFCC(inputSize=frameSize // 2 + 1, sampleRate=sampleRate)
    zcr = ZeroCrossingRate()
    sc = SpectralCentroidTime(sampleRate=sampleRate)
    sr = RollOff(sampleRate=sampleRate)
    sf = Flux()

    # Creates a pool to collect the values of the features
    pool = essentia.Pool()

    # Slices the signal into frames
    for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize,
                                startFromZero=True,
                                validFrameThresholdRatio=0.7):
        # Applies a window function to the frame
        windowedFrame = window(frame)

        # Computes time domain features
        frameZCR = zcr(windowedFrame)
        frameSC = sc(windowedFrame)

        # Computes spectral features
        frameSpectrum = spectrum(windowedFrame)
        frameSR = sr(frameSpectrum)
        frameSF = sf(frameSpectrum)
        # MFCC returns (bands, coefficients); discards the bands
        mfcc_coeffs = mfcc(frameSpectrum)[1]

        # Adds the values to the pool
        pool.add('ZCR', frameZCR)
        pool.add('SC', frameSC)
        pool.add('SR', frameSR)
        pool.add('SF', frameSF)
        pool.add('mfcc', mfcc_coeffs)

    YamlOutput(filename=outputPath, format='json', writeVersion=False)(pool)


# Prints a nice message to let the user know the module was imported
print('feature_extractor loaded')

# Enables executing the module as a standalone script
if __name__ == "__main__":
    import sys

    args = sys.argv[1:]
    if len(args) < 2:
        # Fail with a readable message instead of an IndexError traceback
        sys.exit('usage: python feature_extractor.py <audioPath> '
                 '<outputPath> [sampleRate]')
    if len(args) > 2:
        extractFeatures(args[0], args[1], int(args[2]))
    else:
        # No rate given: fall back to extractFeatures' default sample rate
        extractFeatures(args[0], args[1])