Apostolos Fanakis
6 years ago
3 changed files with 75 additions and 1 deletions
@ -0,0 +1,16 @@ |
|||
# Feature extraction |
|||
|
|||
The file `feature_extractor.py` is a Python module that uses the open-source library [Essentia](http://essentia.upf.edu/documentation/index.html) to extract audio features from the audio file at the path given as the first parameter and save the feature values to a JSON file at the path given as the second parameter. The third parameter is the sample rate of the audio file. |
|||
|
|||
**Dependencies:** |
|||
- essentia |
|||
- numpy |
|||
- scipy |
|||
- matplotlib |
|||
|
|||
All dependencies are available for both Python 2 and Python 3 and can be installed with the command `pip install <package_name>` (Python 2) or `pip3 install <package_name>` (Python 3). |
|||
|
|||
The module can be imported from other Python code, or executed as a script using one of the following commands: |
|||
`python feature_extractor.py <audio_file_path> <extracted_features_file_path> <audio_file_sample_rate>` |
|||
or |
|||
`python3 feature_extractor.py <audio_file_path> <extracted_features_file_path> <audio_file_sample_rate>` |
@ -0,0 +1,59 @@ |
|||
import essentia |
|||
import essentia.standard |
|||
from essentia.standard import * |
|||
import essentia.streaming |
|||
from pylab import plot, show, figure, imshow |
|||
import matplotlib.pyplot as plt |
|||
|
|||
def extractFeatures(audioPath, outputPath, sampleRate, frameSize=6144,
                    hopSize=None):
    """Extract per-frame audio features from a file and save them as JSON.

    The audio is loaded as a mono signal, sliced into overlapping frames and,
    for every frame, the zero crossing rate ('ZCR'), the time-domain spectral
    centroid ('SC'), the spectral roll-off ('SR'), the spectral flux ('SF')
    and the MFCC coefficients ('mfcc') are computed and collected in a pool.
    The pool is then written to ``outputPath`` in JSON format.

    Args:
        audioPath: Path of the audio file to analyse.
        outputPath: Path of the JSON file the features are written to.
        sampleRate: Sample rate in Hz, passed to the loader and to every
            sample-rate dependent algorithm.
        frameSize: Number of samples per analysis frame. Must be even,
            because the spectrum is computed from it (Essentia's FFT
            requires an even input size). Defaults to 6144.
        hopSize: Number of samples between the starts of consecutive frames.
            Defaults to half of ``frameSize`` (3072 for the default frame).
    """
    if hopSize is None:
        hopSize = frameSize // 2

    # A real-valued frame of frameSize samples yields a magnitude spectrum
    # of frameSize / 2 + 1 bins; this is the size the MFCC filter bank must
    # be built for (not the time-domain frame length).
    spectrumSize = frameSize // 2 + 1

    # Loads the audio file specified
    loader = essentia.standard.MonoLoader(filename=audioPath,
                                          sampleRate=sampleRate)
    audio = loader()

    # Sets up the functions that will be used
    # TODO check if zero phase windowing is something we might want
    window = Windowing(normalized=False, size=frameSize, type='hamming',
                       zeroPhase=False)
    spectrum = Spectrum(size=frameSize)
    mfcc = MFCC(inputSize=spectrumSize, sampleRate=sampleRate)
    zcr = ZeroCrossingRate()
    sc = SpectralCentroidTime(sampleRate=sampleRate)
    sr = RollOff(sampleRate=sampleRate)
    sf = Flux()

    # Creates a pool to collect the values of the features
    pool = essentia.Pool()

    # Slices the signal into frames
    for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize,
                                startFromZero=True,
                                validFrameThresholdRatio=0.7):
        # Applies a window function to the frame
        windowedFrame = window(frame)

        # Computes time domain features
        frameZCR = zcr(windowedFrame)
        frameSC = sc(windowedFrame)

        # Computes spectral features
        frameSpectrum = spectrum(windowedFrame)
        frameSR = sr(frameSpectrum)
        frameSF = sf(frameSpectrum)
        # MFCC returns (bands, coefficients); discards the bands
        mfcc_coeffs = mfcc(frameSpectrum)[1]

        # Adds the values to the pool
        pool.add('ZCR', frameZCR)
        pool.add('SC', frameSC)
        pool.add('SR', frameSR)
        pool.add('SF', frameSF)
        pool.add('mfcc', mfcc_coeffs)

    # Writes every collected feature to the output file as JSON
    YamlOutput(filename=outputPath, format='json', writeVersion=False)(pool)
|||
|
|||
# Prints a nice message to let the user know the module was imported
print('feature_extractor loaded')

# Enables executing the module as a standalone script
if __name__ == "__main__":
    import sys

    # Exits with a usage message (on stderr, non-zero status) instead of an
    # unexplained IndexError when a required argument is missing. Extra
    # arguments are still ignored, as before.
    if len(sys.argv) < 4:
        sys.exit('usage: python feature_extractor.py <audio_file_path> '
                 '<extracted_features_file_path> <audio_file_sample_rate>')

    extractFeatures(sys.argv[1], sys.argv[2], int(sys.argv[3]))
@ -1 +0,0 @@ |
|||
hello world |
Loading…
Reference in new issue