Apostolos Fanakis
6 years ago
3 changed files with 75 additions and 1 deletions
@ -0,0 +1,16 @@ |
|||||
|
# Feature extraction |
||||
|
|
||||
|
The file `feature_extractor` is a python module that uses the open-source library [Essentia](http://essentia.upf.edu/documentation/index.html) to extract audio features from a file in the path specified in the first parameter and save the features' values to a binary file in the path specified in the second parameter. |
||||
|
|
||||
|
**Dependencies:** |
||||
|
- essentia |
||||
|
- numpy |
||||
|
- scipy |
||||
|
- matplotlib |
||||
|
|
||||
|
All dependencies are available both for python2 and python3 versions and can all be installed using the commands `pip install <package_name>` or `pip3 install <package_name>` for python2 and python3 respectively. |
||||
|
|
||||
|
The module can be imported or executed as a script using one of the following commands |
||||
|
`python feature_extractor.py <audio_file_path> <extracted_features_file_path> <audio_file_sample_rate>` |
||||
|
or |
||||
|
`python3 feature_extractor.py <audio_file_path> <extracted_features_file_path> <audio_file_sample_rate>` |
@ -0,0 +1,59 @@ |
|||||
|
import essentia |
||||
|
import essentia.standard |
||||
|
from essentia.standard import * |
||||
|
import essentia.streaming |
||||
|
from pylab import plot, show, figure, imshow |
||||
|
import matplotlib.pyplot as plt |
||||
|
|
||||
|
def extractFeatures(audioPath, outputPath, sampleRate): |
||||
|
# Loads the audio file specified |
||||
|
loader = essentia.standard.MonoLoader(filename = audioPath, sampleRate = sampleRate) |
||||
|
audio = loader() |
||||
|
|
||||
|
# Sets up the functions that will be used |
||||
|
# TODO check if zero phase windowing is something we might want |
||||
|
window = Windowing(normalized = False, size = 6144, type = 'hamming', |
||||
|
zeroPhase = False) |
||||
|
spectrum = Spectrum() |
||||
|
mfcc = MFCC(inputSize = 6144, sampleRate = sampleRate) |
||||
|
zcr = ZeroCrossingRate() |
||||
|
sc = SpectralCentroidTime(sampleRate = sampleRate) |
||||
|
sr = RollOff(sampleRate = sampleRate) |
||||
|
sf = Flux() |
||||
|
|
||||
|
# Creates a pool to collect the values of the features |
||||
|
pool = essentia.Pool() |
||||
|
|
||||
|
# Slices the signal into frames |
||||
|
for frame in FrameGenerator(audio, frameSize = 6144, hopSize = 3072, |
||||
|
startFromZero = True , validFrameThresholdRatio = 0.7): |
||||
|
# Applies a window function to the frame |
||||
|
windowedFrame = window(frame) |
||||
|
|
||||
|
# Computes time domain features |
||||
|
frameZCR = zcr(windowedFrame) |
||||
|
frameSC = sc(windowedFrame) |
||||
|
|
||||
|
# Computes spectral features |
||||
|
frameSpectrum = spectrum(windowedFrame) |
||||
|
frameSR = sr(frameSpectrum) |
||||
|
frameSF = sf(frameSpectrum) |
||||
|
# Discards the bands |
||||
|
mfcc_coeffs = mfcc(frameSpectrum)[1] |
||||
|
|
||||
|
# Adds the values to the pool |
||||
|
pool.add('ZCR', frameZCR) |
||||
|
pool.add('SC', frameSC) |
||||
|
pool.add('SR', frameSR) |
||||
|
pool.add('SF', frameSF) |
||||
|
pool.add('mfcc', mfcc_coeffs) |
||||
|
|
||||
|
YamlOutput(filename = outputPath, format = 'json', writeVersion = False)(pool) |
||||
|
|
||||
|
# Prints a nice message to let the user know the module was imported |
||||
|
print('feature_extractor loaded') |
||||
|
|
||||
|
# Enables executing the module as a standalone script |
||||
|
if __name__ == "__main__": |
||||
|
import sys |
||||
|
extractFeatures(sys.argv[1], sys.argv[2], int(sys.argv[3])) |
@ -1 +0,0 @@ |
|||||
hello world |
|
Loading…
Reference in new issue