|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
from feature_extraction.feature_extractor import extractFeatures
|
|
|
|
from feature_extraction.batch_feature_extractor import batchExtract
|
|
|
|
from preprocessing.data_preprocessing import arrayFromJSON, standardization, PCA
|
|
|
|
from training.model_training import simpleTrain, kFCrossValid
|
|
|
|
|
|
|
|
# Extract features for each class and add a constant `target` column:
# 0 for the music clips, 1 for the speech clips.
# NOTE(review): the second argument looks like an output directory for the
# extracted features and 22050 looks like a sample rate in Hz — confirm
# against batchExtract.
musicFeatures = batchExtract(
    '../dataset/music_wav/',
    'feature_extraction/music_features/',
    22050,
).assign(target=0)

speechFeatures = batchExtract(
    '../dataset/speech_wav/',
    'feature_extraction/speech_features/',
    22050,
).assign(target=1)
|
|
|
|
|
|
|
|
# Stack both classes into one table, then split the label column off so
# that only the feature columns remain in `dataset`.
dataset = pd.concat((musicFeatures, speechFeatures))
target = dataset.pop('target').values

# Pass the feature table through the project's standardization helper.
dataset = standardization(dataset)

# The PCA step is currently disabled; uncomment the line below to re-enable it.
# _, dataset = PCA(dataset)
|
|
|
|
|
|
|
|
# Report the value returned by simpleTrain as the accuracy; `!s` keeps the
# exact str() rendering of that value.
print(f'Simple train accuracy achieved = {simpleTrain(dataset, target)!s}')

# Run kFCrossValid with the 'svm' model; its return value is not kept.
kFCrossValid(dataset, target, model='svm')

# Run kFCrossValid with the 'rndForest' model and keep what it returns as
# `clf`, which is used for prediction further down.
clf = kFCrossValid(dataset, target, model='rndForest')
|
|
|
|
|
|
|
|
# Classify one standalone recording with the model kept in `clf`.
# NOTE(review): the sample is passed through standardization() on its own;
# if that helper derives its statistics from its input, the result will not
# match the scaling applied to the training data — confirm.
# NOTE(review): 'compined.wav' may be a misspelling of 'combined.wav' —
# check the file name on disk before changing it.
features = standardization(extractFeatures('compined.wav', 'tmp.json', 22050))
audioClass = clf.predict(features)
print(audioClass)
|