Speech/Music classification of audio files using machine learning techniques.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

35 lines
1.3 KiB

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
dataset = pd.read_pickle('../preprocessing/dataset.pkl')
target = dataset.pop('target')
# Feature evaluation
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
clf = ExtraTreesClassifier(n_estimators=1000)
clf = clf.fit(dataset, target)
model = SelectFromModel(clf, prefit=True, max_features = 6)
print('Retaining features:')
print(dataset.columns.values[model.get_support()])
reducedDataset = pd.DataFrame(model.transform(dataset),
columns = dataset.columns.values[model.get_support()])
# Every combination of the 6 best features with length equal to 4 features
import itertools
featureCombinations = itertools.combinations(range(6), 4)
for plotIndex, subset in enumerate(featureCombinations):
featurePlot = sns.pairplot(data=(reducedDataset.iloc[:, list(subset)]).assign(target=target),
hue='target', palette='Set1', vars=reducedDataset.columns.values[list(subset)]);
featurePlot.fig.savefig("output/figure_" + str(plotIndex+1) + ".png")
# sns.relplot(x="4HzMod", y="Flat", data=dataset[["4HzMod", "Flat"]], hue = target, style = target)
# sns.jointplot(x="SLAtt", y="ZCR", data=dataset[["SLAtt", "ZCR"]]);
# plt.show()