import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns sns.set() dataset = pd.read_pickle('../preprocessing/dataset.pkl') target = dataset.pop('target') # Feature evaluation from sklearn.ensemble import ExtraTreesClassifier from sklearn.feature_selection import SelectFromModel clf = ExtraTreesClassifier(n_estimators=1000) clf = clf.fit(dataset, target) model = SelectFromModel(clf, prefit=True, max_features = 6) print('Retaining features:') print(dataset.columns.values[model.get_support()]) reducedDataset = pd.DataFrame(model.transform(dataset), columns = dataset.columns.values[model.get_support()]) # Every combination of the 6 best features with length equal to 4 features import itertools featureCombinations = itertools.combinations(range(6), 4) for plotIndex, subset in enumerate(featureCombinations): featurePlot = sns.pairplot(data=(reducedDataset.iloc[:, list(subset)]).assign(target=target), hue='target', palette='Set1', vars=reducedDataset.columns.values[list(subset)]); featurePlot.fig.savefig("output/figure_" + str(plotIndex+1) + ".png") # sns.relplot(x="4HzMod", y="Flat", data=dataset[["4HzMod", "Flat"]], hue = target, style = target) # sns.jointplot(x="SLAtt", y="ZCR", data=dataset[["SLAtt", "ZCR"]]); # plt.show()