import numpy as np


class bcolors:
    # ANSI escape sequences for colored terminal output.
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    ENDC = '\033[0m'  # resets color back to the terminal default


def simpleTrain(dataset, target, model='all'):
    """Train classifier(s) on a single 60/40 train/test split and return accuracy.

    Parameters
    ----------
    dataset : array-like of shape (n_samples, n_features)
        Feature matrix.
    target : array-like of shape (n_samples,)
        Class labels.
    model : str, optional
        One of 'svm', 'dtree', 'nn', 'bayes', or 'all' (default).

    Returns
    -------
    float or None
        Test-set accuracy of the requested model, the maximum accuracy over
        all four models when model='all', or None when `model` is not
        recognized (an error message is printed in that case).
    """
    from sklearn.model_selection import train_test_split

    # Fixed random_state keeps the split reproducible across runs.
    trainingSet, testSet, trainingTarget, testTarget = train_test_split(
        dataset, target, test_size=0.4, random_state=0)

    # Accumulate each trained model's test accuracy keyed by model name.
    accuracies = {}

    if model in ('svm', 'all'):
        # Support Vector Machine
        from sklearn.svm import SVC
        clf = SVC(gamma='scale')
        clf.fit(trainingSet, trainingTarget)
        accuracies['svm'] = clf.score(testSet, testTarget)

    if model in ('dtree', 'all'):
        # Decision tree
        from sklearn import tree
        clf = tree.DecisionTreeClassifier()
        clf.fit(trainingSet, trainingTarget)
        accuracies['dtree'] = clf.score(testSet, testTarget)

    if model in ('nn', 'all'):
        # Multi-layer Perceptron
        from sklearn.neural_network import MLPClassifier
        clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                            hidden_layer_sizes=(5, 3), random_state=2)
        clf.fit(trainingSet, trainingTarget)
        accuracies['nn'] = clf.score(testSet, testTarget)

    if model in ('bayes', 'all'):
        # Naive Bayes
        from sklearn.naive_bayes import GaussianNB
        clf = GaussianNB()
        clf.fit(trainingSet, trainingTarget)
        accuracies['bayes'] = clf.score(testSet, testTarget)

    if not accuracies:
        # Fix: an unrecognized model previously fell through and returned
        # None silently; report it the same way kFCrossValid does.
        print('Error. model specified not supported')
        return None

    if model == 'all':
        return max(accuracies.values())
    return accuracies[model]


def kFCrossValid(dataset, target, model='svm'):
    """Run 5-fold cross-validation for one classifier, printing each fold's score.

    Parameters
    ----------
    dataset : numpy array of shape (n_samples, n_features)
        Feature matrix (must support fancy indexing by index arrays).
    target : numpy array of shape (n_samples,)
        Class labels.
    model : str, optional
        One of 'svm' (default), 'dtree', 'nn', 'bayes'.

    Returns
    -------
    None
        Scores are printed per fold; None is returned (also on an
        unsupported `model`, after printing an error message).
    """
    clf = None
    if model == 'svm':
        # Support Vector Machine
        from sklearn.svm import SVC
        clf = SVC(gamma='scale')
    elif model == 'dtree':
        # Decision tree
        from sklearn import tree
        clf = tree.DecisionTreeClassifier()
    elif model == 'nn':
        # Multi-layer Perceptron
        from sklearn.neural_network import MLPClassifier
        clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                            hidden_layer_sizes=(5, 3), random_state=2)
    elif model == 'bayes':
        # Naive Bayes
        from sklearn.naive_bayes import GaussianNB
        clf = GaussianNB()
    else:
        print('Error. model specified not supported')
        return None

    from sklearn.model_selection import KFold

    # shuffle=True with a fixed random_state: reproducible but not
    # order-dependent folds.
    kf = KFold(n_splits=5, shuffle=True, random_state=2)
    for k, (train_index, test_index) in enumerate(kf.split(dataset)):
        kTrainSet, kTestSet = dataset[train_index], dataset[test_index]
        kTrainTarget, kTestTarget = target[train_index], target[test_index]
        clf.fit(kTrainSet, kTrainTarget)
        # Score reported as a percentage.
        print("[fold {0}], score: {1:.2f}".format(
            k, 100 * clf.score(kTestSet, kTestTarget)))


# Prints a nice message to let the user know the module was imported
print(bcolors.BLUE + 'model_training loaded' + bcolors.ENDC)

# Enables executing the module as a standalone script:
#   python model_training.py <data_dir/>
# where <data_dir/> contains dataset.npy, target.npy, featureKeys.npy
if __name__ == "__main__":
    import sys
    dataset = np.load(sys.argv[1] + 'dataset.npy')
    target = np.load(sys.argv[1] + 'target.npy')
    # NOTE(review): featureKeys is loaded but unused here — presumably kept
    # for interactive use; confirm before removing.
    featureKeys = np.load(sys.argv[1] + 'featureKeys.npy')
    # simpleTrain(dataset, target)
    kFCrossValid(dataset, target, 'svm')