Speech/Music classification of audio files using machine learning techniques.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

109 lines
2.3 KiB

'''
Created on Apr 25, 2016
test code
@author: Wenqiang Feng
'''
import pandas as pd
#import numpy as np
import matplotlib.pyplot as plt
from pandas.tools.plotting import scatter_matrix
from docutils.parsers.rst.directives import path
if __name__ == '__main__':
    # Exploratory look at the Heart data set: print summary tables to stdout
    # and draw a series of diagnostic figures, one plt.show() per figure.
    #
    # Every print below passes a single, already-formatted argument so the
    # output is the same whether print is the Python 2 statement or the
    # Python 3 function.

    # Named heart_csv / energy_xlsx rather than "path" so the file-level
    # "path" import is not shadowed.
    heart_csv = ('~/Dropbox/MachineLearningAlgorithms/python_code/data/'
                 'Heart.csv')
    rawdata = pd.read_csv(heart_csv)

    print("data summary")
    print(rawdata.describe())

    # summary plot of the data
    scatter_matrix(rawdata, figsize=[15, 15])
    plt.show()

    # Histogram
    rawdata.hist()
    plt.show()

    # boxplot
    rawdata.boxplot()
    plt.show()

    print("Raw data size")
    nrow, ncol = rawdata.shape
    print("%d %d" % (nrow, ncol))

    # NOTE(review): this path is absolute and machine-specific, and the frame
    # loaded from it is not used anywhere below.  The size printed right after
    # it is still that of the Heart data -- confirm whether it was meant to
    # describe the energy-efficiency data instead.
    energy_xlsx = ('/home/feng/Dropbox/MachineLearningAlgorithms/python_code/data/'
                   'energy_efficiency.xlsx')
    # The first sheet is read_excel's default, so no sheet keyword is passed
    # (the keyword is spelled differently across pandas releases).
    rawdataEnergy = pd.read_excel(energy_xlsx)

    print("%d %d" % rawdata.shape)

    print("Column names:")
    print(rawdata.columns.tolist())
    print("Data Format:")
    print(rawdata.dtypes)
    print("\nSample data:")
    print(rawdata.head(6))

    print("\n correlation Matrix")
    # Computed once and reused for the heat map further down.
    corr_matrix = rawdata.corr()
    print(corr_matrix)
    print("\n covariance Matrix")
    print(rawdata.cov())
    print(rawdata[['Age', 'Ca']].corr())

    # Imported at this point rather than at the top of the block so the
    # figures above are drawn before seaborn is loaded (older seaborn
    # releases are understood to restyle matplotlib on import -- TODO confirm
    # for the installed version).
    from scipy import stats
    import seaborn as sns

    # correlation Matrix plot: annotated heat map of the pair-wise
    # correlations.  sns.heatmap replaces sns.corrplot, which current seaborn
    # no longer provides.
    sns.heatmap(corr_matrix, annot=True)
    # save to file, remove the big white borders
    #plt.savefig('attribute_correlations.png', tight_layout=True)
    plt.show()

    # Distribution of Age: histogram with a KDE curve, then a histogram with
    # a fitted gamma density instead of the KDE.
    age = rawdata['Age']
    sns.distplot(age)
    plt.show()
    sns.distplot(age, kde=False, fit=stats.gamma)
    plt.show()

    # The same two views stacked in a single figure (2 rows, 1 column).
    plt.figure(1)
    plt.subplot(211)  # first of 2 rows, 1 col
    sns.distplot(age)
    plt.subplot(212)  # second of 2 rows, 1 col
    sns.distplot(age, kde=False, fit=stats.gamma)
    # Figure-level title.  plt.title() before the subplots existed would put
    # the title on an implicit full-figure axes instead of on this layout.
    plt.suptitle('Histogram of Age')
    plt.show()