# Exploratory data analysis of the Heart data set.
#
# Reads the CSV at `path`, prints basic summaries, and draws:
#   * a pairs plot of the raw data,
#   * a mixed correlation plot of the selected columns,
#   * one boxplot per selected column,
#   * one histogram per selected column with a normal curve overlaid,
#   * faceted histograms of the numeric columns of ggplot2's `diamonds`.
#
# The script deliberately does not call rm(list = ls()) or attach():
# both modify the caller's session, and every column access below goes
# through `rawdata` / `numdata` explicitly.

# Packages ----
# Install mlbench only when it is missing instead of on every run.
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench")
}
library(corrplot)
library(mlbench)
library(reshape2)
library(ggplot2)

# Load the data ----
path <- "~/Dropbox/MachineLearningAlgorithms/python_code/data/Heart.csv"
if (!file.exists(path)) {
  stop("Data file not found: ", path, call. = FALSE)
}
rawdata <- read.csv(path)

# Overview of the raw data ----
summary(rawdata)
plot(rawdata)
dim(rawdata)
head(rawdata)
tail(rawdata)
colnames(rawdata)

# Select columns and drop incomplete rows ----
# Columns 1-2 and 4-12 are kept and rows containing NA are removed.
# NOTE(review): cor()/cov() below require these columns to be numeric;
# confirm against the CSV layout.
numdata <- na.omit(rawdata[, c(1:2, 4:12)])
M <- cor(numdata)
M
cov(numdata)

# Correlation matrix plot ----
# Close the current graphics device only if one is open; dev.off()
# raises an error when called on the null device.
if (dev.cur() > 1L) dev.off()
# par(mfrow = c(1, 2))
# corrplot(M, method = "square")
corrplot.mixed(M)

# Dimensions and column types ----
# `nrow` / `ncol` keep their original names; calls such as nrow(x) still
# resolve to the base functions because R skips non-function bindings
# when looking up a function.
nrow <- nrow(rawdata)
ncol <- ncol(rawdata)
c(nrow, ncol)

typeof(rawdata)
sapply(rawdata, class)

Nvars <- ncol(numdata)
name <- colnames(numdata)

# Boxplots: one panel per selected column ----
if (dev.cur() > 1L) dev.off()
par(mfrow = c(4, 3))
for (i in seq_len(Nvars)) {
  # boxplot(numdata[, i] ~ numdata[, Nvars], data = data, main = name[i])
  boxplot(numdata[[i]], main = name[i])
}

# Histograms with a normal curve overlaid ----
if (dev.cur() > 1L) dev.off()
par(mfrow = c(3, 5))
for (i in seq_len(Nvars)) {
  x <- numdata[[i]]
  h <- hist(
    x,
    breaks = 10, freq = TRUE, col = "blue",
    xlab = name[i], main = " ", font.lab = 1
  )
  # tck = 1 draws full-length light-gray ticks that act as grid lines;
  # the second call on each side redraws short ticks on top.
  axis(1, tck = 1, col.ticks = "light gray")
  axis(1, tck = -0.015, col.ticks = "black")
  axis(2, tck = 1, col.ticks = "light gray", lwd.ticks = 1)
  axis(2, tck = -0.015)

  xfit <- seq(min(x), max(x), length.out = 40)
  yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))
  # Scale the density to counts: bin width times number of observations.
  # The width is taken from the breaks, which always has at least two
  # entries, whereas `mids` has only one when there is a single bin.
  yfit <- yfit * diff(h$breaks[1:2]) * length(x)
  lines(xfit, yfit, col = "blue", lwd = 2)
}

# Faceted histograms of the diamonds data (ggplot2) ----
# Columns 2-4 of `diamonds` are dropped before melting to long format.
d <- melt(diamonds[, -c(2:4)])
ggplot(d, aes(x = value)) +
  facet_wrap(~variable, scales = "free_x") +
  # bins = 30 is the geom_histogram() default, stated explicitly.
  geom_histogram(bins = 30)