# File listing metadata: 83 lines, 1.6 KiB
# Load the Heart data set and take a first look at it ----
#
# NOTE: the original script started with `rm(list = ls())` and ended this
# section with `attach(rawdata)`. Both were dropped on purpose: wiping the
# caller's workspace is a destructive side effect, and nothing in this script
# reads the attached column names (every access goes through `rawdata` or
# `numdata`).

# Set the environment: location of the input CSV (user-specific path).
path <- "~/Dropbox/MachineLearningAlgorithms/python_code/data/Heart.csv"

rawdata <- read.csv(path)

# Summary of the data. Results are wrapped in print() so they are also shown
# when the script is run through source(), not only at the interactive prompt.
print(summary(rawdata))

# Plot of the data: for a data frame this draws a scatter-plot matrix.
plot(rawdata)

# Size, first rows, last rows and column names.
print(dim(rawdata))
print(head(rawdata))
print(tail(rawdata))
print(colnames(rawdata))

# Numeric subset, correlation and covariance ----

# Keep columns 1-2 and 4-12 (the columns this script treats as numeric --
# TODO confirm against the CSV header), then drop every row that contains a
# missing value (NA).
numdata <- na.omit(rawdata[, c(1:2, 4:12)])

# Pairwise correlation and covariance matrices of the retained columns.
# print() keeps the output visible when the script is run through source().
print(cor(numdata))
print(cov(numdata))

# Correlation matrix plot ----

# Close the current graphics device only when one is actually open: a bare
# dev.off() raises an error if only the null device (device 1) is active.
if (dev.cur() > 1) {
  dev.off()
}

# Load the correlation-matrix plotting library.
library(corrplot)

# Pairwise correlations of the numeric columns.
M <- cor(numdata)

# Alternative rendering, kept for reference:
# par(mfrow = c(1, 2))
# corrplot(M, method = "square")
corrplot.mixed(M)

# Dimensions and column types ----

# Row and column counts of the raw data. The variables are named n_rows and
# n_cols so they do not mask base::nrow() and base::ncol().
n_rows <- nrow(rawdata)
n_cols <- ncol(rawdata)
print(c(n_rows, n_cols))

# Number of variables in the numeric subset.
Nvars <- ncol(numdata)

# Checking data format: storage type of the whole object.
print(typeof(rawdata))

# Install mlbench only when it is missing, instead of re-installing it (and
# requiring network access) on every run of the script.
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench")
}
library(mlbench)

# Class of every column of the raw data.
print(sapply(rawdata, class))

# Box plots, one panel per numeric variable ----

# Close the current graphics device only when one is open; dev.off() errors
# on the null device.
if (dev.cur() > 1) {
  dev.off()
}

name <- colnames(numdata)
Nvars <- ncol(numdata)

# Boxplot: lay the panels out on a 4 x 3 grid.
par(mfrow = c(4, 3))

# seq_len() yields an empty sequence when there are no columns, whereas
# 1:Nvars would iterate over c(1, 0).
for (i in seq_len(Nvars)) {
  # Grouped variant, kept for reference:
  # boxplot(numdata[, i] ~ numdata[, Nvars], data = data, main = name[i])
  #
  # `data =` only applies to the formula interface, so it is not passed here.
  boxplot(numdata[, i], main = name[i])
}

# Histograms with a normal curve overlay ----

#' Draw a frequency histogram of `x` with a fitted normal curve on top
#'
#' @param x Numeric vector to plot.
#' @param label Text used as the x-axis label.
#' @return The object returned by `hist()`, invisibly.
plot_hist_with_normal <- function(x, label) {
  h <- hist(x, breaks = 10, freq = TRUE, col = "blue", xlab = label,
            main = " ", font.lab = 1)

  # Ticks with tck = 1 span the whole plot region and act as light-gray grid
  # lines; the short negative ticks then redraw ordinary outward tick marks.
  axis(1, tck = 1, col.ticks = "light gray")
  axis(1, tck = -0.015, col.ticks = "black")
  axis(2, tck = 1, col.ticks = "light gray", lwd.ticks = 1)
  axis(2, tck = -0.015)

  # Normal density using the sample mean and standard deviation, evaluated on
  # 40 points across the observed range.
  xfit <- seq(min(x), max(x), length.out = 40)
  yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))

  # Rescale the density to the count axis: density * bin width * sample size.
  # The bin width is taken from the breaks, which always has at least two
  # entries, whereas the mids have only one for a single-bin histogram.
  yfit <- yfit * diff(h$breaks[1:2]) * length(x)
  lines(xfit, yfit, col = "blue", lwd = 2)

  invisible(h)
}

# Close the current graphics device only when one is open; dev.off() errors
# on the null device.
if (dev.cur() > 1) {
  dev.off()
}

Nvars <- ncol(numdata)
name <- colnames(numdata)

# Lay the panels out on a 3 x 5 grid.
par(mfrow = c(3, 5))

# seq_len() is safe when numdata has no columns (1:Nvars would give c(1, 0)).
for (i in seq_len(Nvars)) {
  plot_hist_with_normal(numdata[, i], name[i])
}

# Faceted histograms of the `diamonds` data set ----

library(reshape2)
library(ggplot2)

# Drop columns 2-4 of `diamonds` (presumably its categorical columns -- TODO
# confirm) and reshape the rest to long format; the plot below reads the
# resulting `variable` and `value` columns.
d <- melt(diamonds[, -c(2:4)])

# One histogram per variable, each panel with its own x scale.
# - bins = 30 spells out the default bin count, which silences the
#   "pick a better value" message without changing the picture.
# - print() makes the plot render when the script is run through source(),
#   where top-level ggplot objects are not auto-printed.
print(
  ggplot(d, aes(x = value)) +
    facet_wrap(~variable, scales = "free_x") +
    geom_histogram(bins = 30)
)