SlideShare a Scribd company logo
Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r...
1 of 7 11/23/2020, 5:27 PM
Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r...
2 of 7 11/23/2020, 5:27 PM
library('e1071')
# Load the heart-disease data set from a local CSV file.
# The path is machine-specific; point `file` at your own copy of heart.csv.
file <- 'c://Users/rk215/Data/heart.csv'
# `header` is spelled out (the original relied on partial matching of `head`)
# and TRUE/FALSE replace T/F, which are ordinary variables that can be
# reassigned.
heart <- read.csv(file, header = TRUE, sep = ',', stringsAsFactors = FALSE)
# Show the first rows as a quick sanity check of the import.
head(heart)
##   age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
## 1  63   1  3      145  233   1       0     150     0     2.3     0  0    1
## 2  37   1  2      130  250   0       1     187     0     3.5     0  0    2
## 3  41   0  1      130  204   0       0     172     0     1.4     2  0    2
## 4  56   1  1      120  236   0       1     178     0     0.8     2  0    2
## 5  57   0  0      120  354   0       1     163     1     0.6     2  0    2
## 6  57   1  0      140  192   0       1     148     0     0.4     1  0    1
## target
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
# Hold-out evaluation of a naive Bayes classifier on the heart data.
#
# Keep eight predictors (sex, cp, fbs, restecg, exang, slope, ca, thal) plus
# the response `target`, which becomes column 9 of `catheart`.
# NOTE(review): the predictors are read in as integers, so naiveBayes() will
# presumably treat them as numeric rather than categorical -- confirm that
# this is intended.
catheart <- heart[, c(2, 3, 6, 7, 9, 11, 12, 13, 14)]
set.seed(43)
# 70/30 train/test split. floor() makes explicit the truncation that sample()
# would otherwise apply silently to the fractional size.
trdidx <- sample(
  seq_len(nrow(catheart)),
  floor(0.7 * nrow(catheart)),
  replace = FALSE
)
trcatheart <- catheart[trdidx, ]
tstcatheart <- catheart[-trdidx, ]
nb.model <- naiveBayes(target ~ ., data = trcatheart)
# str(nb.model)
object.size(nb.model) #11096
## 11096 bytes
# Per-class probabilities for the test rows, one column per class.
nb.pred <- predict(
  nb.model,
  tstcatheart[, setdiff(names(tstcatheart), "target"), drop = FALSE],
  type = 'raw'
)
# Most probable class per row. Classes are coded 0, 1, ... in column order,
# hence the "- 1". The probabilities are deliberately NOT rounded first: with
# more than two classes rounding can zero out a whole row and make which.max()
# fall back to the first class.
nb.class <- apply(nb.pred, 1, which.max) - 1
# caret::confusionMatrix() expects predictions in the rows and the reference
# in the columns. The original passed table(actual, predicted), which swaps
# Sensitivity with Pos Pred Value and Specificity with Neg Pred Value and
# changes the no-information rate. Shared factor levels keep the table square
# even if one class is never predicted.
# NOTE: the transcript printed after this code in the file was produced with
# the transposed table, so those figures will differ from what this prints.
nb.levels <- seq_len(ncol(nb.pred)) - 1
nb.tbl <- table(
  Prediction = factor(nb.class, levels = nb.levels),
  Reference = factor(tstcatheart[["target"]], levels = nb.levels)
)
nb.cfm <- caret::confusionMatrix(nb.tbl)
nb.cfm
Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r...
3 of 7 11/23/2020, 5:27 PM
## Confusion Matrix and Statistics
##
## nb.class
## 0 1
## 0 28 12
## 1 3 48
##
## Accuracy : 0.8352
## 95% CI : (0.7427, 0.9047)
## No Information Rate : 0.6593
## P-Value [Acc > NIR] : 0.0001482
##
## Kappa : 0.6571
##
## Mcnemar's Test P-Value : 0.0388671
##
## Sensitivity : 0.9032
## Specificity : 0.8000
## Pos Pred Value : 0.7000
## Neg Pred Value : 0.9412
## Prevalence : 0.3407
## Detection Rate : 0.3077
## Detection Prevalence : 0.4396
## Balanced Accuracy : 0.8516
##
## 'Positive' Class : 0
##
# Build NF contiguous, (almost) equal-sized folds of row positions 1..N for
# cross-validation on the training set.
start_tm <- proc.time()
N <- nrow(trcatheart)
NF <- 10
# Quantile break points split the positions into NF intervals.
# include.lowest = TRUE is required: cut() intervals are open on the left by
# default, so position 1 would fall outside every interval, become NA and be
# dropped by split() -- that row would never be used for validation.
# NOTE: the fold listing printed after this code in the file predates this
# fix (first fold labelled "(1,22.1]" with 21 rows); the first fold is now
# "[1,22.1]" and holds one more row.
fold_breaks <- quantile(seq_len(N), probs = seq(0, 1, by = 1 / NF))
folds <- split(
  seq_len(N),
  cut(seq_len(N), fold_breaks, include.lowest = TRUE)
)
length(folds)
## [1] 10
lapply(folds, length)
Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r...
4 of 7 11/23/2020, 5:27 PM
## $`(1,22.1]`
## [1] 21
##
## $`(22.1,43.2]`
## [1] 21
##
## $`(43.2,64.3]`
## [1] 21
##
## $`(64.3,85.4]`
## [1] 21
##
## $`(85.4,106]`
## [1] 21
##
## $`(106,128]`
## [1] 21
##
## $`(128,149]`
## [1] 21
##
## $`(149,170]`
## [1] 21
##
## $`(170,191]`
## [1] 21
##
## $`(191,212]`
## [1] 22
# K-fold cross-validation of naive Bayes on the training set.
#
# Shuffle the training rows once so that the contiguous position ranges in
# `folds` correspond to random subsets of the data.
ridx <- sample(seq_len(nrow(trcatheart)), nrow(trcatheart), replace = FALSE)
# For every fold: fit on all other rows, predict the held-out rows and store
# the fold positions, the fitted model and the caret confusion matrix.
# rbind-ing the per-fold lists yields a list-matrix with one row per fold and
# columns `fold`, `m`, `cfm`.
cv_df <- do.call('rbind', lapply(
  folds,
  FUN = function(idx, data = trcatheart[ridx, ]) {
    predictors <- setdiff(names(data), "target")
    # Train with this fold held out for validation.
    m <- naiveBayes(target ~ ., data = data[-idx, ])
    # Class probabilities for the held-out rows.
    p <- predict(m, data[idx, predictors, drop = FALSE], type = 'raw')
    # Most probable class; classes are coded 0, 1, ... in column order.
    pc <- apply(p, 1, which.max) - 1
    # caret::confusionMatrix() wants predictions in rows and the reference in
    # columns; the original table(actual, predicted) swapped Sensitivity with
    # Pos Pred Value and Specificity with Neg Pred Value. Shared levels keep
    # the table square when a small fold misses a class.
    cls <- seq_len(ncol(p)) - 1
    pred_tbl <- table(
      Prediction = factor(pc, levels = cls),
      Reference = factor(data[idx, "target"], levels = cls)
    )
    pred_cfm <- caret::confusionMatrix(pred_tbl)
    # `fold` holds positions into the shuffled data, not original row numbers.
    list(fold = idx, m = m, cfm = pred_cfm)
  }
))
Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r...
5 of 7 11/23/2020, 5:27 PM
# Summarise the per-fold validation performance.
#
# Turn the list-matrix of fold results into a data frame with list columns.
cv_df <- as.data.frame(cv_df)
# One row per fold: caret's overall statistics followed by the by-class ones.
tstcv.perf <- as.data.frame(do.call('rbind', lapply(
  cv_df$cfm,
  FUN = function(cfm) c(cfm$overall, cfm$byClass)
)))
# Mean of every metric across folds, printed via the outer parentheses.
# Columns 6:7 (AccuracyPValue, McnemarPValue) are dropped from the summary.
# The original text had `mean` broken across two lines by page wrapping,
# which does not parse; it is restored here.
# NOTE(review): only folds with AccuracyPValue < 0.01 are averaged. Selecting
# folds on their own performance presumably biases the estimate upwards --
# confirm whether all folds should be included instead.
(cv.tst.perf <- vapply(
  tstcv.perf[tstcv.perf$AccuracyPValue < 0.01, -c(6:7)],
  mean,
  numeric(1)
))
## Accuracy Kappa AccuracyLower
## 0.8683983 0.7318000 0.6545460
## AccuracyUpper AccuracyNull Sensitivity
## 0.9700452 0.5666667 0.8373377
## Specificity Pos Pred Value Neg Pred Value
## 0.8900699 0.8924825 0.8629060
## Precision Recall F1
## 0.8924825 0.8373377 0.8583395
## Prevalence Detection Rate Detection Prevalence
## 0.4523810 0.3766234 0.4324675
## Balanced Accuracy
## 0.8637038
# Fold-to-fold spread of every metric. Despite the `.var` suffix this is the
# standard deviation. Same fold filter and same dropped columns (6:7) as the
# mean summary; the outer parentheses print the result.
(cv.tst.perf.var <- local({
  significant <- tstcv.perf$AccuracyPValue < 0.01
  metric_cols <- setdiff(seq_along(tstcv.perf), 6:7)
  vapply(tstcv.perf[significant, metric_cols], sd, numeric(1))
}))
## Accuracy Kappa AccuracyLower
## 0.06018717 0.11323819 0.07464027
## AccuracyUpper AccuracyNull Sensitivity
## 0.02365967 0.07221786 0.06971453
## Specificity Pos Pred Value Neg Pred Value
## 0.12234605 0.10504433 0.07414154
## Precision Recall F1
## 0.10504433 0.06971453 0.04174328
## Prevalence Detection Rate Detection Prevalence
## 0.08908708 0.06878895 0.12277136
## Balanced Accuracy
## 0.05126200
Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r...
6 of 7 11/23/2020, 5:27 PM
# Score every cross-validation fold model on the untouched hold-out set.
#
# Class probabilities from each fold's model for the test predictors
# (column 9 of `tstcatheart` is the response and is excluded).
tstcv_preds <- lapply(
  cv_df$m,
  FUN = function(M, D = tstcatheart[, -9]) predict(M, D, type = 'raw')
)
# One caret confusion matrix per fold model, predictions in rows and the
# reference in columns.
tstcv_cfm <- lapply(
  tstcv_preds,
  FUN = function(P, A = tstcatheart[[9]]) {
    # Most probable class; classes are coded 0, 1, ... in column order.
    pred_class <- apply(P, 1, which.max) - 1
    # Shared levels keep the table square even when a model never predicts
    # one of the classes; confusionMatrix() rejects non-square tables.
    cls <- seq_len(ncol(P)) - 1
    pred_tbl <- table(
      pred_class = factor(pred_class, levels = cls),
      A = factor(A, levels = cls)
    )
    caret::confusionMatrix(pred_tbl)
  }
)
# One row per fold model: overall statistics followed by by-class statistics.
tstcv.perf <- as.data.frame(do.call('rbind', lapply(
  tstcv_cfm,
  FUN = function(cfm) c(cfm$overall, cfm$byClass)
)))
# Mean and standard deviation of each metric across the fold models, without
# the two p-value columns (6:7). The original text had `mean` broken across
# two lines by page wrapping, which does not parse; it is restored here.
# NOTE(review): only models with AccuracyPValue < 0.01 are summarised --
# confirm that this filter is intended.
cv.tst.perf <- vapply(
  tstcv.perf[tstcv.perf$AccuracyPValue < 0.01, -c(6:7)],
  mean,
  numeric(1)
)
cv.tst.perf.var <- vapply(
  tstcv.perf[tstcv.perf$AccuracyPValue < 0.01, -c(6:7)],
  sd,
  numeric(1)
)
###################
Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r...
7 of 7 11/23/2020, 5:27 PM

More Related Content

PDF
M11 bagging loo cv
PDF
M12 random forest-part01
PDF
Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practiti...
KEY
実践 memcached
PDF
Comparative Genomics with GMOD and BioPerl
PDF
Redis 101
TXT
Command
PPTX
Data manipulation and visualization in r 20190711 myanmarucsy
M11 bagging loo cv
M12 random forest-part01
Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practiti...
実践 memcached
Comparative Genomics with GMOD and BioPerl
Redis 101
Command
Data manipulation and visualization in r 20190711 myanmarucsy

What's hot (17)

PDF
Bytes in the Machine: Inside the CPython interpreter
PPTX
Gevent rabbit rpc
PPTX
System Calls
PDF
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
PDF
Cooking pies with Celery
PDF
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
PDF
Sangam 19 - Successful Applications on Autonomous
DOC
Capital onehadoopclass
PPTX
Detection of errors and potential vulnerabilities in C and C++ code using the...
PDF
Parallel Computing With Dask - PyDays 2017
PDF
Sangam 19 - PLSQL still the coolest
PDF
python高级内存管理
PDF
Simple Ways To Be A Better Programmer (OSCON 2007)
PDF
Playing 44CON CTF for fun and profit
PDF
Agile Database Development with JSON
PDF
Python testing-frameworks overview
PDF
PostgreSQL Procedural Languages: Tips, Tricks and Gotchas
Bytes in the Machine: Inside the CPython interpreter
Gevent rabbit rpc
System Calls
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Cooking pies with Celery
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
Sangam 19 - Successful Applications on Autonomous
Capital onehadoopclass
Detection of errors and potential vulnerabilities in C and C++ code using the...
Parallel Computing With Dask - PyDays 2017
Sangam 19 - PLSQL still the coolest
python高级内存管理
Simple Ways To Be A Better Programmer (OSCON 2007)
Playing 44CON CTF for fun and profit
Agile Database Development with JSON
Python testing-frameworks overview
PostgreSQL Procedural Languages: Tips, Tricks and Gotchas
Ad

Similar to M09-Cross validating-naive-bayes (20)

PPTX
HRFLM important engineering topics for first sem.pptx
PPTX
21049542 Nischal Khatiwada Islington.pptx
PDF
IRJET -Improving the Accuracy of the Heart Disease Prediction using Hybrid Ma...
PDF
Heart disease classification using Random Forest
PDF
238_heartdisease (1).pdf
PDF
Classification examp
PDF
project_ppt_merged.pdf
PDF
Android Based Questionnaires Application for Heart Disease Prediction System
PPTX
heart final last sem.pptx
PDF
IRJET- Human Heart Disease Prediction using Ensemble Learning and Particle Sw...
PDF
E-Healthcare monitoring System for diagnosis of Heart Disease using Machine L...
PDF
Peterson_-_Machine_Learning_Project
PDF
Fundamentals of data science presentation
DOCX
KKKK.docx5555555555555555555555555555555555
PDF
Heart Disease Prediction Using Multi Feature and Hybrid Approach
PDF
Accuracy based-stacked ensemble learning model for the prediction of coronary...
PPTX
Heart Disease Prediction Analysis - Sushil Gupta.pptx
PDF
M03 nb-02
PPTX
DIAGNOSIS OF CARDIOVASCULAR DISEASE USING MACHINE LEARNING ALGORITHMS.pptx
PPTX
ppt.pptx
HRFLM important engineering topics for first sem.pptx
21049542 Nischal Khatiwada Islington.pptx
IRJET -Improving the Accuracy of the Heart Disease Prediction using Hybrid Ma...
Heart disease classification using Random Forest
238_heartdisease (1).pdf
Classification examp
project_ppt_merged.pdf
Android Based Questionnaires Application for Heart Disease Prediction System
heart final last sem.pptx
IRJET- Human Heart Disease Prediction using Ensemble Learning and Particle Sw...
E-Healthcare monitoring System for diagnosis of Heart Disease using Machine L...
Peterson_-_Machine_Learning_Project
Fundamentals of data science presentation
KKKK.docx5555555555555555555555555555555555
Heart Disease Prediction Using Multi Feature and Hybrid Approach
Accuracy based-stacked ensemble learning model for the prediction of coronary...
Heart Disease Prediction Analysis - Sushil Gupta.pptx
M03 nb-02
DIAGNOSIS OF CARDIOVASCULAR DISEASE USING MACHINE LEARNING ALGORITHMS.pptx
ppt.pptx
Ad

More from Raman Kannan (20)

PDF
conversations-withchatGPT-Claude-gemini-on-vibration-12-112024.pdf
PDF
Essays on-civic-responsibilty
PDF
M12 boosting-part02
PDF
M10 gradient descent
PDF
M06 tree
PDF
M07 svm
PDF
M08 BiasVarianceTradeoff
PDF
Chapter 05 k nn
PDF
Chapter 04-discriminant analysis
PDF
Augmented 11022020-ieee
PDF
Chapter 02-logistic regression
PDF
Chapter01 introductory handbook
PDF
A voyage-inward-02
PDF
Evaluating classifierperformance ml-cs6923
PDF
A data scientist's study plan
PDF
Cognitive Assistants
PDF
Essay on-data-analysis
PDF
Joy of Unix
PDF
How to-run-ols-diagnostics-02
PDF
Sdr dodd frankbirdseyeview
conversations-withchatGPT-Claude-gemini-on-vibration-12-112024.pdf
Essays on-civic-responsibilty
M12 boosting-part02
M10 gradient descent
M06 tree
M07 svm
M08 BiasVarianceTradeoff
Chapter 05 k nn
Chapter 04-discriminant analysis
Augmented 11022020-ieee
Chapter 02-logistic regression
Chapter01 introductory handbook
A voyage-inward-02
Evaluating classifierperformance ml-cs6923
A data scientist's study plan
Cognitive Assistants
Essay on-data-analysis
Joy of Unix
How to-run-ols-diagnostics-02
Sdr dodd frankbirdseyeview

Recently uploaded (20)

PPT
Predictive modeling basics in data cleaning process
PDF
[EN] Industrial Machine Downtime Prediction
PPTX
Qualitative Qantitative and Mixed Methods.pptx
PPT
Reliability_Chapter_ presentation 1221.5784
PPT
Quality review (1)_presentation of this 21
PPTX
Acceptance and paychological effects of mandatory extra coach I classes.pptx
PPTX
AI Strategy room jwfjksfksfjsjsjsjsjfsjfsj
PPTX
Microsoft-Fabric-Unifying-Analytics-for-the-Modern-Enterprise Solution.pptx
PPTX
IBA_Chapter_11_Slides_Final_Accessible.pptx
PDF
Business Analytics and business intelligence.pdf
PDF
22.Patil - Early prediction of Alzheimer’s disease using convolutional neural...
PDF
Data Engineering Interview Questions & Answers Cloud Data Stacks (AWS, Azure,...
PPTX
climate analysis of Dhaka ,Banglades.pptx
PDF
Clinical guidelines as a resource for EBP(1).pdf
PPTX
Introduction to Basics of Ethical Hacking and Penetration Testing -Unit No. 1...
PDF
Optimise Shopper Experiences with a Strong Data Estate.pdf
PDF
annual-report-2024-2025 original latest.
PPTX
modul_python (1).pptx for professional and student
PPTX
iec ppt-1 pptx icmr ppt on rehabilitation.pptx
PPTX
Computer network topology notes for revision
Predictive modeling basics in data cleaning process
[EN] Industrial Machine Downtime Prediction
Qualitative Qantitative and Mixed Methods.pptx
Reliability_Chapter_ presentation 1221.5784
Quality review (1)_presentation of this 21
Acceptance and paychological effects of mandatory extra coach I classes.pptx
AI Strategy room jwfjksfksfjsjsjsjsjfsjfsj
Microsoft-Fabric-Unifying-Analytics-for-the-Modern-Enterprise Solution.pptx
IBA_Chapter_11_Slides_Final_Accessible.pptx
Business Analytics and business intelligence.pdf
22.Patil - Early prediction of Alzheimer’s disease using convolutional neural...
Data Engineering Interview Questions & Answers Cloud Data Stacks (AWS, Azure,...
climate analysis of Dhaka ,Banglades.pptx
Clinical guidelines as a resource for EBP(1).pdf
Introduction to Basics of Ethical Hacking and Penetration Testing -Unit No. 1...
Optimise Shopper Experiences with a Strong Data Estate.pdf
annual-report-2024-2025 original latest.
modul_python (1).pptx for professional and student
iec ppt-1 pptx icmr ppt on rehabilitation.pptx
Computer network topology notes for revision

M09-Cross validating-naive-bayes