# Load the heart-disease data set; the path below is machine-specific.
file <- "c://Users/rk215/Data/heart.csv"
heart <- read.csv(file, header = TRUE, sep = ",", stringsAsFactors = FALSE)
# For every column, emit its name followed by its number of distinct values,
# flattened into one character vector whose names alternate cname / uvalfreq.
unlist(lapply(names(heart), FUN = function(x, data = heart) {
  c(cname = x, uvalfreq = length(unique(data[[x]])))
}))
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
1 of 14 11/23/2020, 5:44 PM
## cname uvalfreq cname uvalfreq cname uvalfreq cn
ame
## "age" "41" "sex" "2" "cp" "4" "trestb
ps"
## uvalfreq cname uvalfreq cname uvalfreq cname uvalf
req
## "49" "chol" "152" "fbs" "2" "restecg"
"3"
## cname uvalfreq cname uvalfreq cname uvalfreq cn
ame
## "thalach" "91" "exang" "2" "oldpeak" "40" "slo
pe"
## uvalfreq cname uvalfreq cname uvalfreq cname uvalf
req
## "3" "ca" "5" "thal" "4" "target"
"2"
# One row per column of `heart`: the column name and its count of distinct
# values, row-bound into a two-column character matrix (cname, uvalfreq).
nvpairsdf3 <- do.call(
  "rbind",
  lapply(names(heart), FUN = function(x, data = heart) {
    c(cname = x, uvalfreq = length(unique(data[[x]])))
  })
)
#' Select the low-cardinality (categorical-looking) columns of a data frame
#'
#' A column is kept when its number of distinct values, as counted by
#' `unique()` (so `NA` counts as a value), is at most `ncol`.
#'
#' @param dset A data frame.
#' @param ncol Largest number of distinct values a column may have and still
#'   be kept. Defaults to 7.
#' @return A data frame holding the qualifying columns of `dset` in their
#'   original order. It stays a data frame even when one or no column
#'   qualifies.
categoricalFeatures <- function(dset, ncol = 7) {
  # Count distinct values per column as integers; the counts never pass
  # through a character matrix, so no string/factor conversion is involved.
  n_distinct <- unname(vapply(
    dset,
    function(column) length(unique(column)),
    integer(1)
  ))
  # drop = FALSE keeps the result a data frame when a single column matches.
  dset[, n_distinct <= ncol, drop = FALSE]
}
X<-categoricalFeatures(heart)
table(heart$target)
##
## 0 1
## 138 165
dim(X)
## [1] 303 9
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
2 of 14 11/23/2020, 5:44 PM
# Locate the class column and count the remaining (predictor) columns.
class_col <- which(names(X) == "target")
# drop = FALSE keeps a data frame even if only one predictor column remains.
fvcnt <- ncol(X[, -class_col, drop = FALSE])
# Call set.seed() first if you want repeatable draws.
# RF implementations consider sqrt(p) features
# to avoid too many common features between trees.
# Here we are seeking to establish whether that matters:
# we are concerned about features being present in both subsets.
exp_fset1 <- sample(seq_len(fvcnt), fvcnt - 1, replace = FALSE)
exp_fset2 <- sample(seq_len(fvcnt), fvcnt - 1, replace = FALSE)
# Position-wise comparison of the two sorted subsets.
table(sort(exp_fset1) == sort(exp_fset2))
##
## TRUE
## 7
exp_fset1
## [1] 1 2 5 4 3 6 7
exp_fset2
## [1] 7 3 1 2 5 6 4
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
3 of 14 11/23/2020, 5:44 PM
##
## rcpALL 0 1
## 0 116 14
## 1 22 151
## [1] 0.8778656
## [1] 0.0000000 0.1594203 1.0000000
## [1] 0.0000000 0.9151515 1.0000000
# First experiment: fit a tree on the exp_fset1 predictors plus the class
# column. exp_fset1 indexes the predictor columns only, so it is mapped
# through the non-class column positions instead of assuming the class column
# is column 9.
X1 <- X[, c(setdiff(seq_along(X), class_col)[exp_fset1], class_col)]
rpart.model1 <- rpart(target ~ ., data = X1, minsplit = 3)
rpart.plot(rpart.model1)
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
4 of 14 11/23/2020, 5:44 PM
# Predict the training rows with the first tree, then cross-tabulate the
# predicted class (rows) against the actual class (columns).
rcp1 <- predict(
  rpart.model1,
  X1[, -which(names(X1) == "target")],
  type = "class"
)
(rpart_mtab1 <- table(rcp1, X1$target))
##
## rcp1 0 1
## 0 107 15
## 1 31 150
L1<-getMetrics(X1$target,as.numeric(rcp1))
L1$auc
## [1] 0.8422266
L1$fpr
## [1] 0.0000000 0.2246377 1.0000000
L1$tpr
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
5 of 14 11/23/2020, 5:44 PM
## [1] 0.0000000 0.9090909 1.0000000
# Second experiment: same procedure with the exp_fset2 predictors.
# exp_fset2 indexes the predictor columns only, so it is mapped through the
# non-class column positions instead of assuming the class column is column 9.
X2 <- X[, c(setdiff(seq_along(X), class_col)[exp_fset2], class_col)]
rpart.model2 <- rpart(target ~ ., data = X2, minsplit = 3)
rpart.plot(rpart.model2)
# Predict the training rows and cross-tabulate predicted (rows) against
# actual (columns).
rcp2 <- predict(
  rpart.model2,
  X2[, -which(names(X2) == "target")],
  type = "class"
)
(rpart_mtab2 <- table(rcp2, X2$target))
##
## rcp2 0 1
## 0 107 15
## 1 31 150
L2<-getMetrics(X2$target,as.numeric(rcp2))
L2$auc
## [1] 0.8422266
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
6 of 14 11/23/2020, 5:44 PM
L2$fpr
## [1] 0.0000000 0.2246377 1.0000000
L2$tpr
## [1] 0.0000000 0.9090909 1.0000000
# Third experiment: draw a fresh feature subset and fit a tree on it.
exp_fset3 <- sample(seq_len(fvcnt), fvcnt - 1, replace = FALSE)
# Map the sampled predictor indices through the non-class column positions
# instead of assuming the class column is column 9.
X3 <- X[, c(setdiff(seq_along(X), class_col)[exp_fset3], class_col)]
rpart.model3 <- rpart(target ~ ., data = X3, minsplit = 3)
# Plot the tree fitted in this step (not the second experiment's tree).
rpart.plot(rpart.model3)
# Predict the training rows and cross-tabulate predicted (rows) against
# actual (columns).
rcp3 <- predict(
  rpart.model3,
  X3[, -which(names(X3) == "target")],
  type = "class"
)
(rpart_mtab3 <- table(rcp3, X3$target))
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
7 of 14 11/23/2020, 5:44 PM
##
## rcp3 0 1
## 0 111 24
## 1 27 141
L3<-getMetrics(X3$target,as.numeric(rcp3))
L3$auc
## [1] 0.8294466
L3$fpr
## [1] 0.0000000 0.1956522 1.0000000
L3$tpr
## [1] 0.0000000 0.8545455 1.0000000
dfpred<-data.frame(actual=X$target,rcpALL,rcp1,rcp2,rcp3)
head(dfpred)
## actual rcpALL rcp1 rcp2 rcp3
## 1 1 1 1 1 1
## 2 1 1 1 1 1
## 3 1 1 1 1 1
## 4 1 1 1 1 1
## 5 1 1 1 1 0
## 6 1 1 1 1 1
table(X$target,rcpALL)
## rcpALL
## 0 1
## 0 116 22
## 1 14 151
table(X$target,rcp1)
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
8 of 14 11/23/2020, 5:44 PM
## rcp1
## 0 1
## 0 107 31
## 1 15 150
table(X$target,rcp2)
## rcp2
## 0 1
## 0 107 31
## 1 15 150
table(X$target,rcp3)
## rcp3
## 0 1
## 0 111 27
## 1 24 141
…
…
# Attach randomForest; fail immediately with a clear message when it is not
# installed instead of retrying the same require() call.
if (!require(randomForest)) {
  stop(
    "Package 'randomForest' is required but is not installed.",
    call. = FALSE
  )
}
## Loading required package: randomForest
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
# Fit a random forest on every column of X (default settings) to predict
# `target`.
rf_model<-randomForest(target~.,data=X)
# Predict the same rows the model was fitted on, with the class column removed.
rf_pred<-predict(rf_model,X[,-c(which(names(X)=="target"))])
# Cross-tabulate actual class (rows) against predicted class (columns).
rf_mtab<-table(X$target,rf_pred)
# NOTE(review): caret documents table input as predictions in rows and
# reference values in columns; rf_mtab is built the other way around, so the
# per-class metrics are presumably transposed -- confirm before relying on
# them.
rf_cmx<-caret::confusionMatrix(rf_mtab)
rf_mtab
## rf_pred
## 0 1
## 0 124 14
## 1 10 155
(rf_accuracy<-sum(diag(rf_mtab))/sum(rf_mtab))
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
9 of 14 11/23/2020, 5:44 PM
## [1] 0.9207921
rf_cmx$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul
l
## 9.207921e-01 8.399366e-01 8.844364e-01 9.485923e-01 5.577558e-0
1
## AccuracyPValue McnemarPValue
## 1.441683e-44 5.402914e-01
rf_cmx$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9253731 0.9171598 0.8985507
## Neg Pred Value Precision Recall
## 0.9393939 0.8985507 0.9253731
## F1 Prevalence Detection Rate
## 0.9117647 0.4422442 0.4092409
## Detection Prevalence Balanced Accuracy
## 0.4554455 0.9212664
rf1k_model<-randomForest(target~.,data=X,ntree=1000)
rf1k_pred<-predict(rf1k_model,X[,-c(which(names(X)=="target"))])
rf1k_mtab<-table(X$target,rf1k_pred)
rf1k_cmx<-caret::confusionMatrix(rf1k_mtab)
rf1k_mtab
## rf1k_pred
## 0 1
## 0 121 17
## 1 8 157
(rf1k_accuracy<-sum(diag(rf1k_mtab))/sum(rf1k_mtab))
## [1] 0.9174917
rf1k_cmx$overall
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
10 of 14 11/23/2020, 5:44 PM
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul
l
## 9.174917e-01 8.327704e-01 8.806106e-01 9.458942e-01 5.742574e-0
1
## AccuracyPValue McnemarPValue
## 1.656736e-40 1.095986e-01
rf1k_cmx$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9379845 0.9022989 0.8768116
## Neg Pred Value Precision Recall
## 0.9515152 0.8768116 0.9379845
## F1 Prevalence Detection Rate
## 0.9063670 0.4257426 0.3993399
## Detection Prevalence Balanced Accuracy
## 0.4554455 0.9201417
rf100_model<-randomForest(target~.,data=X,ntree=100)
rf100_pred<-predict(rf100_model,X[,-c(which(names(X)=="target"))])
rf100_mtab<-table(X$target,rf100_pred)
rf100_cmx<-caret::confusionMatrix(rf100_mtab)
rf100_mtab
## rf100_pred
## 0 1
## 0 123 15
## 1 11 154
(rf100_accuracy<-sum(diag(rf100_mtab))/sum(rf100_mtab))
## [1] 0.9141914
rf100_cmx$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul
l
## 9.141914e-01 8.265980e-01 8.767989e-01 9.431813e-01 5.577558e-0
1
## AccuracyPValue McnemarPValue
## 1.093523e-42 5.562985e-01
rf100_cmx$byClass
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
11 of 14 11/23/2020, 5:44 PM
## Sensitivity Specificity Pos Pred Value
## 0.9179104 0.9112426 0.8913043
## Neg Pred Value Precision Recall
## 0.9333333 0.8913043 0.9179104
## F1 Prevalence Detection Rate
## 0.9044118 0.4422442 0.4059406
## Detection Prevalence Balanced Accuracy
## 0.4554455 0.9145765
rf50_model<-randomForest(target~.,data=X,ntree=50)
rf50_pred<-predict(rf50_model,X[,-c(which(names(X)=="target"))])
rf50_mtab<-table(X$target,rf50_pred)
rf50_cmx<-caret::confusionMatrix(rf50_mtab)
rf50_mtab
## rf50_pred
## 0 1
## 0 124 14
## 1 11 154
# Accuracy of the 50-tree forest, stored under its own name so that the
# 1000-tree result (rf1k_accuracy) is not overwritten.
(rf50_accuracy <- sum(diag(rf50_mtab)) / sum(rf50_mtab))
## [1] 0.9174917
# NOTE(review): the two printouts that follow were captured from rf1k_cmx
# (the 1000-tree model) and must be regenerated for rf50_cmx.
rf50_cmx$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul
l
## 9.174917e-01 8.327704e-01 8.806106e-01 9.458942e-01 5.742574e-0
1
## AccuracyPValue McnemarPValue
## 1.656736e-40 1.095986e-01
rf50_cmx$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9379845 0.9022989 0.8768116
## Neg Pred Value Precision Recall
## 0.9515152 0.8768116 0.9379845
## F1 Prevalence Detection Rate
## 0.9063670 0.4257426 0.3993399
## Detection Prevalence Balanced Accuracy
## 0.4554455 0.9201417
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
12 of 14 11/23/2020, 5:44 PM
rfgrow<-grow(rf50_model,50)
rfgrow_pred<-predict(rfgrow,X[,-c(which(names(X)=="target"))])
rfgrow_mtab<-table(X$target,rfgrow_pred)
rfgrow_cmx<-caret::confusionMatrix(rfgrow_mtab)
rfgrow_mtab
## rfgrow_pred
## 0 1
## 0 123 15
## 1 9 156
(rfgrow_accuracy<-sum(diag(rfgrow_mtab))/sum(rfgrow_mtab))
## [1] 0.9207921
rfgrow_cmx$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul
l
## 9.207921e-01 8.397461e-01 8.844364e-01 9.485923e-01 5.643564e-0
1
## AccuracyPValue McnemarPValue
## 2.685766e-43 3.074342e-01
rfgrow_cmx$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9318182 0.9122807 0.8913043
## Neg Pred Value Precision Recall
## 0.9454545 0.8913043 0.9318182
## F1 Prevalence Detection Rate
## 0.9111111 0.4356436 0.4059406
## Detection Prevalence Balanced Accuracy
## 0.4554455 0.9220494
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
13 of 14 11/23/2020, 5:44 PM
M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore...
14 of 14 11/23/2020, 5:44 PM

More Related Content

PDF
M11 bagging loo cv
PDF
M09-Cross validating-naive-bayes
PDF
Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practiti...
KEY
実践 memcached
PDF
Comparative Genomics with GMOD and BioPerl
ODP
Caching and tuning fun for high scalability @ LOAD2012
PDF
Visualization of Supervised Learning with {arules} + {arulesViz}
PDF
Redis 101
M11 bagging loo cv
M09-Cross validating-naive-bayes
Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practiti...
実践 memcached
Comparative Genomics with GMOD and BioPerl
Caching and tuning fun for high scalability @ LOAD2012
Visualization of Supervised Learning with {arules} + {arulesViz}
Redis 101

What's hot (19)

TXT
Command
PPTX
PDF
Association Rule Mining with R
ODP
Beyond php - it's not (just) about the code
PPTX
Data manipulation and visualization in r 20190711 myanmarucsy
PPTX
Gevent rabbit rpc
ODP
Beyond php it's not (just) about the code
PDF
Regression and Classification with R
PDF
Bytes in the Machine: Inside the CPython interpreter
PDF
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
PDF
Parallel Computing With Dask - PyDays 2017
PPTX
System Calls
PDF
Cooking pies with Celery
PDF
Sangam 19 - Successful Applications on Autonomous
PDF
Sangam 19 - PLSQL still the coolest
ODP
Beyond PHP - It's not (just) about the code
PDF
Python testing-frameworks overview
PDF
python高级内存管理
PDF
Agile Database Development with JSON
Command
Association Rule Mining with R
Beyond php - it's not (just) about the code
Data manipulation and visualization in r 20190711 myanmarucsy
Gevent rabbit rpc
Beyond php it's not (just) about the code
Regression and Classification with R
Bytes in the Machine: Inside the CPython interpreter
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Parallel Computing With Dask - PyDays 2017
System Calls
Cooking pies with Celery
Sangam 19 - Successful Applications on Autonomous
Sangam 19 - PLSQL still the coolest
Beyond PHP - It's not (just) about the code
Python testing-frameworks overview
python高级内存管理
Agile Database Development with JSON
Ad

Similar to M12 random forest-part01 (20)

DOCX
R (Shiny Package) - Server Side Code for Decision Support System
PDF
Spark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj Talk
PDF
R is a very flexible and powerful programming language, as well as a.pdf
PPTX
Introduction to R
PDF
NCCU: Statistics in the Criminal Justice System, R basics and Simulation - Pr...
PDF
Being functional in PHP (PHPDay Italy 2016)
PPTX
Seminar PSU 10.10.2014 mme
PDF
R code for data manipulation
PDF
R code for data manipulation
PDF
Solutions for Exercises in Digital Signal Processing Using MATLAB, 4th Editio...
DOCX
A Shiny Example-- R
DOCX
Ns2programs
PDF
Do snow.rwn
TXT
R code
PDF
Rcommands-for those who interested in R.
PDF
Pre-Bootcamp introduction to Elixir
PPTX
Php functions
PDF
Prediction
PDF
Time Series Analysis and Mining with R
R (Shiny Package) - Server Side Code for Decision Support System
Spark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj Talk
R is a very flexible and powerful programming language, as well as a.pdf
Introduction to R
NCCU: Statistics in the Criminal Justice System, R basics and Simulation - Pr...
Being functional in PHP (PHPDay Italy 2016)
Seminar PSU 10.10.2014 mme
R code for data manipulation
R code for data manipulation
Solutions for Exercises in Digital Signal Processing Using MATLAB, 4th Editio...
A Shiny Example-- R
Ns2programs
Do snow.rwn
R code
Rcommands-for those who interested in R.
Pre-Bootcamp introduction to Elixir
Php functions
Prediction
Time Series Analysis and Mining with R
Ad

More from Raman Kannan (20)

PDF
conversations-withchatGPT-Claude-gemini-on-vibration-12-112024.pdf
PDF
Essays on-civic-responsibilty
PDF
M12 boosting-part02
PDF
M10 gradient descent
PDF
M06 tree
PDF
M07 svm
PDF
M08 BiasVarianceTradeoff
PDF
Chapter 05 k nn
PDF
Chapter 04-discriminant analysis
PDF
M03 nb-02
PDF
Augmented 11022020-ieee
PDF
Chapter 02-logistic regression
PDF
Chapter01 introductory handbook
PDF
A voyage-inward-02
PDF
Evaluating classifierperformance ml-cs6923
PDF
A data scientist's study plan
PDF
Cognitive Assistants
PDF
Essay on-data-analysis
PDF
Joy of Unix
PDF
How to-run-ols-diagnostics-02
conversations-withchatGPT-Claude-gemini-on-vibration-12-112024.pdf
Essays on-civic-responsibilty
M12 boosting-part02
M10 gradient descent
M06 tree
M07 svm
M08 BiasVarianceTradeoff
Chapter 05 k nn
Chapter 04-discriminant analysis
M03 nb-02
Augmented 11022020-ieee
Chapter 02-logistic regression
Chapter01 introductory handbook
A voyage-inward-02
Evaluating classifierperformance ml-cs6923
A data scientist's study plan
Cognitive Assistants
Essay on-data-analysis
Joy of Unix
How to-run-ols-diagnostics-02

Recently uploaded (20)

PPTX
CYBER SECURITY the Next Warefare Tactics
PPTX
(Ali Hamza) Roll No: (F24-BSCS-1103).pptx
PDF
Data Engineering Interview Questions & Answers Batch Processing (Spark, Hadoo...
PPTX
New ISO 27001_2022 standard and the changes
PDF
Introduction to the R Programming Language
PDF
Systems Analysis and Design, 12th Edition by Scott Tilley Test Bank.pdf
PDF
Navigating the Thai Supplements Landscape.pdf
PDF
REAL ILLUMINATI AGENT IN KAMPALA UGANDA CALL ON+256765750853/0705037305
PPT
lectureusjsjdhdsjjshdshshddhdhddhhd1.ppt
PPTX
Managing Community Partner Relationships
PPT
DU, AIS, Big Data and Data Analytics.ppt
PPTX
Business_Capability_Map_Collection__pptx
PPT
Image processing and pattern recognition 2.ppt
PPTX
Pilar Kemerdekaan dan Identi Bangsa.pptx
PPTX
Lesson-01intheselfoflifeofthekennyrogersoftheunderstandoftheunderstanded
PPTX
Copy of 16 Timeline & Flowchart Templates – HubSpot.pptx
PPTX
Introduction to Inferential Statistics.pptx
PDF
Votre score augmente si vous choisissez une catégorie et que vous rédigez une...
PDF
Tetra Pak Index 2023 - The future of health and nutrition - Full report.pdf
PPTX
DS-40-Pre-Engagement and Kickoff deck - v8.0.pptx
CYBER SECURITY the Next Warefare Tactics
(Ali Hamza) Roll No: (F24-BSCS-1103).pptx
Data Engineering Interview Questions & Answers Batch Processing (Spark, Hadoo...
New ISO 27001_2022 standard and the changes
Introduction to the R Programming Language
Systems Analysis and Design, 12th Edition by Scott Tilley Test Bank.pdf
Navigating the Thai Supplements Landscape.pdf
REAL ILLUMINATI AGENT IN KAMPALA UGANDA CALL ON+256765750853/0705037305
lectureusjsjdhdsjjshdshshddhdhddhhd1.ppt
Managing Community Partner Relationships
DU, AIS, Big Data and Data Analytics.ppt
Business_Capability_Map_Collection__pptx
Image processing and pattern recognition 2.ppt
Pilar Kemerdekaan dan Identi Bangsa.pptx
Lesson-01intheselfoflifeofthekennyrogersoftheunderstandoftheunderstanded
Copy of 16 Timeline & Flowchart Templates – HubSpot.pptx
Introduction to Inferential Statistics.pptx
Votre score augmente si vous choisissez une catégorie et que vous rédigez une...
Tetra Pak Index 2023 - The future of health and nutrition - Full report.pdf
DS-40-Pre-Engagement and Kickoff deck - v8.0.pptx

M12 random forest-part01

  • 2. ## cname uvalfreq cname uvalfreq cname uvalfreq cn ame ## "age" "41" "sex" "2" "cp" "4" "trestb ps" ## uvalfreq cname uvalfreq cname uvalfreq cname uvalf req ## "49" "chol" "152" "fbs" "2" "restecg" "3" ## cname uvalfreq cname uvalfreq cname uvalfreq cn ame ## "thalach" "91" "exang" "2" "oldpeak" "40" "slo pe" ## uvalfreq cname uvalfreq cname uvalfreq cname uvalf req ## "3" "ca" "5" "thal" "4" "target" "2" nvpairsdf3<-do.call('rbind',lapply(names(heart),FUN=function(x,data=heart){c (cname=x,uvalfreq=length(unique(data[[x]])))})) categoricalFeatures<-function(dset,ncol=7) { df4<-as.data.frame(do.call('rbind',lapply(names(dset),FUN=function(x,data= dset){c(cname=x,uvalfreq=length(unique(data[[x]])))}))) df4$uvalfreq=as.numeric(df4$uvalfreq) dset[,df4$uvalfreq<=ncol] } X<-categoricalFeatures(heart) table(heart$target) ## ## 0 1 ## 138 165 dim(X) ## [1] 303 9 M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 2 of 14 11/23/2020, 5:44 PM
  • 3. class_col<-which(names(X)=='target') fvcnt<-ncol(X[,-c(class_col)]) #set.seed if you want to repeatability #RF implementations consider sqrt(p) features # to avoid too many common features # here we are seeking to establish that it matters # we are concerned about features being present in both exp_fset1<-sample(1:fvcnt,fvcnt-1,replace=F) exp_fset2<-sample(1:fvcnt,fvcnt-1,replace=F) table(sort(exp_fset1)==sort(exp_fset2)) ## ## TRUE ## 7 exp_fset1 ## [1] 1 2 5 4 3 6 7 exp_fset2 ## [1] 7 3 1 2 5 6 4 M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 3 of 14 11/23/2020, 5:44 PM
  • 4. ## ## rcpALL 0 1 ## 0 116 14 ## 1 22 151 ## [1] 0.8778656 ## [1] 0.0000000 0.1594203 1.0000000 ## [1] 0.0000000 0.9151515 1.0000000 X1<-X[,c(exp_fset1,9)] rpart.model1<-rpart(target~.,data=X1,minsplit=3) rpart.plot(rpart.model1) M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 4 of 14 11/23/2020, 5:44 PM
  • 5. rcp1<-predict(rpart.model1, X1[,-c(which(names(X1)=="target"))],type=" class") (rpart_mtab1<-table(rcp1,X$target)) ## ## rcp1 0 1 ## 0 107 15 ## 1 31 150 L1<-getMetrics(X1$target,as.numeric(rcp1)) L1$auc ## [1] 0.8422266 L1$fpr ## [1] 0.0000000 0.2246377 1.0000000 L1$tpr M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 5 of 14 11/23/2020, 5:44 PM
  • 6. ## [1] 0.0000000 0.9090909 1.0000000 X2<-X[,c(exp_fset2,9)] rpart.model2<-rpart(target~.,data=X2,minsplit=3) rpart.plot(rpart.model2) rcp2<-predict(rpart.model2, X2[,-c(which(names(X2)=="target"))],type=" class") (rpart_mtab2<-table(rcp2,X2$target)) ## ## rcp2 0 1 ## 0 107 15 ## 1 31 150 L2<-getMetrics(X2$target,as.numeric(rcp2)) L2$auc ## [1] 0.8422266 M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 6 of 14 11/23/2020, 5:44 PM
  • 7. L2$fpr ## [1] 0.0000000 0.2246377 1.0000000 L2$tpr ## [1] 0.0000000 0.9090909 1.0000000 exp_fset3<-sample(1:fvcnt,fvcnt-1,replace=F) X3<-X[,c(exp_fset3,9)] rpart.model3<-rpart(target~.,data=X3,minsplit=3) rpart.plot(rpart.model2) rcp3<-predict(rpart.model3, X3[,-c(which(names(X3)=="target"))],type=" class") (rpart_mtab3<-table(rcp3,X3$target)) M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 7 of 14 11/23/2020, 5:44 PM
  • 8. ## ## rcp3 0 1 ## 0 111 24 ## 1 27 141 L3<-getMetrics(X3$target,as.numeric(rcp3)) L3$auc ## [1] 0.8294466 L3$fpr ## [1] 0.0000000 0.1956522 1.0000000 L3$tpr ## [1] 0.0000000 0.8545455 1.0000000 dfpred<-data.frame(actual=X$target,rcpALL,rcp1,rcp2,rcp3) head(dfpred) ## actual rcpALL rcp1 rcp2 rcp3 ## 1 1 1 1 1 1 ## 2 1 1 1 1 1 ## 3 1 1 1 1 1 ## 4 1 1 1 1 1 ## 5 1 1 1 1 0 ## 6 1 1 1 1 1 table(X$target,rcpALL) ## rcpALL ## 0 1 ## 0 116 22 ## 1 14 151 table(X$target,rcp1) M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 8 of 14 11/23/2020, 5:44 PM
  • 9. ## rcp1 ## 0 1 ## 0 107 31 ## 1 15 150 table(X$target,rcp2) ## rcp2 ## 0 1 ## 0 107 31 ## 1 15 150 table(X$target,rcp3) ## rcp3 ## 0 1 ## 0 111 27 ## 1 24 141 … … if(!require(randomForest))require(randomForest) ## Loading required package: randomForest ## randomForest 4.6-14 ## Type rfNews() to see new features/changes/bug fixes. rf_model<-randomForest(target~.,data=X) rf_pred<-predict(rf_model,X[,-c(which(names(X)=="target"))]) rf_mtab<-table(X$target,rf_pred) rf_cmx<-caret::confusionMatrix(rf_mtab) rf_mtab ## rf_pred ## 0 1 ## 0 124 14 ## 1 10 155 (rf_accuracy<-sum(diag(rf_mtab))/sum(rf_mtab)) M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 9 of 14 11/23/2020, 5:44 PM
  • 10. ## [1] 0.9207921 rf_cmx$overall ## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul l ## 9.207921e-01 8.399366e-01 8.844364e-01 9.485923e-01 5.577558e-0 1 ## AccuracyPValue McnemarPValue ## 1.441683e-44 5.402914e-01 rf_cmx$byClass ## Sensitivity Specificity Pos Pred Value ## 0.9253731 0.9171598 0.8985507 ## Neg Pred Value Precision Recall ## 0.9393939 0.8985507 0.9253731 ## F1 Prevalence Detection Rate ## 0.9117647 0.4422442 0.4092409 ## Detection Prevalence Balanced Accuracy ## 0.4554455 0.9212664 rf1k_model<-randomForest(target~.,data=X,ntree=1000) rf1k_pred<-predict(rf1k_model,X[,-c(which(names(X)=="target"))]) rf1k_mtab<-table(X$target,rf1k_pred) rf1k_cmx<-caret::confusionMatrix(rf1k_mtab) rf1k_mtab ## rf1k_pred ## 0 1 ## 0 121 17 ## 1 8 157 (rf1k_accuracy<-sum(diag(rf1k_mtab))/sum(rf1k_mtab)) ## [1] 0.9174917 rf1k_cmx$overall M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 10 of 14 11/23/2020, 5:44 PM
  • 11. ## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul l ## 9.174917e-01 8.327704e-01 8.806106e-01 9.458942e-01 5.742574e-0 1 ## AccuracyPValue McnemarPValue ## 1.656736e-40 1.095986e-01 rf1k_cmx$byClass ## Sensitivity Specificity Pos Pred Value ## 0.9379845 0.9022989 0.8768116 ## Neg Pred Value Precision Recall ## 0.9515152 0.8768116 0.9379845 ## F1 Prevalence Detection Rate ## 0.9063670 0.4257426 0.3993399 ## Detection Prevalence Balanced Accuracy ## 0.4554455 0.9201417 rf100_model<-randomForest(target~.,data=X,ntree=100) rf100_pred<-predict(rf100_model,X[,-c(which(names(X)=="target"))]) rf100_mtab<-table(X$target,rf100_pred) rf100_cmx<-caret::confusionMatrix(rf100_mtab) rf100_mtab ## rf100_pred ## 0 1 ## 0 123 15 ## 1 11 154 (rf100_accuracy<-sum(diag(rf100_mtab))/sum(rf100_mtab)) ## [1] 0.9141914 rf100_cmx$overall ## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul l ## 9.141914e-01 8.265980e-01 8.767989e-01 9.431813e-01 5.577558e-0 1 ## AccuracyPValue McnemarPValue ## 1.093523e-42 5.562985e-01 rf100_cmx$byClass M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 11 of 14 11/23/2020, 5:44 PM
  • 12. ## Sensitivity Specificity Pos Pred Value ## 0.9179104 0.9112426 0.8913043 ## Neg Pred Value Precision Recall ## 0.9333333 0.8913043 0.9179104 ## F1 Prevalence Detection Rate ## 0.9044118 0.4422442 0.4059406 ## Detection Prevalence Balanced Accuracy ## 0.4554455 0.9145765 rf50_model<-randomForest(target~.,data=X,ntree=50) rf50_pred<-predict(rf50_model,X[,-c(which(names(X)=="target"))]) rf50_mtab<-table(X$target,rf50_pred) rf50_cmx<-caret::confusionMatrix(rf50_mtab) rf50_mtab ## rf50_pred ## 0 1 ## 0 124 14 ## 1 11 154 (rf1k_accuracy<-sum(diag(rf50_mtab))/sum(rf50_mtab)) ## [1] 0.9174917 rf1k_cmx$overall ## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul l ## 9.174917e-01 8.327704e-01 8.806106e-01 9.458942e-01 5.742574e-0 1 ## AccuracyPValue McnemarPValue ## 1.656736e-40 1.095986e-01 rf1k_cmx$byClass ## Sensitivity Specificity Pos Pred Value ## 0.9379845 0.9022989 0.8768116 ## Neg Pred Value Precision Recall ## 0.9515152 0.8768116 0.9379845 ## F1 Prevalence Detection Rate ## 0.9063670 0.4257426 0.3993399 ## Detection Prevalence Balanced Accuracy ## 0.4554455 0.9201417 M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 12 of 14 11/23/2020, 5:44 PM
  • 13. rfgrow<-grow(rf50_model,50) rfgrow_pred<-predict(rfgrow,X[,-c(which(names(X)=="target"))]) rfgrow_mtab<-table(X$target,rfgrow_pred) rfgrow_cmx<-caret::confusionMatrix(rfgrow_mtab) rfgrow_mtab ## rfgrow_pred ## 0 1 ## 0 123 15 ## 1 9 156 (rfgrow_accuracy<-sum(diag(rfgrow_mtab))/sum(rfgrow_mtab)) ## [1] 0.9207921 rfgrow_cmx$overall ## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNul l ## 9.207921e-01 8.397461e-01 8.844364e-01 9.485923e-01 5.643564e-0 1 ## AccuracyPValue McnemarPValue ## 2.685766e-43 3.074342e-01 rfgrow_cmx$byClass ## Sensitivity Specificity Pos Pred Value ## 0.9318182 0.9122807 0.8913043 ## Neg Pred Value Precision Recall ## 0.9454545 0.8913043 0.9318182 ## F1 Prevalence Detection Rate ## 0.9111111 0.4356436 0.4059406 ## Detection Prevalence Balanced Accuracy ## 0.4554455 0.9220494 M12-RandomForest file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m12-rFore... 13 of 14 11/23/2020, 5:44 PM