SlideShare a Scribd company logo
Machine Learning with R
Machine Learning with R
Machine Learning with R
Machine Learning with R
# Download the pml training and testing CSV files into the working directory
# and load them as data frames.
download.file(
  url = "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv",
  destfile = "pml-training.csv"
)
download.file(
  url = "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv",
  destfile = "pml-testing.csv"
)

# stringsAsFactors = TRUE reproduces the pre-4.0 read.csv default: text columns
# (including the outcome `classe`) are loaded as factors, so the later model
# fit sees a factor response on every R version.
pmlTraining <- read.csv("pml-training.csv", stringsAsFactors = TRUE)
pmlTesting <- read.csv("pml-testing.csv", stringsAsFactors = TRUE)

dim(pmlTraining)
## [1] 19622 160
dim(pmlTesting)
## [1] 20 160

# Interactive inspection only; opens the data viewer.
View(pmlTraining)
Machine Learning with R
Machine Learning with R
# Prepare a raw data frame for modelling.
#
# Every column other than the outcome `classe` is discarded when it is not
# numeric or when more than half of its values are NA. The surviving
# predictors are coerced to double, and the first four surviving columns are
# then removed as well.
#
# Args:
#   dataframe: a data.frame; it may or may not contain a `classe` column.
#
# Returns:
#   A data.frame with the same rows as `dataframe`, holding only the retained
#   columns in their original order.
ProcessData <- function(dataframe) {
  is_outcome <- colnames(dataframe) == "classe"

  # One flag per column. isTRUE() guards the zero-row case, where the NA
  # fraction is NaN and the comparison would otherwise yield NA.
  unusable <- vapply(
    dataframe,
    function(column) {
      !is.numeric(column) || isTRUE(mean(is.na(column)) > 0.5)
    },
    logical(1)
  )

  # Select with a logical mask rather than negative indices: an empty set of
  # negative indices would select no columns at all instead of all of them.
  keep <- is_outcome | !unname(unusable)
  process <- dataframe[, keep, drop = FALSE]

  # Coerce predictors by name so the outcome is left untouched wherever it
  # sits in the column order.
  for (j in which(colnames(process) != "classe")) {
    process[[j]] <- as.numeric(process[[j]])
  }

  # Drop the first four surviving columns.
  # NOTE(review): presumably row-id / timestamp / window bookkeeping columns in
  # the pml files -- confirm before reusing this on other data.
  process[, seq_len(ncol(process)) > 4, drop = FALSE]
}
# Filter the training data with ProcessData; per the recorded output below,
# 53 of the original 160 columns remain and all 19622 rows are kept.
cleanedData <- ProcessData(pmlTraining) 
dim(cleanedData) 
## [1] 19622 53
Machine Learning with R
Machine Learning with R
library(caret)

# Fix the RNG seed so the random split (and everything fitted on it) can be
# reproduced from run to run.
set.seed(1234)

# Hold out 30% of the cleaned rows for validation; createDataPartition samples
# within the levels of `classe`.
inTrain <- createDataPartition(y = cleanedData$classe, p = 0.7, list = FALSE)
trainSet <- cleanedData[inTrain, ]
validationSet <- cleanedData[-inTrain, ]
dim(trainSet); dim(validationSet)
## [1] 13737 53
## [1] 5885 53
Machine Learning with R
library(caret)
library(randomForest)

# Alternative shown on the slide: fit the same model through caret. It is left
# commented out because its result was immediately overwritten by the
# randomForest() fit below, so running it only cost time.
# fit <- train(classe ~ ., data = trainSet, method = 'rf')
fit <- randomForest(classe ~ ., data = trainSet)

# Predict on the held-out rows, selecting predictors by name rather than by
# the hard-coded position of `classe`.
predictResult <- predict(
  fit,
  validationSet[, colnames(validationSet) != "classe", drop = FALSE]
)

# confusionMatrix() expects the predictions as `data` and the true classes as
# `reference`. The sample output that follows in the slides was produced with
# the two swapped, so its "Prediction" rows are actually the true classes.
confusionMatrix(data = predictResult, reference = validationSet$classe)
## Confusion Matrix and Statistics 
## 
## Reference 
## Prediction A B C D E 
## A 1673 1 0 0 0 
## B 8 1128 3 0 0 
## C 0 8 1014 4 0 
## D 0 0 8 956 0 
## E 0 0 0 1 1081 
## 
## Overall Statistics 
## 
## Accuracy : 0.994 
## 95% CI : (0.992, 0.996) 
## No Information Rate : 0.286 
## P-Value [Acc > NIR] : <2e-16 
## 
## Kappa : 0.993 
## Mcnemar's Test P-Value : NA 
## 
## Statistics by Class: 
## 
## Class: A Class: B Class: C Class: D Class: E 
## Sensitivity 0.995 0.992 0.989 0.995 1.000 
## Specificity 1.000 0.998 0.998 0.998 1.000 
## Pos Pred Value 0.999 0.990 0.988 0.992 0.999 
## Neg Pred Value 0.998 0.998 0.998 0.999 1.000 
## Prevalence 0.286 0.193 0.174 0.163 0.184 
## Detection Rate 0.284 0.192 0.172 0.162 0.184 
## Detection Prevalence 0.284 0.194 0.174 0.164 0.184 
## Balanced Accuracy 0.998 0.995 0.993 0.997 1.000
# Apply the same ProcessData cleaning to the 20-row testing set, then predict
# its classes with the fitted model.
cleanedTest <- ProcessData(pmlTesting) 
predict(fit, cleanedTest) 
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 
## B A B A A E D B A A B C B A E E A B B B 
## Levels: A B C D E
Machine Learning with R

More Related Content

KEY
Perlで任意精度計算
PPTX
Maximal slice problem
PDF
Efficient Programs
DOCX
C++ program: Day calculation .cpp
PDF
EKON22 Introduction to Machinelearning
PPTX
Midia Kit Minha Vida 2010
PDF
Peterson_-_Machine_Learning_Project
PDF
Classification examp
Perlで任意精度計算
Maximal slice problem
Efficient Programs
C++ program: Day calculation .cpp
EKON22 Introduction to Machinelearning
Midia Kit Minha Vida 2010
Peterson_-_Machine_Learning_Project
Classification examp

Similar to Machine Learning with R (20)

PDF
Course Project for Coursera Practical Machine Learning
PDF
Hybrid prediction model with missing value imputation for medical data 2015-g...
PDF
Human_Activity_Recognition_Predictive_Model
PDF
ACM Bay Area Data Mining Workshop: Pattern, PMML, Hadoop
PDF
IRJET - Comparative Analysis of GUI based Prediction of Parkinson Disease usi...
PDF
Simple rules for building robust machine learning models
PPTX
Predictive Modeling Workshop
PDF
IRJET - A Survey on Machine Learning Intelligence Techniques for Medical ...
PDF
Benchmarking_ML_Tools
PPTX
Farid Ali Presentation_Final.pptx
PDF
VSSML18. Ensembles and Logistic Regressions
PPTX
Too good to be true? How validate your data
PDF
A Survey on Stroke Prediction
PDF
A survey on heart stroke prediction
PDF
Improving support vector machine and backpropagation performance for diabetes...
PDF
VSSML18. Evaluations
PDF
General Tips for participating Kaggle Competitions
PDF
Optimized stacking ensemble for early-stage diabetes mellitus prediction
PDF
Experimental Design for Distributed Machine Learning with Myles Baker
PDF
MLSD18 Evaluations
Course Project for Coursera Practical Machine Learning
Hybrid prediction model with missing value imputation for medical data 2015-g...
Human_Activity_Recognition_Predictive_Model
ACM Bay Area Data Mining Workshop: Pattern, PMML, Hadoop
IRJET - Comparative Analysis of GUI based Prediction of Parkinson Disease usi...
Simple rules for building robust machine learning models
Predictive Modeling Workshop
IRJET - A Survey on Machine Learning Intelligence Techniques for Medical ...
Benchmarking_ML_Tools
Farid Ali Presentation_Final.pptx
VSSML18. Ensembles and Logistic Regressions
Too good to be true? How validate your data
A Survey on Stroke Prediction
A survey on heart stroke prediction
Improving support vector machine and backpropagation performance for diabetes...
VSSML18. Evaluations
General Tips for participating Kaggle Competitions
Optimized stacking ensemble for early-stage diabetes mellitus prediction
Experimental Design for Distributed Machine Learning with Myles Baker
MLSD18 Evaluations
Ad

More from Taurã Figueiredo (10)

PPTX
How Email Marketing represent 50% of all new subscriptions with no day by day...
PPTX
Connections latam | Case Minha Vida
PPT
Cases marketing de serviço v2
PPT
Apresentacao customizacao em_massa
PPT
Gestão de Serviços e Marketing Interno
How Email Marketing represent 50% of all new subscriptions with no day by day...
Connections latam | Case Minha Vida
Cases marketing de serviço v2
Apresentacao customizacao em_massa
Gestão de Serviços e Marketing Interno
Ad

Recently uploaded (20)

PPTX
mbdjdhjjodule 5-1 rhfhhfjtjjhafbrhfnfbbfnb
PPTX
Computer network topology notes for revision
PDF
Capcut Pro Crack For PC Latest Version {Fully Unlocked 2025}
PPTX
The THESIS FINAL-DEFENSE-PRESENTATION.pptx
PPT
Reliability_Chapter_ presentation 1221.5784
PPT
Predictive modeling basics in data cleaning process
PPTX
STUDY DESIGN details- Lt Col Maksud (21).pptx
PPTX
Introduction to Basics of Ethical Hacking and Penetration Testing -Unit No. 1...
PPTX
iec ppt-1 pptx icmr ppt on rehabilitation.pptx
PDF
168300704-gasification-ppt.pdfhghhhsjsjhsuxush
PPTX
SAP 2 completion done . PRESENTATION.pptx
PPTX
Leprosy and NLEP programme community medicine
PPTX
AI Strategy room jwfjksfksfjsjsjsjsjfsjfsj
PDF
Optimise Shopper Experiences with a Strong Data Estate.pdf
PPT
Miokarditis (Inflamasi pada Otot Jantung)
PDF
Data Engineering Interview Questions & Answers Cloud Data Stacks (AWS, Azure,...
PDF
22.Patil - Early prediction of Alzheimer’s disease using convolutional neural...
PPT
Quality review (1)_presentation of this 21
PDF
Introduction to Data Science and Data Analysis
mbdjdhjjodule 5-1 rhfhhfjtjjhafbrhfnfbbfnb
Computer network topology notes for revision
Capcut Pro Crack For PC Latest Version {Fully Unlocked 2025}
The THESIS FINAL-DEFENSE-PRESENTATION.pptx
Reliability_Chapter_ presentation 1221.5784
Predictive modeling basics in data cleaning process
STUDY DESIGN details- Lt Col Maksud (21).pptx
Introduction to Basics of Ethical Hacking and Penetration Testing -Unit No. 1...
iec ppt-1 pptx icmr ppt on rehabilitation.pptx
168300704-gasification-ppt.pdfhghhhsjsjhsuxush
SAP 2 completion done . PRESENTATION.pptx
Leprosy and NLEP programme community medicine
AI Strategy room jwfjksfksfjsjsjsjsjfsjfsj
Optimise Shopper Experiences with a Strong Data Estate.pdf
Miokarditis (Inflamasi pada Otot Jantung)
Data Engineering Interview Questions & Answers Cloud Data Stacks (AWS, Azure,...
22.Patil - Early prediction of Alzheimer’s disease using convolutional neural...
Quality review (1)_presentation of this 21
Introduction to Data Science and Data Analysis

Machine Learning with R

  • 5. download.file(url = "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv", destfile = "pml-training.csv") download.file(url = "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv", destfile = "pml-testing.csv") pmlTraining <- read.csv("pml-training.csv") pmlTesting <- read.csv("pml-testing.csv")
  • 6. dim(pmlTraining) ## [1] 19622 160 dim(pmlTesting) ## [1] 20 160
  • 10. ProcessData <- function (dataframe){ col <- vector(mode = "numeric") for(i in 1:ncol(dataframe)) { if(colnames(dataframe)[i]=='classe'){next;} total <- length(dataframe[,i]); NAs <- length(dataframe[is.na(dataframe[,i]),i]); empty <- length(dataframe[dataframe[,i]=='',i]) notNumber <- length(dataframe[!is.numeric(dataframe[,i]),i]); if(round(NAs/total)==1 | round(empty/total)==1 | round(notNumber/total)==1){ col <- rbind(col, i) } } process <- dataframe[,as.numeric(col)*(-1)] for(i in 1:(ncol(process)-1)){ process[,i] <- as.numeric(process[,i]); } process[,c(1,2,3,4)*(-1)] }
  • 11. cleanedData <- ProcessData(pmlTraining) dim(cleanedData) ## [1] 19622 53
  • 14. library(caret) inTrain <- createDataPartition(y = cleanedData$classe, p = 0.7, list = FALSE) trainSet <- cleanedData[inTrain,] validationSet <- cleanedData[-inTrain,] dim(trainSet); dim(validationSet) ## [1] 13737 53 ## [1] 5885 53
  • 16. library(caret) fit <- train(classe ~ ., data = trainSet, method = 'rf') library(randomForest) fit <- randomForest(classe ~ ., data = trainSet)
  • 17. predictResult <- predict(fit, validationSet[,-53]) confusionMatrix(validationSet[,53], predictResult)
  • 18. ## Confusion Matrix and Statistics ## ## Reference ## Prediction A B C D E ## A 1673 1 0 0 0 ## B 8 1128 3 0 0 ## C 0 8 1014 4 0 ## D 0 0 8 956 0 ## E 0 0 0 1 1081 ## ## Overall Statistics ## ## Accuracy : 0.994 ## 95% CI : (0.992, 0.996) ## No Information Rate : 0.286 ## P-Value [Acc > NIR] : <2e-16 ## ## Kappa : 0.993 ## Mcnemar's Test P-Value : NA ## ## Statistics by Class: ## ## Class: A Class: B Class: C Class: D Class: E ## Sensitivity 0.995 0.992 0.989 0.995 1.000 ## Specificity 1.000 0.998 0.998 0.998 1.000 ## Pos Pred Value 0.999 0.990 0.988 0.992 0.999 ## Neg Pred Value 0.998 0.998 0.998 0.999 1.000 ## Prevalence 0.286 0.193 0.174 0.163 0.184 ## Detection Rate 0.284 0.192 0.172 0.162 0.184 ## Detection Prevalence 0.284 0.194 0.174 0.164 0.184 ## Balanced Accuracy 0.998 0.995 0.993 0.997 1.000
  • 19. cleanedTest <- ProcessData(pmlTesting) predict(fit, cleanedTest) ## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ## B A B A A E D B A A B C B A E E A B B B ## Levels: A B C D E