Datamining R 2nd

sesejun@is.ocha.ac.jp
2009/10/29 (Thu)

• Save the data listed below as contacts_train.csv
  • Change R's working directory to the folder that holds the file
    (use setwd(), or the corresponding menu entry; a sketch follows the data listing)
"Pred","Young","Myope","Astimatic","Tear"
"P","Y","Y","Y","N"
"P","Y","Y","N","N"
"P","N","Y","Y","N"
"P","N","Y","Y","N"
"N","Y","Y","Y","Y"
"N","Y","Y","N","Y"
"N","N","N","N","Y"
"N","N","N","N","N"
"N","N","N","N","Y"
"N","N","N","N","N"
contacts.csv
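Before read.table() can find the file, R's working directory has to point at the folder where contacts_train.csv was saved. A minimal sketch, assuming the file sits in a folder named ~/datamining (the path is only an example):

> setwd("~/datamining")        # point R at the folder holding the CSV file
> list.files(pattern="csv")    # check that contacts_train.csv is listed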
> contacts.train<-read.table("contacts_train.csv", header=T,
sep=",")
> contacts.train
   Pred Young Myope Astimatic Tear
1     P     Y     Y         Y    N
2     P     Y     Y         N    N
3     P     N     Y         Y    N
4     P     N     Y         Y    N
5     N     Y     Y         Y    Y
6     N     Y     Y         N    Y
7     N     N     N         N    Y
8     N     N     N         N    N
9     N     N     N         N    Y
10    N     N     N         N    N
> contacts.train[1,]
  Pred Young Myope Astimatic Tear
1    P     Y     Y         Y    N
> contacts.train[,2]
 [1] Y Y N N Y Y N N N N
Levels: N Y
> contacts.train[,"Pred"]
 [1] P P P P N N N N N N
Levels: N P
> contacts.train$Pred
 [1] P P P P N N N N N N
Levels: N P
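Each column is read in as a factor, so the usual factor utilities apply directly. For example, a quick sketch that counts the rows in each level:

> table(contacts.train$Pred)   # counts per level: 6 rows N, 4 rows P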



> contacts.train[c(-1,-3,-5,-7,-9),]
   Pred Young Myope Astimatic Tear
2     P     Y     Y         N    N
4     P     N     Y         Y    N
6     N     Y     Y         N    Y
8     N     N     N         N    N
10    N     N     N         N    N
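Negative indices drop the listed rows; positive indices keep them, and a logical test on a column does the same job. A small sketch on the same data frame:

> contacts.train[c(2,4,6,8,10),]             # keep only the even-numbered rows
> contacts.train[contacts.train$Pred=="P",]  # keep the rows whose Pred is "P"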
> class(contacts.train)
[1] "data.frame"




> forecast <- data.frame(date=c("10/1","10/2","10/3"),
weather=c("sunny","sunny","rain"))
> forecast
  date weather
1 10/1   sunny
2 10/2   sunny
3 10/3     rain
> forecast$weather
[1] sunny sunny rain
Levels: rain sunny
> forecast$date
[1] 10/1 10/2 10/3
> nrow(contacts.train)
[1] 10
> ncol(contacts.train)
[1] 5
> rownames(contacts.train)
 [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10"
> colnames(contacts.train)
[1] "Pred"      "Young"     "Myope"     "Astimatic" "Tear"

> colnames(contacts.train)[2]
[1] "Young"

> colnames(contacts.train)[2] <- "Old"
> colnames(contacts.train)
[1] "Pred"      "Old"       "Myope"      "Astimatic" "Tear"

> colnames(contacts.train)[2] <- "Young"
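nrow(), ncol(), rownames() and colnames() each report one aspect of the data frame; dim() and str() show the same information at a glance. A quick sketch:

> dim(contacts.train)   # number of rows and columns in one call
[1] 10  5
> str(contacts.train)   # column names, types and factor levels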
> contacts.train$Young
  [1] Y Y N N Y Y N N N N
Levels: N Y
> order(contacts.train$Young)
  [1] 3 4 7 8 9 10 1 2 5 6
> contacts.train[order(contacts.train$Young),]
    Pred Young Myope Astimatic Tear
3      P     N     Y         Y    N
4      P     N     Y         Y    N
7      N     N     N         N    Y
8      N     N     N         N    N
9      N     N     N         N    Y
10     N     N     N         N    N
1      P     Y     Y         Y    N
2      P     Y     Y         N    N
5      N     Y     Y         Y    Y
6      N     Y     Y         N    Y
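order() sorts in ascending order by default; passing decreasing=TRUE reverses it, so the "Y" rows come first. A one-line sketch:

> contacts.train[order(contacts.train$Young, decreasing=TRUE),]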
> library("mvpart")
> rpart(Young~., data=contacts.train, method="class")
n= 10

node), split, n, loss, yval, (yprob)
      * denotes terminal node
1) root 10 4 N (0.6000000 0.4000000)
  2) Myope=N 4 0 N (1.0000000 0.0000000) *
  3) Myope=Y 6 2 Y (0.3333333 0.6666667) *
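The same call can be stored in a variable so the fitted tree can be reused. A sketch (the name contacts.dtree is chosen here only for illustration) that classifies the training rows with predict():

> contacts.dtree <- rpart(Young~., data=contacts.train, method="class")
> predict(contacts.dtree, contacts.train, type="class")   # predicted Young label per row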


> rpart(Young~., data=contacts.train, method="class",
control=rpart.control(cp=-1))
n= 10

node), split, n, loss, yval, (yprob)
      * denotes terminal node
1) root 10 4 N (0.6000000 0.4000000)
  2) Myope=N 4 0 N (1.0000000 0.0000000) *
  3) Myope=Y 6 2 Y (0.3333333 0.6666667)
    6) Pred=P 4 2 N (0.5000000 0.5000000) *
    7) Pred=N 2 0 Y (0.0000000 1.0000000) *
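Splits that improve the fit by less than cp are pruned away, which is why cp=-1 keeps every split. Assuming the fitted tree is kept in a variable (contacts.full below is an illustrative name), printcp() from rpart/mvpart lists the cp value tied to each tree size. A sketch:

> contacts.full <- rpart(Young~., data=contacts.train, method="class",
control=rpart.control(cp=-1))
> printcp(contacts.full)   # table of cp values, number of splits and error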
IRIS
• Download iris.data from
  http://archive.ics.uci.edu/ml/machine-learning-databases/iris/
  • The attributes are described in the accompanying iris.names file
  • Each record carries one of three class labels (setosa, versicolor, virginica)
• See also: http://togodb.sel.is.ocha.ac.jp/


> iris.train <- read.table("iris_train.csv", sep=",", header=T)
> length(rownames(iris.train))
[1] 120
> length(colnames(iris.train))
[1] 5
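length(rownames()) and length(colnames()) are equivalent to nrow() and ncol(); summary() additionally reports the range of each measurement and the count of each class. A quick sketch:

> nrow(iris.train)     # same as length(rownames(iris.train))
[1] 120
> summary(iris.train)  # per-column summaries, including counts of each Class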




> hist(iris.train$Sepal.length)
> hist(iris.train$Petal.length)
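hist() also takes optional arguments for the number of bins and the axis labels; for instance (values chosen only for illustration):

> hist(iris.train$Petal.length, breaks=20, xlab="Petal length (cm)")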
> library("mvpart")
> rpart(Class~., data=iris.train, method="class",
control=rpart.control(cp=.1))
n= 120

node), split, n, loss, yval, (yprob)
      * denotes terminal node

1) root 120 77 Iris-setosa (0.35833333 0.34166667 0.30000000)
  2) Petal.length< 2.45 43 0 Iris-setosa (1.00000000 0.00000000 0.00000000) *
  3) Petal.length>=2.45 77 36 Iris-versicolor (0.00000000 0.53246753 0.46753247)
    6) Petal.length< 4.75 37 1 Iris-versicolor (0.00000000 0.97297297 0.02702703) *
    7) Petal.length>=4.75 40 5 Iris-virginica (0.00000000 0.12500000 0.87500000) *
> iris.dtree<-rpart(Class~., data=iris.train, method="class",
control=rpart.control(cp=.1))
> plot.new()
> plot(iris.dtree,uniform=T,margin=0.5)
> text(iris.dtree,use.n=T,all.leaves=F)
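To keep the drawing as a file, the same plotting calls can be wrapped in a graphics device; a sketch writing a PDF (the file name is arbitrary):

> pdf("iris_dtree.pdf")
> plot(iris.dtree, uniform=T, margin=0.5)
> text(iris.dtree, use.n=T, all.leaves=F)
> dev.off()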
> plot(iris.train$Petal.length, iris.train$Petal.width, pch =
c(1,2,3)[unclass(iris.train$Class)])
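Since pch=c(1,2,3) follows the order of the factor levels, a matching legend can be added to the scatter plot; a minimal sketch:

> legend("topleft", legend=levels(iris.train$Class), pch=c(1,2,3))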
> iris.test <- read.table("iris_test.csv", sep=",", header=T)


> iris.predict <- predict(iris.dtree, iris.test[1:4], type="class")
> iris.predict
          2           4          18          34
Iris-setosa Iris-setosa Iris-setosa Iris-setosa
...

> iris.predict == iris.test$Class
 [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
[11]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[21]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE

> sum(iris.predict == iris.test$Class) / length(iris.test$Class)
[1] 0.9666667
> sum(iris.predict != iris.test$Class) / length(iris.test$Class)
[1] 0.03333333
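Beyond the overall accuracy, a confusion matrix shows where the single misclassified flower went; a short sketch using table():

> table(predicted=iris.predict, actual=iris.test$Class)   # confusion matrix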
•
  •
    •
    •
    • rpart    control=rpart.control(cp=.1)    .1
  •    10
  •    3    2    3