SlideShare a Scribd company logo
R:


     sesejun@is.ocha.ac.jp
          2010/10/21
•                                                contacts_train.csv


     •
     •                               (setwd
             >       >                        )
"Pred","Young","Myope","Astimatic","Tear"
"P","Y","Y","Y","N"
"P","Y","Y","N","N"
"P","N","Y","Y","N"
"P","N","Y","Y","N"
"N","Y","Y","Y","Y"
"N","Y","Y","N","Y"
"N","N","N","N","Y"
"N","N","N","N","N"
"N","N","N","N","Y"
"N","N","N","N","N"
                                                    contacts.csv
> contacts.train<-read.table("contacts_train.csv", header=T,
sep=",")
> contacts.train
   Pred Young Myope Astimatic Tear
1     P     Y     Y         Y    N
2     P     Y     Y         N    N
3     P     N     Y         Y    N
4     P     N     Y         Y    N
5     N     Y     Y         Y    Y
6     N     Y     Y         N    Y
7     N     N     N         N    Y
8     N     N     N         N    N
9     N     N     N         N    Y
10    N     N     N         N    N
> contacts.train[1,]
    Pred Young Myope Astimatic Tear
 1     P     Y     Y         Y    N
 > contacts.train[,2]
   [1] Y Y N N Y Y N N N N
 Levels: N Y
 > contacts.train[,"Pred"]
   [1] P P P P N N N N N N
 Levels: N P
 > contacts.train$Pred
   [1] P P P P N N N N N N
 Levels: N P



> contacts.train[c(-1,-3,-5,-7,-9),]
   Pred Young Myope Astimatic Tear
2     P     Y     Y         N    N
4     P     N     Y         Y    N
6     N     Y     Y         N    Y
8     N     N     N         N    N
10    N     N     N         N    N
> class(contacts.train)
[1] "data.frame"




> forecast <- data.frame(date=c("10/1","10/2","10/3"), weather=c
("sunny","sunny","rain"))
> forecast
  date weather
1 10/1   sunny
2 10/2   sunny
3 10/3     rain
> forecast$weather
[1] sunny sunny rain
Levels: rain sunny
> forecast$date
[1] 10/1 10/2 10/3
> nrow(contacts.train)
[1] 10
> ncol(contacts.train)
[1] 5
> rownames(contacts.train)
 [1] "1" "2" "3" "4" "5" "6" "7"           "8"   "9"   "10"
> colnames(contacts.train)
[1] "Pred"      "Young"    "Myope"         "Astimatic" "Tear"

> colnames(contacts.train)[2]
[1] "Young"

> colnames(contacts.train)[2] <- "Old"
> colnames(contacts.train)
[1] "Pred"      "Old"       "Myope"        "Astimatic" "Tear"

> colnames(contacts.train)[2] <- "Young"
> contacts.train$Young
  [1] Y Y N N Y Y N N N N
Levels: N Y
> order(contacts.train$Young)
  [1] 3 4 7 8 9 10 1 2 5 6
> contacts.train[order(contacts.train$Young),]
    Pred Young Myope Astimatic Tear
3      P     N     Y         Y    N
4      P     N     Y         Y    N
7      N     N     N         N    Y
8      N     N     N         N    N
9      N     N     N         N    Y
10     N     N     N         N    N
1      P     Y     Y         Y    N
2      P     Y     Y         N    N
5      N     Y     Y         Y    Y
6      N     Y     Y         N    Y
> library("mvpart")
> rpart(Young~., data=contacts.train, method="class")
n= 10

node), split, n, loss, yval, (yprob)
      * denotes terminal node
1) root 10 4 N (0.6000000 0.4000000)
  2) Myope=N 4 0 N (1.0000000 0.0000000) *
  3) Myope=Y 6 2 Y (0.3333333 0.6666667) *


> rpart(Young~., data=contacts.train, method="class",
control=rpart.control(cp=-1))
n= 10

node), split, n, loss, yval, (yprob)
      * denotes terminal node
1) root 10 4 N (0.6000000 0.4000000)
  2) Myope=N 4 0 N (1.0000000 0.0000000) *
  3) Myope=Y 6 2 Y (0.3333333 0.6666667)
    6) Pred=P 4 2 N (0.5000000 0.5000000) *
    7) Pred=N 2 0 Y (0.0000000 1.0000000) *
IRIS
 •   http://archive.ics.uci.edu/ml/machine-learning-databases/iris/     iris.data


     •               iris.name
     •                                                                (setosa, versicolor, virginica)


 •                          http://togodb.sel.is.ocha.ac.jp/


> iris.train <- read.table("iris_train.csv", sep=",", header=T)
> length(rownames(iris.train))
[1] 120
> length(colnames(iris.train))
[1] 5




> hist(iris.train$Sepal.length)
> hist(iris.train$Petal.length)
> library("mvpart")
> rpart(Class~., data=iris.train, method="class", control=rpart.control
(cp=.1))
n= 120

node), split, n, loss, yval, (yprob)
      * denotes terminal node

1) root 120 77 Iris-setosa (0.35833333 0.34166667 0.30000000)
  2) Petal.length< 2.45 43 0 Iris-setosa (1.00000000 0.00000000
0.00000000) *
  3) Petal.length>=2.45 77 36 Iris-versicolor (0.00000000 0.53246753
0.46753247)
    6) Petal.length< 4.75 37 1 Iris-versicolor (0.00000000 0.97297297
0.02702703) *
    7) Petal.length>=4.75 40 5 Iris-virginica (0.00000000 0.12500000
0.87500000) *
> iris.dtree<-rpart(Class~., data=iris.train, method="class",
control=rpart.control(cp=.1))
> plot.new()
> plot(iris.dtree,uniform=T,margin=0.5)
> text(iris.dtree,use.n=T,all.leaves=F)
> plot(iris.train$Petal.length, iris.train$Petal.width, pch = c
(1,2,3)[unclass(iris.train$Class)])
> iris.test <- read.table("iris_test.csv", sep=",", header=T)


> iris.predict <- predict(iris.dtree, iris.test[1:4], type="class")
> iris.predict
               2              4              18              34
    Iris-setosa     Iris-setosa     Iris-setosa     Iris-setosa
...

> iris.predict ==   iris.test$Class
 [1] TRUE TRUE      TRUE TRUE TRUE    TRUE   TRUE   TRUE FALSE   TRUE
[11] TRUE TRUE      TRUE TRUE TRUE    TRUE   TRUE   TRUE TRUE    TRUE
[21] TRUE TRUE      TRUE TRUE TRUE    TRUE   TRUE   TRUE TRUE    TRUE

> sum(iris.predict == iris.test$Class) / length(iris.test$Class)
[1] 0.9666667
> sum(iris.predict != iris.test$Class) / length(iris.test$Class)
[1] 0.03333333
•
    •
        •
        •
        •   rpart       control=rpart.control(cp=.1)   .1


    •                                                       10


    •               3                2                      3

More Related Content

PDF
Datamining R 2nd
PDF
Calculo2lista2
PDF
Datamining 5th knn
PDF
Datamining r 4th
PDF
Datamining r 3rd
PDF
Datamining 9th Association Rule
PDF
Ohp Seijoen H20 02 Hensu To Kata
PDF
Datamining 2nd decisiontree
Datamining R 2nd
Calculo2lista2
Datamining 5th knn
Datamining r 4th
Datamining r 3rd
Datamining 9th Association Rule
Ohp Seijoen H20 02 Hensu To Kata
Datamining 2nd decisiontree

Viewers also liked (7)

PDF
Datamining 3rd Naivebayes
PDF
Datamining 5th Knn
PDF
bioinfolec_20070706 4th
PDF
080806
PDF
Datamining r 1st
PDF
Datamining 4th adaboost
PDF
080806
Datamining 3rd Naivebayes
Datamining 5th Knn
bioinfolec_20070706 4th
080806
Datamining r 1st
Datamining 4th adaboost
080806
Ad

Similar to Datamining r 2nd (20)

PDF
ゲーム理論 BASIC 演習83 -アナウンスは効果あるか-
PDF
Oceans 2019 tutorial-geophysical-nav_7-updated
PDF
第13回数学カフェ「素数!!」二次会 LT資料「乱数!!」
KEY
Introduction to Perl Best Practices
DOCX
Simpatía
PPTX
Data Science for Folks Without (or With!) a Ph.D.
PDF
機械学習モデルの判断根拠の説明
PPT
การสุ่มตัวอย่างในงานวิจัยสาธารณสุข
PDF
IIT-JEE Mains 2016 Online Previous Question Paper Day 1
PDF
ゲーム理論 BASIC 演習89 -安全保障理事会決議における投票力指数-
PDF
カルマンフィルタ講義資料
PDF
Chap02-Solutions-Ex-2-2-Calculus (1).pdf
PDF
Chap02-Solutions-Ex-2-2-Calculus.pdfjhsdaoihsdaiousadjh
PDF
A note on estimation of population mean in sample survey using auxiliary info...
PDF
Regression and Classification with R
PDF
Data Manipulation Using R (& dplyr)
PPTX
3aquine-mccluskeymethod-191016140548 (2)[1].pptx
PDF
Statistical Physics of Ecological Networks: from patterns to principles
PDF
Jamieson_Jain2018
PDF
Eカードをゲーム理論で分析
ゲーム理論 BASIC 演習83 -アナウンスは効果あるか-
Oceans 2019 tutorial-geophysical-nav_7-updated
第13回数学カフェ「素数!!」二次会 LT資料「乱数!!」
Introduction to Perl Best Practices
Simpatía
Data Science for Folks Without (or With!) a Ph.D.
機械学習モデルの判断根拠の説明
การสุ่มตัวอย่างในงานวิจัยสาธารณสุข
IIT-JEE Mains 2016 Online Previous Question Paper Day 1
ゲーム理論 BASIC 演習89 -安全保障理事会決議における投票力指数-
カルマンフィルタ講義資料
Chap02-Solutions-Ex-2-2-Calculus (1).pdf
Chap02-Solutions-Ex-2-2-Calculus.pdfjhsdaoihsdaiousadjh
A note on estimation of population mean in sample survey using auxiliary info...
Regression and Classification with R
Data Manipulation Using R (& dplyr)
3aquine-mccluskeymethod-191016140548 (2)[1].pptx
Statistical Physics of Ecological Networks: from patterns to principles
Jamieson_Jain2018
Eカードをゲーム理論で分析
Ad

More from sesejun (20)

PDF
RNAseqによる変動遺伝子抽出の統計: A Review
PDF
バイオインフォマティクスによる遺伝子発現解析
PDF
次世代シーケンサが求める機械学習
PDF
20110602labseminar pub
PDF
20110524zurichngs 2nd pub
PDF
20110524zurichngs 1st pub
PDF
20110214nips2010 read
PDF
Datamining 9th association_rule.key
PDF
Datamining 8th hclustering
PDF
Datamining 6th svm
PDF
Datamining 3rd naivebayes
PDF
Datamining 7th kmeans
PDF
100401 Bioinfoinfra
PDF
Datamining 8th Hclustering
PDF
Datamining 9th Association Rule
PDF
Datamining 8th Hclustering
PDF
Datamining 7th Kmeans
PDF
Datamining R 4th
PDF
Datamining 6th Svm
PDF
Datamining 4th Adaboost
RNAseqによる変動遺伝子抽出の統計: A Review
バイオインフォマティクスによる遺伝子発現解析
次世代シーケンサが求める機械学習
20110602labseminar pub
20110524zurichngs 2nd pub
20110524zurichngs 1st pub
20110214nips2010 read
Datamining 9th association_rule.key
Datamining 8th hclustering
Datamining 6th svm
Datamining 3rd naivebayes
Datamining 7th kmeans
100401 Bioinfoinfra
Datamining 8th Hclustering
Datamining 9th Association Rule
Datamining 8th Hclustering
Datamining 7th Kmeans
Datamining R 4th
Datamining 6th Svm
Datamining 4th Adaboost

Datamining r 2nd

  • 1. R: sesejun@is.ocha.ac.jp 2010/10/21
  • 2. contacts_train.csv • • (setwd > > ) "Pred","Young","Myope","Astimatic","Tear" "P","Y","Y","Y","N" "P","Y","Y","N","N" "P","N","Y","Y","N" "P","N","Y","Y","N" "N","Y","Y","Y","Y" "N","Y","Y","N","Y" "N","N","N","N","Y" "N","N","N","N","N" "N","N","N","N","Y" "N","N","N","N","N" contacts.csv
  • 3. > contacts.train<-read.table("contacts_train.csv", header=T, sep=",") > contacts.train Pred Young Myope Astimatic Tear 1 P Y Y Y N 2 P Y Y N N 3 P N Y Y N 4 P N Y Y N 5 N Y Y Y Y 6 N Y Y N Y 7 N N N N Y 8 N N N N N 9 N N N N Y 10 N N N N N
  • 4. > contacts.train[1,] Pred Young Myope Astimatic Tear 1 P Y Y Y N > contacts.train[,2] [1] Y Y N N Y Y N N N N Levels: N Y > contacts.train[,"Pred"] [1] P P P P N N N N N N Levels: N P > contacts.train$Pred [1] P P P P N N N N N N Levels: N P > contacts.train[c(-1,-3,-5,-7,-9),] Pred Young Myope Astimatic Tear 2 P Y Y N N 4 P N Y Y N 6 N Y Y N Y 8 N N N N N 10 N N N N N
  • 5. > class(contacts.train) [1] "data.frame" > forecast <- data.frame(date=c("10/1","10/2","10/3"), weather=c ("sunny","sunny","rain")) > forecast date weather 1 10/1 sunny 2 10/2 sunny 3 10/3 rain > forecast$weather [1] sunny sunny rain Levels: rain sunny > forecast$date [1] 10/1 10/2 10/3
  • 6. > nrow(contacts.train) [1] 10 > ncol(contacts.train) [1] 5 > rownames(contacts.train) [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" > colnames(contacts.train) [1] "Pred" "Young" "Myope" "Astimatic" "Tear" > colnames(contacts.train)[2] [1] "Young" > colnames(contacts.train)[2] <- "Old" > colnames(contacts.train) [1] "Pred" "Old" "Myope" "Astimatic" "Tear" > colnames(contacts.train)[2] <- "Young"
  • 7. > contacts.train$Young [1] Y Y N N Y Y N N N N Levels: N Y > order(contacts.train$Young) [1] 3 4 7 8 9 10 1 2 5 6 > contacts.train[order(contacts.train$Young),] Pred Young Myope Astimatic Tear 3 P N Y Y N 4 P N Y Y N 7 N N N N Y 8 N N N N N 9 N N N N Y 10 N N N N N 1 P Y Y Y N 2 P Y Y N N 5 N Y Y Y Y 6 N Y Y N Y
  • 8. > library("mvpart") > rpart(Young~., data=contacts.train, method="class") n= 10 node), split, n, loss, yval, (yprob) * denotes terminal node 1) root 10 4 N (0.6000000 0.4000000) 2) Myope=N 4 0 N (1.0000000 0.0000000) * 3) Myope=Y 6 2 Y (0.3333333 0.6666667) * > rpart(Young~., data=contacts.train, method="class", control=rpart.control(cp=-1)) n= 10 node), split, n, loss, yval, (yprob) * denotes terminal node 1) root 10 4 N (0.6000000 0.4000000) 2) Myope=N 4 0 N (1.0000000 0.0000000) * 3) Myope=Y 6 2 Y (0.3333333 0.6666667) 6) Pred=P 4 2 N (0.5000000 0.5000000) * 7) Pred=N 2 0 Y (0.0000000 1.0000000) *
  • 9. IRIS • http://archive.ics.uci.edu/ml/machine-learning-databases/iris/ iris.data • iris.name • (setosa, versicolor, virginica) • http://togodb.sel.is.ocha.ac.jp/ > iris.train <- read.table("iris_train.csv", sep=",", header=T) > length(rownames(iris.train)) [1] 120 > length(colnames(iris.train)) [1] 5 > hist(iris.train$Sepal.length) > hist(iris.train$Petal.length)
  • 10. > library("mvpart") > rpart(Class~., data=iris.train, method="class", control=rpart.control (cp=.1)) n= 120 node), split, n, loss, yval, (yprob) * denotes terminal node 1) root 120 77 Iris-setosa (0.35833333 0.34166667 0.30000000) 2) Petal.length< 2.45 43 0 Iris-setosa (1.00000000 0.00000000 0.00000000) * 3) Petal.length>=2.45 77 36 Iris-versicolor (0.00000000 0.53246753 0.46753247) 6) Petal.length< 4.75 37 1 Iris-versicolor (0.00000000 0.97297297 0.02702703) * 7) Petal.length>=4.75 40 5 Iris-virginica (0.00000000 0.12500000 0.87500000) *
  • 11. > iris.dtree<-rpart(Class~., data=iris.train, method="class", control=rpart.control(cp=.1)) > plot.new() > plot(iris.dtree,uniform=T,margin=0.5) > text(iris.dtree,use.n=T,all.leaves=F)
  • 12. > plot(iris.train$Petal.length, iris.train$Petal.width, pch = c (1,2,3)[unclass(iris.train$Class)])
  • 13. > iris.test <- read.table("iris_test.csv", sep=",", header=T) > iris.predict <- predict(iris.dtree, iris.test[1:4], type="class") > iris.predict 2 4 18 34 Iris-setosa Iris-setosa Iris-setosa Iris-setosa ... > iris.predict == iris.test$Class [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE [11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE [21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE > sum(iris.predict == iris.test$Class) / length(iris.test$Class) [1] 0.9666667 > sum(iris.predict != iris.test$Class) / length(iris.test$Class) [1] 0.03333333
  • 14. • • • • rpart control=rpart.control(cp=.1) .1 • 10 • 3 2 3