SlideShare a Scribd company logo
1
2
•
•
    •
    •




        3
•
•

•
•




    4
•
•
    •
    •
•




        5
6
•
•
    •
    •
    •
•
•



        7
R CMD INSTALL 'package filename'
                                   8
9
> small.ints = to.dfs(1:10)
> out = mapreduce(input = small.ints, map = function(k,v) keyval(k, k^2))
> res = from.dfs(out)
> colres <- do.call('rbind', lapply(res,"[[",2))
> t(colres)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,]    1    4    9   16    25  36   49   64   81   100




> groups = to.dfs(rbinom(32, n = 50, prob = 0.4))
> out = mapreduce(input = groups, reduce = function(k,vv) keyval(k, length(vv)))
> res = from.dfs(out)
> colres <- do.call('rbind', lapply(res,"[[",2))
> t(colres)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
[1,]    2    7    3    1    1   12    2    8     8    1     4     1



                                                                                   10
>  wordcount = function(input, output = NULL, pattern = " ") {
+    mapreduce(input = input,
+              output = output,
+              textinputformat = rawtextinputformat,
+              map = function(k ,v) {
+                 lapply(strsplit(x = v, split = pattern) [[1]],
+                        function(w) keyval(w,1))
+              },
+              reduce = function(k, vv) {
+                 keyval(k, sum(unlist(vv)))
+              },
+              combine = T)
+  }
>  out <- wordcount(input="/user/hidekazu/the_social_network.txt")
>  results <- from.dfs(out)
>  results <- data.frame(word=unlist(lapply(results,"[[",1)),
                          count=unlist(lapply(results,"[[",2)))
> results <- (results[order(results$count, decreasing=TRUE), ])
> head(results)
      word count
6313 the 1101
2381     a   700
26     and   637
                                                                         11
12
# One MapReduce pass of k-means: the map step assigns every point to a group,
# the reduce step averages the points of each group into a new center.
#
# Arguments:
#   points    Input passed unchanged to mapreduce() as `input`.
#   distfun   function(center, point) returning a single numeric distance.
#   ncenters  Number of groups for the random assignment. Only used when
#             `centers` is NULL. Defaults to length(centers).
#   centers   List of current centers, or NULL to assign points at random.
#
# Returns the result of from.dfs() applied to the job output. Every reduce
# call emits keyval(NULL, <column-wise mean of the points in that group>).
kmeans.iter <-
  function(points, distfun, ncenters = length(centers), centers = NULL) {
    if (is.null(centers)) {
      # Fail before the job is submitted: sample(1:0, 1) would otherwise hand
      # out the bogus keys 0 and 1 without raising any error.
      if (length(ncenters) != 1 || is.na(ncenters) || ncenters < 1) {
        stop("'ncenters' must be a positive number when 'centers' is NULL",
             call. = FALSE)
      }
      assign_point <- function(k, v) keyval(sample(seq_len(ncenters), 1), v)
    } else {
      if (length(centers) == 0) {
        stop("'centers' must contain at least one center", call. = FALSE)
      }
      assign_point <- function(k, v) {
        # vapply() enforces exactly one numeric distance per center, so
        # which.min() always works on a plain numeric vector.
        distances <- vapply(centers,
                            function(center) distfun(center, v),
                            numeric(1),
                            USE.NAMES = FALSE)
        # The nearest center itself is used as the grouping key.
        keyval(centers[[which.min(distances)]], v)
      }
    }
    from.dfs(
      mapreduce(input = points,
                map = assign_point,
                reduce = function(k, vv) {
                  # Stack the group's points row-wise and average each column.
                  keyval(NULL, apply(do.call(rbind, vv), 2, mean))
                })
    )
  }




                                                                                   13
# Iterative k-means driver built on kmeans.iter().
#
# Arguments:
#   points      Input forwarded to kmeans.iter() on every pass.
#   ncenters    Number of groups used for the initial random assignment.
#   iterations  Number of refinement passes run after the initial pass.
#   distfun     function(a, b) giving the distance between two points; the
#               default is the Frobenius norm of a - b.
#
# Returns the output of the last kmeans.iter() call.
#
# NOTE: while this definition is in scope it masks stats::kmeans.
kmeans <-
  function(points, ncenters, iterations = 10,
           distfun = function(a, b) norm(as.matrix(a - b), type = "F")) {
    # Initial pass: no centers yet, so points are grouped at random.
    newCenters <- kmeans.iter(points, distfun = distfun, ncenters = ncenters)
    # seq_len() instead of 1:iterations, so iterations = 0 runs no refinement
    # pass (1:0 would have run two).
    for (i in seq_len(iterations)) {
      # Turn the previous pass's values into a plain list of center vectors.
      newCenters <- lapply(values(newCenters), unlist)
      newCenters <- kmeans.iter(points, distfun, centers = newCenters)
    }
    newCenters
  }




# Demo driver: build 10,000 noisy 2-D points, store them, and cluster them.
# Point i is centred on (i mod 3, i mod 4) with sd 0.01 on each axis, which
# gives 12 distinct centre combinations.
clustdata <- lapply(
  1:10000,
  function(idx) {
    x <- rnorm(1, mean = idx %% 3, sd = 0.01)
    y <- rnorm(1, mean = idx %% 4, sd = 0.01)
    keyval(idx, c(x, y))
  }
)
# Hand the key/value list to to.dfs() under the path that kmeans() reads below
# (presumably an HDFS path; confirm against the rmr documentation).
to.dfs(clustdata, "/tmp/clustdata")
# Cluster the stored points into 12 groups with the default 10 iterations.
kmeans("/tmp/clustdata", 12)


                                                                                 14
15
15
16
> model <- kmeans(iris[, 1:4], 3, nstart=10)
> modelfilename <- "my_smart_unique_name"
> modelfile <- hdfs.file(modelfilename, "w")
> hdfs.write(model, modelfile)
[1] TRUE
> hdfs.close(modelfile)
[1] TRUE




> modelfile = hdfs.file(modelfilename, "r")
> m <- hdfs.read(modelfile)
Warning message:
In function (h) : Closed unused DFS stream: my_smart_unique_name
> model <- unserialize(m)
> hdfs.close(modelfile)
[1] TRUE
> model


                                                                   17
18
•




    •




        19
•




    20
•
    •
    •
    •
•




        21

More Related Content

KEY
R meets Hadoop
DOCX
imager package in R and examples..
DOCX
Advanced Data Visualization in R- Somes Examples.
DOCX
Basic Calculus in R.
PDF
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
PDF
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
DOCX
Mosaic plot in R.
DOCX
A Shiny Example-- R
R meets Hadoop
imager package in R and examples..
Advanced Data Visualization in R- Somes Examples.
Basic Calculus in R.
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Mosaic plot in R.
A Shiny Example-- R

What's hot (20)

DOCX
ggtimeseries-->ggplot2 extensions
PDF
CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017
PDF
C++ TUTORIAL 6
PDF
ECMAScript 6 major changes
PDF
Python hmm
PDF
C++ TUTORIAL 7
PDF
Om (Cont.)
DOCX
Plot3D Package and Example in R.-Data visualizat,on
PDF
Effector: we need to go deeper
PDF
C++ TUTORIAL 10
PDF
Angular Refactoring in Real World
PPTX
Hacking the Internet of Things for Fun & Profit
PDF
PDF
PDF
C++ TUTORIAL 3
PDF
C++ TUTORIAL 9
PPTX
Clojure to Slang
PDF
Metaprogramming
PDF
Go: It's Not Just For Google
ggtimeseries-->ggplot2 extensions
CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017
C++ TUTORIAL 6
ECMAScript 6 major changes
Python hmm
C++ TUTORIAL 7
Om (Cont.)
Plot3D Package and Example in R.-Data visualizat,on
Effector: we need to go deeper
C++ TUTORIAL 10
Angular Refactoring in Real World
Hacking the Internet of Things for Fun & Profit
C++ TUTORIAL 3
C++ TUTORIAL 9
Clojure to Slang
Metaprogramming
Go: It's Not Just For Google
Ad

Viewers also liked (20)

PDF
A world without islam-graham e. fuller
PPTX
Povezovanje kemijske panoge in delo z mladimi, KOCKE, Ziga Lampe, Drzava za g...
PDF
Sig App4
PPTX
Social Media Success in International Student Recruitment
PPT
SSBs Erling Holmøy_Norge eldes: Langsiktig økonomisk bærekraft 28.01.14
PDF
MvSM: 7) Co tam dávat - tvorba obsahu pro sociální média
PPT
Funcionario, ¿qué le pides tú a la administración electrónica?
PDF
Márkaépítés a fogyasztói kontroll korában 2.0
PDF
Lucy redes sociales myspace
PPT
Advanced php
PPTX
Buyer Persona - Key to B2B online marketing success
KEY
Design persuasivo: alcuni esempi
ODP
Balonmán touro
PPSX
Tesla Croatia
PPTX
Enquête Doctipharma : Les français et la vente de médicaments sur internet
PPT
Amnesty International
PPSX
Baby Love -Wildlife
PPSX
Aseemearth
PPTX
Subsidio i.1 demanda actual
PDF
Valsts pārvaldes institūciju pasūtīto pētījumu koordinācijas sistēma
A world without islam-graham e. fuller
Povezovanje kemijske panoge in delo z mladimi, KOCKE, Ziga Lampe, Drzava za g...
Sig App4
Social Media Success in International Student Recruitment
SSBs Erling Holmøy_Norge eldes: Langsiktig økonomisk bærekraft 28.01.14
MvSM: 7) Co tam dávat - tvorba obsahu pro sociální média
Funcionario, ¿qué le pides tú a la administración electrónica?
Márkaépítés a fogyasztói kontroll korában 2.0
Lucy redes sociales myspace
Advanced php
Buyer Persona - Key to B2B online marketing success
Design persuasivo: alcuni esempi
Balonmán touro
Tesla Croatia
Enquête Doctipharma : Les français et la vente de médicaments sur internet
Amnesty International
Baby Love -Wildlife
Aseemearth
Subsidio i.1 demanda actual
Valsts pārvaldes institūciju pasūtīto pētījumu koordinācijas sistēma
Ad

Similar to RHadoop の紹介 (20)

KEY
RHadoop, R meets Hadoop
DOCX
CLUSTERGRAM
PPT
Jan 2012 HUG: RHadoop
PDF
Monadologie
PPTX
世预赛买球-世预赛买球竞彩平台-世预赛买球竞猜平台|【​网址​🎉ac123.net🎉​】
PPTX
美洲杯买球-美洲杯买球怎么押注-美洲杯买球押注怎么玩|【​网址​🎉ac99.net🎉​】
PPTX
欧洲杯足彩-欧洲杯足彩线上体育买球-欧洲杯足彩买球推荐网站|【​网址​🎉ac55.net🎉​】
PPTX
欧洲杯下注-欧洲杯下注买球网-欧洲杯下注买球网站|【​网址​🎉ac10.net🎉​】
PPTX
世预赛买球-世预赛买球比赛投注-世预赛买球比赛投注官网|【​网址​🎉ac10.net🎉​】
PPTX
世预赛投注-世预赛投注投注官网app-世预赛投注官网app下载|【​网址​🎉ac123.net🎉​】
PPTX
欧洲杯体彩-欧洲杯体彩比赛投注-欧洲杯体彩比赛投注官网|【​网址​🎉ac99.net🎉​】
PPTX
欧洲杯买球-欧洲杯买球投注网-欧洲杯买球投注网站|【​网址​🎉ac44.net🎉​】
DOCX
Advanced Data Visualization Examples with R-Part II
PDF
Javascript
PPT
Python 101 language features and functional programming
KEY
Haskellで学ぶ関数型言語
PPTX
Super Advanced Python –act1
PDF
Scala by Luc Duponcheel
PDF
Map, Reduce and Filter in Swift
PDF
Algorithm Design and Analysis - Practical File
RHadoop, R meets Hadoop
CLUSTERGRAM
Jan 2012 HUG: RHadoop
Monadologie
世预赛买球-世预赛买球竞彩平台-世预赛买球竞猜平台|【​网址​🎉ac123.net🎉​】
美洲杯买球-美洲杯买球怎么押注-美洲杯买球押注怎么玩|【​网址​🎉ac99.net🎉​】
欧洲杯足彩-欧洲杯足彩线上体育买球-欧洲杯足彩买球推荐网站|【​网址​🎉ac55.net🎉​】
欧洲杯下注-欧洲杯下注买球网-欧洲杯下注买球网站|【​网址​🎉ac10.net🎉​】
世预赛买球-世预赛买球比赛投注-世预赛买球比赛投注官网|【​网址​🎉ac10.net🎉​】
世预赛投注-世预赛投注投注官网app-世预赛投注官网app下载|【​网址​🎉ac123.net🎉​】
欧洲杯体彩-欧洲杯体彩比赛投注-欧洲杯体彩比赛投注官网|【​网址​🎉ac99.net🎉​】
欧洲杯买球-欧洲杯买球投注网-欧洲杯买球投注网站|【​网址​🎉ac44.net🎉​】
Advanced Data Visualization Examples with R-Part II
Javascript
Python 101 language features and functional programming
Haskellで学ぶ関数型言語
Super Advanced Python –act1
Scala by Luc Duponcheel
Map, Reduce and Filter in Swift
Algorithm Design and Analysis - Practical File

More from Hidekazu Tanaka (10)

KEY
ggplot2 に入門してみた
KEY
データベースのお話
KEY
フォントのお話
KEY
フォントのお話
KEY
バギングで構築された各決定木
KEY
アンサンブル学習
KEY
Rの紹介
KEY
Rで解く最適化問題 線型計画問題編
PDF
RでMapreduce
PDF
Rによるやさしい統計学 第16章 : 因子分析
ggplot2 に入門してみた
データベースのお話
フォントのお話
フォントのお話
バギングで構築された各決定木
アンサンブル学習
Rの紹介
Rで解く最適化問題 線型計画問題編
RでMapreduce
Rによるやさしい統計学 第16章 : 因子分析

Recently uploaded (20)

PDF
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
PPTX
VMware vSphere Foundation How to Sell Presentation-Ver1.4-2-14-2024.pptx
PPTX
Detection-First SIEM: Rule Types, Dashboards, and Threat-Informed Strategy
PPTX
MYSQL Presentation for SQL database connectivity
PDF
Building Integrated photovoltaic BIPV_UPV.pdf
PPTX
Cloud computing and distributed systems.
PDF
KodekX | Application Modernization Development
PDF
Diabetes mellitus diagnosis method based random forest with bat algorithm
PDF
Shreyas Phanse Resume: Experienced Backend Engineer | Java • Spring Boot • Ka...
PDF
7 ChatGPT Prompts to Help You Define Your Ideal Customer Profile.pdf
PDF
Peak of Data & AI Encore- AI for Metadata and Smarter Workflows
PDF
The Rise and Fall of 3GPP – Time for a Sabbatical?
DOCX
The AUB Centre for AI in Media Proposal.docx
PDF
CIFDAQ's Market Insight: SEC Turns Pro Crypto
PDF
Reach Out and Touch Someone: Haptics and Empathic Computing
PDF
Agricultural_Statistics_at_a_Glance_2022_0.pdf
PDF
Dropbox Q2 2025 Financial Results & Investor Presentation
PDF
Mobile App Security Testing_ A Comprehensive Guide.pdf
PDF
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
PPTX
20250228 LYD VKU AI Blended-Learning.pptx
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
VMware vSphere Foundation How to Sell Presentation-Ver1.4-2-14-2024.pptx
Detection-First SIEM: Rule Types, Dashboards, and Threat-Informed Strategy
MYSQL Presentation for SQL database connectivity
Building Integrated photovoltaic BIPV_UPV.pdf
Cloud computing and distributed systems.
KodekX | Application Modernization Development
Diabetes mellitus diagnosis method based random forest with bat algorithm
Shreyas Phanse Resume: Experienced Backend Engineer | Java • Spring Boot • Ka...
7 ChatGPT Prompts to Help You Define Your Ideal Customer Profile.pdf
Peak of Data & AI Encore- AI for Metadata and Smarter Workflows
The Rise and Fall of 3GPP – Time for a Sabbatical?
The AUB Centre for AI in Media Proposal.docx
CIFDAQ's Market Insight: SEC Turns Pro Crypto
Reach Out and Touch Someone: Haptics and Empathic Computing
Agricultural_Statistics_at_a_Glance_2022_0.pdf
Dropbox Q2 2025 Financial Results & Investor Presentation
Mobile App Security Testing_ A Comprehensive Guide.pdf
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
20250228 LYD VKU AI Blended-Learning.pptx

RHadoop の紹介

  • 1. 1
  • 2. 2
  • 3. • • • • 3
  • 5. • • • • • 5
  • 6. 6
  • 7. • • • • • • • 7
  • 8. R CMD INSTALL 'package filename' 8
  • 9. 9
  • 10. > small.ints = to.dfs(1:10) > out = mapreduce(input = small.ints, map = function(k,v) keyval(k, k^2)) > res = from.dfs(out) > colres <- do.call('rbind', lapply(res,"[[",2)) > t(colres) [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [1,] 1 4 9 16 25 36 49 64 81 100 > groups = to.dfs(rbinom(32, n = 50, prob = 0.4)) > out = mapreduce(input = groups, reduce = function(k,vv) keyval(k, length(vv))) > res = from.dfs(out) > colres <- do.call('rbind', lapply(res,"[[",2)) > t(colres) [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [1,] 2 7 3 1 1 12 2 8 8 1 4 1 10
  • 11. > wordcount = function(input, output = NULL, pattern = " ") { + mapreduce(input = input, + output = output, + textinputformat = rawtextinputformat, + map = function(k ,v) { + lapply(strsplit(x = v, split = pattern) [[1]], + function(w) keyval(w,1)) + }, + reduce = function(k, vv) { + keyval(k, sum(unlist(vv))) + }, + combine = T) + } > out <- wordcount(input="/user/hidekazu/the_social_network.txt") > results <- from.dfs(out) > results <- data.frame(word=unlist(lapply(results,"[[",1)), count=unlist(lapply(results,"[[",2))) > results <- (results[order(results$count, decreasing=TRUE), ]) > head(results) word count 6313 the 1101 2381 a 700 11 26 and 637
  • 12. 12
  • 13. kmeans.iter = function(points, distfun, ncenters = length(centers), centers = NULL) { from.dfs( mapreduce(input = points, map = if (is.null(centers)) { function(k,v)keyval(sample(1:ncenters,1),v) } else { function(k,v) { distances = lapply(centers, function(c) distfun(c,v)) keyval(centers[[which.min(distances)]],v) } }, reduce = function(k,vv) keyval(NULL, apply(do.call(rbind, vv), 2, mean))) ) } 13
  • 14. kmeans = function(points, ncenters, iterations = 10, distfun = function(a,b) norm(as.matrix(a-b), type = 'F')) { newCenters = kmeans.iter(points, distfun = distfun, ncenters = ncenters) for(i in 1:iterations) { newCenters = lapply(values(newCenters), unlist) newCenters = kmeans.iter(points, distfun, centers=newCenters) } newCenters } clustdata = lapply(1:10000, function(i) keyval(i, c(rnorm(1, mean = i%%3, sd = 0.01), rnorm(1, mean = i%%4, sd = 0.01)))) to.dfs(clustdata, "/tmp/clustdata") kmeans ("/tmp/clustdata", 12) 14
  • 15. 15
  • 16. 15
  • 17. 16
  • 18. > model <- kmeans(iris[, 1:4], 3, nstart=10) > modelfilename <- "my_smart_unique_name" > modelfile <- hdfs.file(modelfilename, "w") > hdfs.write(model, modelfile) [1] TRUE > hdfs.close(modelfile) [1] TRUE > modelfile = hdfs.file(modelfilename, "r") > m <- hdfs.read(modelfile) Warning message: In function (h) : Closed unused DFS stream: my_smart_unique_name > model <- unserialize(m) > hdfs.close(modelfile) [1] TRUE > model 17
  • 19. 18
  • 20. • 19
  • 21. 20
  • 22. • • • • 21

Editor's Notes