SlideShare a Scribd company logo
mongoDB




                    advanced analytics and
                    statistics with mongodb
                         John A. De Goes @jdegoes




 http://precog.io                                   04/30/2012
mongoDB




          what do you want
           from your data?
mongoDB




          I want to get and put data | I want aggregates             | I want deep insight
          MongoDB Query Language     | MongoDB Aggregation Framework | ???



                              SQL

     data storage                                           data intelligence
mongoDB




          I want to get and put data | I want aggregates             | I want deep insight
          MongoDB Query Language     | MongoDB Aggregation Framework | Map Reduce



                              SQL

     data storage                                           data intelligence
mongoDB

          /**
           * Map step of the statistics job.
           *
           * Runs with `this` bound to the current document and emits a single
           * partial-statistics record describing just that document's `value`
           * field. The record has the same shape that `reduce` consumes and
           * returns: sum, min, max, count and diff.
           */
          function map() {
              // The field to gather statistics for.
              var sample = this.value;

              // Constant key, so every document lands in one group; substitute a
              // GROUP BY expression here to get per-group statistics instead.
              var groupKey = 1;

              emit(groupKey, {
                  sum: sample,
                  min: sample,
                  max: sample,
                  count: 1,
                  // Running sum of squared deviations from the mean; a lone
                  // sample does not deviate from its own mean.
                  diff: 0
              });
          }

          /**
           * Reduce step: merges partial-statistics records into one record.
           *
           * Each record carries `sum`, `min`, `max`, `count` and `diff`, where
           * `diff` is the running sum of squared deviations from the mean. Two
           * partials are merged by adding their `diff` values plus a correction
           * term built from the difference of their means, so the combined
           * `diff` stays correct however the inputs were grouped.
           *
           * The input records are left untouched: the accumulator is a shallow
           * copy of `values[0]`, not `values[0]` itself.
           *
           * @param {*} key - Group key; not used by the computation.
           * @param {Array<Object>} values - Partial records for this key.
           * @returns {Object|undefined} The merged record, or `undefined` when
           *     `values` is empty.
           */
          function reduce(key, values) {
              if (values.length === 0) {
                  return undefined;
              }

              // Seed the accumulator with a copy so the caller's first record
              // is not modified while merging.
              var first = values[0];
              var a = {};
              for (var field in first) {
                  if (Object.prototype.hasOwnProperty.call(first, field)) {
                      a[field] = first[field];
                  }
              }

              // Start at 1: values[0] already seeded the accumulator.
              for (var i = 1; i < values.length; i++) {
                  var b = values[i]; // merged into 'a'

                  // Difference of the two partial means, and the weight it
                  // carries in the combined sum of squared deviations.
                  var delta = a.sum / a.count - b.sum / b.count;
                  var weight = (a.count * b.count) / (a.count + b.count);

                  // diff must be updated before sum/count, which the helpers
                  // above were computed from.
                  a.diff += b.diff + delta * delta * weight;
                  a.sum += b.sum;
                  a.count += b.count;
                  a.min = Math.min(a.min, b.min);
                  a.max = Math.max(a.max, b.max);
              }

              return a;
          }

          /**
           * Finalize step: derives the summary statistics from a merged record.
           *
           * Adds `avg`, `variance` and `stddev` to `value` in place and returns
           * that same object. The variance divides by `count`, not `count - 1`.
           *
           * @param {*} key - Group key; not used by the computation.
           * @param {Object} value - Merged record with `sum`, `count`, `diff`.
           * @returns {Object} The same `value` object, with the three fields added.
           */
          function finalize(key, value){
              var n = value.count;
              var mean = value.sum / n;
              var spread = value.diff / n;

              value.avg = mean;
              value.variance = spread;
              value.stddev = Math.sqrt(spread);

              return value;
          }
mongoDB




          what if there were
           another way?
mongoDB




                 introducing


          • Statistical query language for JSON data
          • Purely declarative
          • Implicitly parallel
          • Inherently composable
mongoDB




          a taste of quirrel
          pageViews := //pageViews

          bound := 1.5 * stdDev(pageViews.duration)

          avg := mean(pageViews.duration)

          lengthyPageViews := 
            pageViews where pageViews.duration > (avg + bound)

          lengthyPageViews.userId
mongoDB




          a taste of quirrel
          pageViews := //pageViews

          bound := 1.5 * stdDev(pageViews.duration)

          avg := mean(pageViews.duration)

          -- Users who spend an unusually long time looking at a page!
          lengthyPageViews := 
            pageViews where pageViews.duration > (avg + bound)

          lengthyPageViews.userId
mongoDB




          quirrel in 10 minutes
mongoDB




          set-oriented
          in Quirrel everything is
          a set of events
mongoDB




          event
          an event is a JSON value
          paired with an identity
mongoDB




          (really) basic queries
          quirrel> 1
          [1]

          quirrel> true
          [true]

          quirrel> {userId: 1239823, name: "John Doe"}
          [{userId: 1239823, name: "John Doe"}]

          quirrel> 1 + 2
          [3]

          quirrel> (sqrt(16) * 4 - 1) / 3
          [5]
mongoDB




          loading data
          quirrel> //payments

          [{"amount":5,"date":1329741127233,"recipients":
          ["research","marketing"]}, ...]


          quirrel> load("/payments")

          [{"amount":5,"date":1329741127233,"recipients":
          ["research","marketing"]}, ...]
mongoDB




          variables
          quirrel> payments := //payments
                 | payments

          [{"amount":5,"date":1329741127233,"recipients":
          ["research","marketing"]}, ...]


          quirrel> five := 5
                 | five * 2
          [10]
mongoDB




          filtered descent
          quirrel> //users.userId

          [9823461231, 916727123, 23987183, ...]


          quirrel> //payments.recipients[0]

          ["engineering","operations","research", ...]
mongoDB




          reductions
          quirrel> count(//users)
          24185132

          quirrel> mean(//payments.amount)
          87.39

          quirrel> sum(//payments.amount)
          921541.29

          quirrel> stdDev(//payments.amount)
          31.84
mongoDB




          identity matching
                 a*b
            a
            e1
                  ?    b
            e2         e8
            e3         e9
            e4    *    e10
            e5         e11
            e6         e12
                  ?
            e7
mongoDB




          identity matching
          quirrel> orders := //orders
                 | orders.subTotal +
                 | orders.subTotal *
                 | orders.taxRate +
                 | orders.shipping + orders.handling 
          [153.54805, 152.7618, 80.38365, ...]
mongoDB




          values
          quirrel> payments.amount * 0.10
          [6.1, 27.842, 29.084, 50, 0.5, 16.955, ...]
mongoDB




          filtering
          quirrel> users := //users
                 | segment := users.age > 19 & 
                 | users.age < 53 & users.income > 60000
                 | count(users where segment)
          [15]
mongoDB




          chaining
          pageViews := //pageViews

          bound := 1.5 * stdDev(pageViews.duration)

          avg := mean(pageViews.duration)

          lengthyPageViews := 
            pageViews where pageViews.duration > (avg + bound)

          lengthyPageViews.userId
mongoDB




          user functions
          quirrel> pageViews := //pageViews
                 |
                 | statsForUser('userId) :=
                 |   {userId:      'userId, 
                 |    meanPageView: mean(pageViews.duration 
                 |                       where pageViews.userId =  'userId)}
                 |
                 | statsForUser

          [{"userId":12353,"meanPageView":100.66666666666667},{"userId":
          12359,"meanPageView":83}, ...]
mongoDB




          lots more!
          • Cross-joins
          • Self-joins
          • Augmentation
          • Power-packed standard library
mongoDB




          quirrel -> mongodb
          • Quirrel is extremely expressive
          • Aggregation framework insufficient
          • Working with 10gen on new primitives
          • Backup plan: AF + MapReduce
mongoDB




          quirrel -> mongodb
          pageViews := //pageViews

          bound := 1.5 * stdDev(pageViews.duration)

          avg := mean(pageViews.duration)                         -- one-pass map/reduce

          lengthyPageViews :=
            pageViews where pageViews.duration > (avg + bound)

          lengthyPageViews.userId                                 -- one-pass mongo filter
mongoDB




                            qa
                    John A. De Goes @jdegoes




 http://precog.io                              04/30/2012

More Related Content

PDF
Quirrel & R for Dummies
PPTX
Joins and Other Aggregation Enhancements Coming in MongoDB 3.2
PPTX
NoSQL with MongoDB
ODP
Aggregation Framework in MongoDB Overview Part-1
PPTX
The Aggregation Framework
PPTX
Mongo db – document oriented database
PPTX
MongoDB World 2016 : Advanced Aggregation
PPTX
MongoDB Analytics: Learn Aggregation by Example - Exploratory Analytics and V...
Quirrel & R for Dummies
Joins and Other Aggregation Enhancements Coming in MongoDB 3.2
NoSQL with MongoDB
Aggregation Framework in MongoDB Overview Part-1
The Aggregation Framework
Mongo db – document oriented database
MongoDB World 2016 : Advanced Aggregation
MongoDB Analytics: Learn Aggregation by Example - Exploratory Analytics and V...

What's hot (19)

PDF
Aggregation Framework MongoDB Days Munich
PDF
Hadoop - MongoDB Webinar June 2014
PPTX
Querying mongo db
PPTX
MongoDB Aggregation
KEY
Geospatial Indexing and Querying with MongoDB
PDF
MongoDB Aggregation Framework
PDF
3D + MongoDB = 3D Repo
PPTX
Agg framework selectgroup feb2015 v2
PDF
Embedding a language into string interpolator
PPTX
Web Development
PPTX
The Aggregation Framework
PDF
Mongodb Aggregation Pipeline
PPTX
Data Governance with JSON Schema
PPTX
MongoDB Analytics: Learn Aggregation by Example - Exploratory Analytics and V...
PPTX
"Powerful Analysis with the Aggregation Pipeline (Tutorial)"
PPTX
MongoDB - Aggregation Pipeline
PPTX
Getting Started with Geospatial Data in MongoDB
PDF
When to Use MongoDB
PPTX
Aggregation in MongoDB
Aggregation Framework MongoDB Days Munich
Hadoop - MongoDB Webinar June 2014
Querying mongo db
MongoDB Aggregation
Geospatial Indexing and Querying with MongoDB
MongoDB Aggregation Framework
3D + MongoDB = 3D Repo
Agg framework selectgroup feb2015 v2
Embedding a language into string interpolator
Web Development
The Aggregation Framework
Mongodb Aggregation Pipeline
Data Governance with JSON Schema
MongoDB Analytics: Learn Aggregation by Example - Exploratory Analytics and V...
"Powerful Analysis with the Aggregation Pipeline (Tutorial)"
MongoDB - Aggregation Pipeline
Getting Started with Geospatial Data in MongoDB
When to Use MongoDB
Aggregation in MongoDB
Ad

Viewers also liked (20)

PPTX
Using MongoDB As a Tick Database
PDF
Rise of the scientific database
PDF
In-Database Predictive Analytics
PDF
Post-Free: Life After Free Monads
PDF
Analytics Maturity Model
PDF
Фотоматериалы
DOCX
Universidad nacional de chimbor
PDF
Product Management and Systems Thinking
PDF
Barometrul mediului de afaceri romanesc 2016
PDF
How emotional abuse is wrecking your mental health
PPTX
Tulevaisuutemme verkossa
PDF
servo press P2113 BA for press fit
DOCX
Teoría de las relaciones humanas
PDF
Grafico diario del dax perfomance index para el 10 05-2012
DOCX
7 câu mẹ nào cũng muốn hỏi khi mang bầu
PDF
Got centerpiece? (#hewebar 2013 edition)
PPTX
Mobile is your friend, not enemy.
PDF
School of Fish: The MSC End of Term Report on sustainable fish in schools 2015
PPT
Ponencia experiencia e learning y web 2.0
PPT
Using MongoDB As a Tick Database
Rise of the scientific database
In-Database Predictive Analytics
Post-Free: Life After Free Monads
Analytics Maturity Model
Фотоматериалы
Universidad nacional de chimbor
Product Management and Systems Thinking
Barometrul mediului de afaceri romanesc 2016
How emotional abuse is wrecking your mental health
Tulevaisuutemme verkossa
servo press P2113 BA for press fit
Teoría de las relaciones humanas
Grafico diario del dax perfomance index para el 10 05-2012
7 câu mẹ nào cũng muốn hỏi khi mang bầu
Got centerpiece? (#hewebar 2013 edition)
Mobile is your friend, not enemy.
School of Fish: The MSC End of Term Report on sustainable fish in schools 2015
Ponencia experiencia e learning y web 2.0
Ad

Similar to Advanced Analytics & Statistics with MongoDB (20)

PPTX
Shankar's mongo db presentation
PDF
MongoDB and Python
PDF
Building your first app with MongoDB
ODP
Mongo db dla administratora
PDF
MongoDB SoCal 2020: Migrate Anything* to MongoDB Atlas
PDF
MongoDB and Ruby on Rails
PDF
Python and MongoDB
PPTX
Introduction to MongoDB at IGDTUW
PDF
Analytics with MongoDB Aggregation Framework and Hadoop Connector
PPTX
Webinar: Applikationsentwicklung mit MongoDB : Teil 5: Reporting & Aggregation
PPTX
Dev Jumpstart: Build Your First App with MongoDB
KEY
Mongodb intro
PPTX
Back to Basics Webinar 4: Advanced Indexing, Text and Geospatial Indexes
PDF
MongoDB.pdf
PDF
mongodb-introduction
PPTX
[MongoDB.local Bengaluru 2018] Just in Time Validation with JSON Schema
KEY
PPTX
Data Analytics with MongoDB - Jane Fine
PDF
full stack modul 5, mongodb,webpack,front-end,back-end
PPTX
MongoDB and Hadoop: Driving Business Insights
Shankar's mongo db presentation
MongoDB and Python
Building your first app with MongoDB
Mongo db dla administratora
MongoDB SoCal 2020: Migrate Anything* to MongoDB Atlas
MongoDB and Ruby on Rails
Python and MongoDB
Introduction to MongoDB at IGDTUW
Analytics with MongoDB Aggregation Framework and Hadoop Connector
Webinar: Applikationsentwicklung mit MongoDB : Teil 5: Reporting & Aggregation
Dev Jumpstart: Build Your First App with MongoDB
Mongodb intro
Back to Basics Webinar 4: Advanced Indexing, Text and Geospatial Indexes
MongoDB.pdf
mongodb-introduction
[MongoDB.local Bengaluru 2018] Just in Time Validation with JSON Schema
Data Analytics with MongoDB - Jane Fine
full stack modul 5, mongodb,webpack,front-end,back-end
MongoDB and Hadoop: Driving Business Insights

More from John De Goes (20)

PDF
Refactoring Functional Type Classes
PDF
One Monad to Rule Them All
PDF
Error Management: Future vs ZIO
PDF
Atomically { Delete Your Actors }
PDF
The Death of Final Tagless
PDF
Scalaz Stream: Rebirth
PDF
Scalaz Stream: Rebirth
PDF
ZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional Programming
PDF
ZIO Queue
PDF
Blazing Fast, Pure Effects without Monads — LambdaConf 2018
PDF
Scalaz 8: A Whole New Game
PDF
Scalaz 8 vs Akka Actors
PDF
Orthogonal Functional Architecture
PDF
The Design of the Scalaz 8 Effect System
PDF
Quark: A Purely-Functional Scala DSL for Data Processing & Analytics
PDF
Streams for (Co)Free!
PDF
MTL Versus Free
PDF
The Easy-Peasy-Lemon-Squeezy, Statically-Typed, Purely Functional Programming...
PDF
Halogen: Past, Present, and Future
PDF
All Aboard The Scala-to-PureScript Express!
Refactoring Functional Type Classes
One Monad to Rule Them All
Error Management: Future vs ZIO
Atomically { Delete Your Actors }
The Death of Final Tagless
Scalaz Stream: Rebirth
Scalaz Stream: Rebirth
ZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional Programming
ZIO Queue
Blazing Fast, Pure Effects without Monads — LambdaConf 2018
Scalaz 8: A Whole New Game
Scalaz 8 vs Akka Actors
Orthogonal Functional Architecture
The Design of the Scalaz 8 Effect System
Quark: A Purely-Functional Scala DSL for Data Processing & Analytics
Streams for (Co)Free!
MTL Versus Free
The Easy-Peasy-Lemon-Squeezy, Statically-Typed, Purely Functional Programming...
Halogen: Past, Present, and Future
All Aboard The Scala-to-PureScript Express!

Recently uploaded (20)

PPT
Teaching material agriculture food technology
PDF
Encapsulation_ Review paper, used for researhc scholars
PDF
KodekX | Application Modernization Development
PPTX
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
PDF
How UI/UX Design Impacts User Retention in Mobile Apps.pdf
PDF
Review of recent advances in non-invasive hemoglobin estimation
PPTX
Understanding_Digital_Forensics_Presentation.pptx
PDF
Optimiser vos workloads AI/ML sur Amazon EC2 et AWS Graviton
PDF
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
PDF
Building Integrated photovoltaic BIPV_UPV.pdf
PDF
Chapter 3 Spatial Domain Image Processing.pdf
DOCX
The AUB Centre for AI in Media Proposal.docx
PPT
“AI and Expert System Decision Support & Business Intelligence Systems”
PDF
Network Security Unit 5.pdf for BCA BBA.
PPTX
Digital-Transformation-Roadmap-for-Companies.pptx
PPTX
Big Data Technologies - Introduction.pptx
PPTX
Spectroscopy.pptx food analysis technology
PDF
Architecting across the Boundaries of two Complex Domains - Healthcare & Tech...
PPTX
MYSQL Presentation for SQL database connectivity
PDF
Mobile App Security Testing_ A Comprehensive Guide.pdf
Teaching material agriculture food technology
Encapsulation_ Review paper, used for researhc scholars
KodekX | Application Modernization Development
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
How UI/UX Design Impacts User Retention in Mobile Apps.pdf
Review of recent advances in non-invasive hemoglobin estimation
Understanding_Digital_Forensics_Presentation.pptx
Optimiser vos workloads AI/ML sur Amazon EC2 et AWS Graviton
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
Building Integrated photovoltaic BIPV_UPV.pdf
Chapter 3 Spatial Domain Image Processing.pdf
The AUB Centre for AI in Media Proposal.docx
“AI and Expert System Decision Support & Business Intelligence Systems”
Network Security Unit 5.pdf for BCA BBA.
Digital-Transformation-Roadmap-for-Companies.pptx
Big Data Technologies - Introduction.pptx
Spectroscopy.pptx food analysis technology
Architecting across the Boundaries of two Complex Domains - Healthcare & Tech...
MYSQL Presentation for SQL database connectivity
Mobile App Security Testing_ A Comprehensive Guide.pdf

Advanced Analytics & Statistics with MongoDB

  • 1. mongoDB advanced analytics and statistics with mongodb John A. De Goes @jdegoes http://precog.io 04/30/2012
  • 2. mongoDB what do you want from your data?
  • 3. mongoDB I want to get and I want aggregates I want deep insight put data MongoDB MongoDB Query Aggregation ??? Language Framework SQL data storage data intelligence
  • 4. mongoDB I want to get and I want aggregates I want deep insight put data MongoDB MongoDB Map Query Aggregation Reduce Language Framework SQL data storage data intelligence
  • 5. mongoDB function map() {     emit(1, // Or put a GROUP BY key here          {sum: this.value, // the field you want stats for           min: this.value,           max: this.value,           count:1,           diff: 0, // M2,n: sum((val-mean)^2)     }); } function reduce(key, values) {     var a = values[0]; // will reduce into here     for (var i=1/*!*/; i < values.length; i++){         var b = values[i]; // will merge 'b' into 'a'         // temp helpers         var delta = a.sum/a.count - b.sum/b.count; // a.mean - b.mean         var weight = (a.count * b.count)/(a.count + b.count);                  // do the reducing         a.diff += b.diff + delta*delta*weight;         a.sum += b.sum;         a.count += b.count;         a.min = Math.min(a.min, b.min);         a.max = Math.max(a.max, b.max);     }     return a; } function finalize(key, value){     value.avg = value.sum / value.count;     value.variance = value.diff / value.count;     value.stddev = Math.sqrt(value.variance);     return value; }
  • 6. mongoDB what if there were another way?
  • 7. mongoDB introducing • Statistical query language for JSON data • Purely declarative • Implicitly parallel • Inherently composable
  • 8. mongoDB a taste of quirrel pageViews := //pageViews bound := 1.5 * stdDev(pageViews.duration) avg := mean(pageViews.duration) lengthyPageViews :=  pageViews where pageViews.duration > (avg + bound) lengthyPageViews.userId
  • 9. mongoDB a taste of quirrel pageViews := //pageViews bound := 1.5 * stdDev(pageViews.duration) Users who spend an unusually avg := mean(pageViews.duration) long time looking at a page! lengthyPageViews :=  pageViews where pageViews.duration > (avg + bound) lengthyPageViews.userId
  • 10. mongoDB quirrel in 10 minutes
  • 11. mongoDB set-oriented in Quirrel everything is a set of events
  • 12. mongoDB event an event is a JSON value paired with an identity
  • 13. mongoDB (really) basic queries quirrel> 1 [1] quirrel> true [true] quirrel> {userId: 1239823, name: “John Doe”} [{userId: 1239823, name: “John Doe”}] quirrel>1 + 2 [3] quirrel> sqrt(16) * 4 - 1 / 3 [5]
  • 14. mongoDB loading data quirrel> //payments [{"amount":5,"date":1329741127233,"recipients": ["research","marketing"]}, ...] quirrel> load(“/payments”) [{"amount":5,"date":1329741127233,"recipients": ["research","marketing"]}, ...]
  • 15. mongoDB variables quirrel> payments := //payments | payments [{"amount":5,"date":1329741127233,"recipients": ["research","marketing"]}, ...] quirrel> five := 5 | five * 2 [10]
  • 16. mongoDB filtered descent quirrel> //users.userId [9823461231, 916727123, 23987183, ...] quirrel> //payments.recipients[0] ["engineering","operations","research", ...]
  • 17. mongoDB reductions quirrel> count(//users) 24185132 quirrel> mean(//payments.amount) 87.39 quirrel> sum(//payments.amount) 921541.29 quirrel> stdDev(//payments.amount) 31.84
  • 18. mongoDB identity matching a*b a e1 ? b e2 e8 e3 e9 e4 * e10 e5 e11 e6 e12 ? e7
  • 19. mongoDB identity matching quirrel> orders := //orders   | orders.subTotal + | orders.subTotal * | orders.taxRate + | orders.shipping + orders.handling  [153.54805, 152.7618, 80.38365, ...]
  • 20. mongoDB values quirrel> payments.amount * 0.10 [6.1, 27.842, 29.084, 50, 0.5, 16.955, ...]
  • 21. mongoDB filtering quirrel> users := //users   | segment := users.age > 19 &  | users.age < 53 & users.income > 60000   | count(users where segment) [15]
  • 22. mongoDB chaining pageViews := //pageViews bound := 1.5 * stdDev(pageViews.duration) avg := mean(pageViews.duration) lengthyPageViews :=  pageViews where pageViews.duration > (avg + bound) lengthyPageViews.userId
  • 23. mongoDB user functions quirrel> pageViews := //pageViews |   | statsForUser('userId) :=   |   {userId:  'userId,  | meanPageView: mean(pageViews.duration  | where pageViews.userId =  'userId)} |   | statsForUser [{"userId":12353,"meanPageView":100.66666666666667},{"userId": 12359,"meanPageView":83}, ...]
  • 24. mongoDB lots more! • Cross-joins • Self-joins • Augmentation • Power-packed standard library
  • 25. mongoDB quirrel -> mongodb • Quirrel is extremely expressive • Aggregation framework insufficient • Working with 10gen on new primitives • Backup plan: AF + MapReduce
  • 26. mongoDB quirrel -> mongodb pageViews := //pageViews bound := 1.5 * stdDev(pageViews.duration) one-pass avg := mean(pageViews.duration) map/reduce lengthyPageViews :=  pageViews where pageViews.duration > (avg + bound) lengthyPageViews.userId one-pass mongo filter
  • 27. mongoDB qa John A. De Goes @jdegoes http://precog.io 04/30/2012