1
Connect With Us
Website ( )
Free Online R Courses ( )
R Packages ( )
Shiny Apps ( )
Blog ( )
GitHub ( )
YouTube ( )
Twitter ( )
Facebook ( )
Linkedin ( )
• https://www.rsquaredacademy.com/
• https://rsquared-academy.thinkific.com/
• https://pkgs.rsquaredacademy.com
• https://apps.rsquaredacademy.com
• https://blog.rsquaredacademy.com
• https://github.com/rsquaredacademy
• https://www.youtube.com/user/rsquaredin/
• https://twitter.com/rsquaredacademy
• https://www.facebook.com/rsquaredacademy/
• https://in.linkedin.com/company/rsquared-academy
2
Resources
• Slides
• Code & Data
• RStudio Cloud
3
What?
Why?
How?
Use Cases
Demo
•
•
•
•
•
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
Data
• UCI
• data.world
19
Data Dictionary
invoice number
stock code
description
quantity
invoice date
unit price
customer id
country
•
•
•
•
•
•
•
•
20
Libraries
library(readxl)
library(readr)
library(mbar)
library(arules)
library(arulesViz)
library(magrittr)
library(dplyr)
library(lubridate)
library(forcats)
library(ggplot2)
21
Read Data
basket_data <- read.transactions("transaction_data.csv", format = "basket",
sep = ",")
basket_data
## transactions in sparse format with
## 25901 transactions (rows) and
## 10085 items (columns)
22
Data Summary
summary(basket_data)
## transactions as itemMatrix in sparse format with
## 25901 rows (elements/itemsets/transactions) and
## 10085 columns (items) and a density of 0.001660018
##
## most frequent items:
## WHITE HANGING HEART T-LIGHT HOLDER REGENCY CAKESTAND 3 TIER
## 1999 1914
## JUMBO BAG RED RETROSPOT PARTY BUNTING
## 1806 1488
## LUNCH BAG RED RETROSPOT (Other)
## 1404 425005
##
## element (itemset/transaction) length distribution:
## sizes
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 1454 4578 1727 1208 942 891 781 715 696 683 612 642 547 530
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28
## 555 537 479 459 491 428 405 328 311 280 248 261 235 221
23
24
Generate Rules
rules <- apriori(basket_data, parameter = list(supp=0.009, conf=0.8,
target = "rules", maxlen = 4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.009 1
## maxlen target ext
## 4 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 233
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10085 item(s), 25901 transaction(s)] done [1.41s].
## sorting and recoding items ... [508 item(s)] done [0.03s].
## Warning in apriori(basket_data, parameter = list(supp = 0.009, conf =
## 0.8, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
25
Rules Summary
summary(rules)
## set of 22 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4
## 11 9 2
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 2.500 2.591 3.000 4.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.009034 Min. :0.8035 Min. :22.59 Min. :234.0
## 1st Qu.:0.010453 1st Qu.:0.8530 1st Qu.:25.02 1st Qu.:270.8
## Median :0.013223 Median :0.8868 Median :55.94 Median :342.5
## Mean :0.012760 Mean :0.9120 Mean :48.55 Mean :330.5
## 3rd Qu.:0.014362 3rd Qu.:1.0000 3rd Qu.:61.23 3rd Qu.:372.0
## Max. :0.018339 Max. :1.0000 Max. :71.30 Max. :475.0
##
26
Inspect Rules
basket_rules <- sort(rules, by = 'confidence', decreasing = TRUE)
inspect(basket_rules[1:10])
## lhs rhs
## [1] {BACK DOOR} => {KEY FOB}
## [2] {SET 3 RETROSPOT TEA} => {SUGAR}
## [3] {SUGAR} => {SET 3 RETROSPOT TEA}
## [4] {SET 3 RETROSPOT TEA} => {COFFEE}
## [5] {SUGAR} => {COFFEE}
## [6] {SHED} => {KEY FOB}
## [7] {SET 3 RETROSPOT TEA,
## SUGAR} => {COFFEE}
## [8] {COFFEE,
## SET 3 RETROSPOT TEA} => {SUGAR}
## [9] {COFFEE,
## SUGAR} => {SET 3 RETROSPOT TEA}
## [10] {PINK REGENCY TEACUP AND SAUCER,
## REGENCY CAKESTAND 3 TIER,
## ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND SAUCER}
27
28
29
30
Redundant Rules
inspect(rules[is.redundant(rules)])
## lhs rhs support
## [1] {SET 3 RETROSPOT TEA,SUGAR} => {COFFEE} 0.01436238
## [2] {COFFEE,SET 3 RETROSPOT TEA} => {SUGAR} 0.01436238
## [3] {COFFEE,SUGAR} => {SET 3 RETROSPOT TEA} 0.01436238
## confidence lift count
## [1] 1 55.94168 372
## [2] 1 69.62634 372
## [3] 1 69.62634 372
31
Non Redundant Rules
inspect(rules[!is.redundant(rules)])
## lhs rhs
## [1] {REGENCY TEA PLATE PINK} => {REGENCY TEA PLATE GREEN}
## [2] {BACK DOOR} => {KEY FOB}
## [3] {SET 3 RETROSPOT TEA} => {SUGAR}
## [4] {SUGAR} => {SET 3 RETROSPOT TEA}
## [5] {SET 3 RETROSPOT TEA} => {COFFEE}
## [6] {COFFEE} => {SET 3 RETROSPOT TEA}
## [7] {SUGAR} => {COFFEE}
## [8] {COFFEE} => {SUGAR}
## [9] {REGENCY TEA PLATE GREEN} => {REGENCY TEA PLATE ROSES}
## [10] {SHED} => {KEY FOB}
## [11] {SET/6 RED SPOTTY PAPER CUPS} => {SET/6 RED SPOTTY PAPER PLATES}
## [12] {SET/20 RED RETROSPOT PAPER NAPKINS,
## SET/6 RED SPOTTY PAPER CUPS} => {SET/6 RED SPOTTY PAPER PLATES}
## [13] {PINK REGENCY TEACUP AND SAUCER,
## ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND SAUCER}
## [14] {GREEN REGENCY TEACUP AND SAUCER,
## PINK REGENCY TEACUP AND SAUCER} => {ROSES REGENCY TEACUP AND SAUCER}
32
What influenced purchase of product X?
sugar_rules <- apriori(basket_data, parameter = list(supp = 0.009, conf = 0.8),
appearance = list(default = "lhs", rhs = "SUGAR"))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.009 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 233
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[10085 item(s), 25901 transaction(s)] done [1.32s].
## sorting and recoding items ... [508 item(s)] done [0.03s].
33
What influenced purchase of product X?
rules_sugar <- sort(sugar_rules, by = "confidence", decreasing = TRUE)
inspect(rules_sugar)
## lhs rhs support confidence lift
## [1] {SET 3 RETROSPOT TEA} => {SUGAR} 0.01436238 1.0000000 69.62634
## [2] {COFFEE,SET 3 RETROSPOT TEA} => {SUGAR} 0.01436238 1.0000000 69.62634
## [3] {COFFEE} => {SUGAR} 0.01436238 0.8034557 55.94168
## count
## [1] 372
## [2] 372
## [3] 372
34
What purchases did product X influence?
sugar_rules <- apriori(basket_data, parameter = list(supp = 0.009, conf = 0.8),
appearance = list(default = "rhs", lhs = "SUGAR"))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.009 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 233
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[10085 item(s), 25901 transaction(s)] done [1.35s].
## sorting and recoding items ... [508 item(s)] done [0.03s].
35
What purchases did product X influence?
rules_sugar <- sort(sugar_rules, by = "confidence", decreasing = TRUE)
inspect(rules_sugar)
## lhs rhs support confidence lift count
## [1] {SUGAR} => {SET 3 RETROSPOT TEA} 0.01436238 1 69.62634 372
## [2] {SUGAR} => {COFFEE} 0.01436238 1 55.94168 372
36
Top Rules by Support
supp_rules <- sort(rules, by = 'support', decreasing = TRUE)
top_rules <- supp_rules[1:10]
inspect(top_rules)
## lhs rhs
## [1] {PINK REGENCY TEACUP AND SAUCER,
## ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND SAUCER}
## [2] {GREEN REGENCY TEACUP AND SAUCER,
## PINK REGENCY TEACUP AND SAUCER} => {ROSES REGENCY TEACUP AND SAUCER}
## [3] {SET 3 RETROSPOT TEA} => {SUGAR}
## [4] {SUGAR} => {SET 3 RETROSPOT TEA}
## [5] {SET 3 RETROSPOT TEA} => {COFFEE}
## [6] {COFFEE} => {SET 3 RETROSPOT TEA}
## [7] {SUGAR} => {COFFEE}
## [8] {COFFEE} => {SUGAR}
## [9] {SET 3 RETROSPOT TEA,
## SUGAR} => {COFFEE}
## [10] {COFFEE,
## SET 3 RETROSPOT TEA} => {SUGAR}
37
Top Rules by Confidence
conf_rules <- sort(rules, by = 'confidence', decreasing = TRUE)
top_rules <- conf_rules[1:10]
inspect(top_rules)
## lhs rhs
## [1] {BACK DOOR} => {KEY FOB}
## [2] {SET 3 RETROSPOT TEA} => {SUGAR}
## [3] {SUGAR} => {SET 3 RETROSPOT TEA}
## [4] {SET 3 RETROSPOT TEA} => {COFFEE}
## [5] {SUGAR} => {COFFEE}
## [6] {SHED} => {KEY FOB}
## [7] {SET 3 RETROSPOT TEA,
## SUGAR} => {COFFEE}
## [8] {COFFEE,
## SET 3 RETROSPOT TEA} => {SUGAR}
## [9] {COFFEE,
## SUGAR} => {SET 3 RETROSPOT TEA}
## [10] {PINK REGENCY TEACUP AND SAUCER,
## REGENCY CAKESTAND 3 TIER,
## ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND SAUCER}
38
Top Rules by Lift
lift_rules <- sort(rules, by = 'lift', decreasing = TRUE)
top_rules <- lift_rules[1:10]
inspect(top_rules)
## lhs rhs suppo
## [1] {REGENCY TEA PLATE PINK} => {REGENCY TEA PLATE GREEN} 0.0090344
## [2] {SET 3 RETROSPOT TEA} => {SUGAR} 0.0143623
## [3] {SUGAR} => {SET 3 RETROSPOT TEA} 0.0143623
## [4] {COFFEE,
## SET 3 RETROSPOT TEA} => {SUGAR} 0.0143623
## [5] {COFFEE,
## SUGAR} => {SET 3 RETROSPOT TEA} 0.0143623
## [6] {BACK DOOR} => {KEY FOB} 0.0096135
## [7] {SHED} => {KEY FOB} 0.0112736
## [8] {REGENCY TEA PLATE GREEN} => {REGENCY TEA PLATE ROSES} 0.0103470
## [9] {SET 3 RETROSPOT TEA} => {COFFEE} 0.0143623
## [10] {COFFEE} => {SET 3 RETROSPOT TEA} 0.0143623
39
40
41
42
43
44
45
Summary
unsupervised data mining technique
uncovers products frequently bought together
creates if-then scenario rules
cost-effective, insightful and actionable
association rule mining has applications in several industries
directionality of rule is lost while using lift
confidence as a measure can be misleading
•
•
•
•
•
•
•
46
47

More Related Content

PDF
Practical Introduction to Web scraping using R
PDF
Introduction to tibbles
PDF
Explore Data using dplyr
PDF
Read/Import data from flat/delimited files into R
PDF
Read data from Excel spreadsheets into R
PDF
Writing Readable Code with Pipes
PDF
Data Wrangling with dplyr
PDF
Easy HTML Tables in RStudio with Tabyl and kableExtra
Practical Introduction to Web scraping using R
Introduction to tibbles
Explore Data using dplyr
Read/Import data from flat/delimited files into R
Read data from Excel spreadsheets into R
Writing Readable Code with Pipes
Data Wrangling with dplyr
Easy HTML Tables in RStudio with Tabyl and kableExtra

What's hot (17)

PDF
第3回 データフレームの基本操作 その1(解答付き)
PDF
Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practiti...
PDF
第3回 データフレームの基本操作 その1
PDF
第2回 基本演算,データ型の基礎,ベクトルの操作方法
PDF
第6回 関数とフロー制御
PPT
wreewrer
PDF
M12 random forest-part01
PDF
02 modèle microscopique du gaz parfait, pression et température
PDF
M11 bagging loo cv
PDF
Java Unicode with Live GUI Examples
PDF
Java Unicode with Cool GUI Examples
PDF
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
PDF
30ประชาคมอาเซียนมีผลอย่างไรต่อชีวิตของเรา
PDF
M09-Cross validating-naive-bayes
PDF
Dipôles linéaires, régime transitoire
PDF
Prediction
PDF
Causality
第3回 データフレームの基本操作 その1(解答付き)
Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practiti...
第3回 データフレームの基本操作 その1
第2回 基本演算,データ型の基礎,ベクトルの操作方法
第6回 関数とフロー制御
wreewrer
M12 random forest-part01
02 modèle microscopique du gaz parfait, pression et température
M11 bagging loo cv
Java Unicode with Live GUI Examples
Java Unicode with Cool GUI Examples
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
30ประชาคมอาเซียนมีผลอย่างไรต่อชีวิตของเรา
M09-Cross validating-naive-bayes
Dipôles linéaires, régime transitoire
Prediction
Causality
Ad

Similar to Market Basket Analysis in R (20)

PDF
Case study on Transaction in Grocery Store
PDF
Association rules in r
PDF
Market basket analysis using apriori algorithm on
PDF
Data Science - Part VI - Market Basket and Product Recommendation Engines
PDF
6. Association Rule.pdf
PPTX
PPT
Data mining arm-2009-v0
PPTX
What is FP Growth Analysis and How Can a Business Use Frequent Pattern Mining...
PPTX
ASSOCIATION Rule plus MArket basket Analysis.pptx
PPTX
Ml Market Basket Analysis
PDF
Association rules and frequent pattern growth algorithms
PPTX
Associations analysis datamining and data warehouse
PPTX
Sample Thesis Proposal for all students.pptx
PPTX
Lect6 Association rule & Apriori algorithm
PPTX
Data mining in market basket analysis
PPTX
MODULE 5 _ Mining frequent patterns and associations.pptx
PPTX
Data SAcience with r progarmming Unit - V Part-1.pptx
PPTX
MBKM_Minggu 9_Association Rule with R Studio.pptx
PPT
MarketBasket(BahanAR-2)gfhjghhhbjbjbn.ppt
PDF
Market Basket Analysis of bakery Shop
Case study on Transaction in Grocery Store
Association rules in r
Market basket analysis using apriori algorithm on
Data Science - Part VI - Market Basket and Product Recommendation Engines
6. Association Rule.pdf
Data mining arm-2009-v0
What is FP Growth Analysis and How Can a Business Use Frequent Pattern Mining...
ASSOCIATION Rule plus MArket basket Analysis.pptx
Ml Market Basket Analysis
Association rules and frequent pattern growth algorithms
Associations analysis datamining and data warehouse
Sample Thesis Proposal for all students.pptx
Lect6 Association rule & Apriori algorithm
Data mining in market basket analysis
MODULE 5 _ Mining frequent patterns and associations.pptx
Data SAcience with r progarmming Unit - V Part-1.pptx
MBKM_Minggu 9_Association Rule with R Studio.pptx
MarketBasket(BahanAR-2)gfhjghhhbjbjbn.ppt
Market Basket Analysis of bakery Shop
Ad

More from Rsquared Academy (20)

PDF
Handling Date & Time in R
PDF
Joining Data with dplyr
PDF
Variables & Data Types in R
PDF
How to install & update R packages?
PDF
How to get help in R?
PDF
Introduction to R
PDF
RMySQL Tutorial For Beginners
PDF
R Markdown Tutorial For Beginners
PDF
R Data Visualization Tutorial: Bar Plots
PDF
R Programming: Introduction to Matrices
PDF
R Programming: Introduction to Vectors
PPTX
R Programming: Variables & Data Types
PDF
Data Visualization With R: Learn To Combine Multiple Graphs
PDF
R Data Visualization: Learn To Add Text Annotations To Plots
PDF
Data Visualization With R: Learn To Modify Font Of Graphical Parameters
PDF
Data Visualization With R: Learn To Modify Color Of Plots
PDF
Data Visualization With R: Learn To Modify Title, Axis Labels & Range
PDF
Data Visualization With R: Introduction
PDF
Data Visualization With R
PDF
R Programming: Mathematical Functions In R
Handling Date & Time in R
Joining Data with dplyr
Variables & Data Types in R
How to install & update R packages?
How to get help in R?
Introduction to R
RMySQL Tutorial For Beginners
R Markdown Tutorial For Beginners
R Data Visualization Tutorial: Bar Plots
R Programming: Introduction to Matrices
R Programming: Introduction to Vectors
R Programming: Variables & Data Types
Data Visualization With R: Learn To Combine Multiple Graphs
R Data Visualization: Learn To Add Text Annotations To Plots
Data Visualization With R: Learn To Modify Font Of Graphical Parameters
Data Visualization With R: Learn To Modify Color Of Plots
Data Visualization With R: Learn To Modify Title, Axis Labels & Range
Data Visualization With R: Introduction
Data Visualization With R
R Programming: Mathematical Functions In R

Recently uploaded (20)

PPTX
DS-40-Pre-Engagement and Kickoff deck - v8.0.pptx
PPTX
CHAPTER-2-THE-ACCOUNTING-PROCESS-2-4.pptx
PPTX
New ISO 27001_2022 standard and the changes
PPTX
Caseware_IDEA_Detailed_Presentation.pptx
PPTX
chrmotography.pptx food anaylysis techni
PPTX
statsppt this is statistics ppt for giving knowledge about this topic
PPTX
chuitkarjhanbijunsdivndsijvndiucbhsaxnmzsicvjsd
PDF
Tetra Pak Index 2023 - The future of health and nutrition - Full report.pdf
PDF
Votre score augmente si vous choisissez une catégorie et que vous rédigez une...
PPTX
Tapan_20220802057_Researchinternship_final_stage.pptx
PPT
statistics analysis - topic 3 - describing data visually
PPTX
Machine Learning and working of machine Learning
PPTX
Phase1_final PPTuwhefoegfohwfoiehfoegg.pptx
PPT
expt-design-lecture-12 hghhgfggjhjd (1).ppt
PDF
©️ 02_SKU Automatic SW Robotics for Microsoft PC.pdf
PDF
Session 11 - Data Visualization Storytelling (2).pdf
PPTX
retention in jsjsksksksnbsndjddjdnFPD.pptx
PDF
REAL ILLUMINATI AGENT IN KAMPALA UGANDA CALL ON+256765750853/0705037305
PDF
Navigating the Thai Supplements Landscape.pdf
PPTX
1 hour to get there before the game is done so you don’t need a car seat for ...
DS-40-Pre-Engagement and Kickoff deck - v8.0.pptx
CHAPTER-2-THE-ACCOUNTING-PROCESS-2-4.pptx
New ISO 27001_2022 standard and the changes
Caseware_IDEA_Detailed_Presentation.pptx
chrmotography.pptx food anaylysis techni
statsppt this is statistics ppt for giving knowledge about this topic
chuitkarjhanbijunsdivndsijvndiucbhsaxnmzsicvjsd
Tetra Pak Index 2023 - The future of health and nutrition - Full report.pdf
Votre score augmente si vous choisissez une catégorie et que vous rédigez une...
Tapan_20220802057_Researchinternship_final_stage.pptx
statistics analysis - topic 3 - describing data visually
Machine Learning and working of machine Learning
Phase1_final PPTuwhefoegfohwfoiehfoegg.pptx
expt-design-lecture-12 hghhgfggjhjd (1).ppt
©️ 02_SKU Automatic SW Robotics for Microsoft PC.pdf
Session 11 - Data Visualization Storytelling (2).pdf
retention in jsjsksksksnbsndjddjdnFPD.pptx
REAL ILLUMINATI AGENT IN KAMPALA UGANDA CALL ON+256765750853/0705037305
Navigating the Thai Supplements Landscape.pdf
1 hour to get there before the game is done so you don’t need a car seat for ...

Market Basket Analysis in R

  • 1. 1
  • 2. Connect With Us Website ( ) Free Online R Courses ( ) R Packages ( ) Shiny Apps ( ) Blog ( ) GitHub ( ) YouTube ( ) Twitter ( ) Facebook ( ) Linkedin ( ) • https://www.rsquaredacademy.com/ • https://rsquared-academy.thinkific.com/ • https://pkgs.rsquaredacademy.com • https://apps.rsquaredacademy.com • https://blog.rsquaredacademy.com • https://github.com/rsquaredacademy • https://www.youtube.com/user/rsquaredin/ • https://twitter.com/rsquaredacademy • https://www.facebook.com/rsquaredacademy/ • https://in.linkedin.com/company/rsquared-academy 2
  • 3. Resources • Slides • Code & Data • RStudio Cloud 3
  • 5. 5
  • 6. 6
  • 7. 7
  • 8. 8
  • 9. 9
  • 10. 10
  • 11. 11
  • 12. 12
  • 13. 13
  • 14. 14
  • 15. 15
  • 16. 16
  • 17. 17
  • 18. 18
  • 20. Data Dictionary invoice number stock code description quantity invoice date unit price customer id country • • • • • • • • 20
  • 22. Read Data basket_data <- read.transactions("transaction_data.csv", format = "basket", sep = ",") basket_data ## transactions in sparse format with ## 25901 transactions (rows) and ## 10085 items (columns) 22
  • 23. Data Summary summary(basket_data) ## transactions as itemMatrix in sparse format with ## 25901 rows (elements/itemsets/transactions) and ## 10085 columns (items) and a density of 0.001660018 ## ## most frequent items: ## WHITE HANGING HEART T-LIGHT HOLDER REGENCY CAKESTAND 3 TIER ## 1999 1914 ## JUMBO BAG RED RETROSPOT PARTY BUNTING ## 1806 1488 ## LUNCH BAG RED RETROSPOT (Other) ## 1404 425005 ## ## element (itemset/transaction) length distribution: ## sizes ## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 ## 1454 4578 1727 1208 942 891 781 715 696 683 612 642 547 530 ## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 ## 555 537 479 459 491 428 405 328 311 280 248 261 235 221 23
  • 24. 24
  • 25. Generate Rules rules <- apriori(basket_data, parameter = list(supp=0.009, conf=0.8, target = "rules", maxlen = 4)) ## Apriori ## ## Parameter specification: ## confidence minval smax arem aval originalSupport maxtime support mi ## 0.8 0.1 1 none FALSE TRUE 5 0.009 ## maxlen target ext ## 4 rules FALSE ## ## Algorithmic control: ## filter tree heap memopt load sort verbose ## 0.1 TRUE TRUE FALSE TRUE 2 TRUE ## ## Absolute minimum support count: 233 ## ## set item appearances ...[0 item(s)] done [0.00s]. ## set transactions ...[10085 item(s), 25901 transaction(s)] done [1.41s ## sorting and recoding items [508 item(s)] done [0 03s] ## Warning in apriori(basket_data, parameter = list(supp = 0.009, conf = ## 0.8, : Mining stopped (maxlen reached). Only patterns up to a length ## returned! 25
  • 26. Rules Summary summary(rules) ## set of 22 rules ## ## rule length distribution (lhs + rhs):sizes ## 2 3 4 ## 11 9 2 ## ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 2.000 2.000 2.500 2.591 3.000 4.000 ## ## summary of quality measures: ## support confidence lift count ## Min. :0.009034 Min. :0.8035 Min. :22.59 Min. :234.0 ## 1st Qu.:0.010453 1st Qu.:0.8530 1st Qu.:25.02 1st Qu.:270.8 ## Median :0.013223 Median :0.8868 Median :55.94 Median :342.5 ## Mean :0.012760 Mean :0.9120 Mean :48.55 Mean :330.5 ## 3rd Qu.:0.014362 3rd Qu.:1.0000 3rd Qu.:61.23 3rd Qu.:372.0 ## Max. :0.018339 Max. :1.0000 Max. :71.30 Max. :475.0 ## 26
  • 27. Inspect Rules basket_rules <- sort(rules, by = 'confidence', decreasing = TRUE) inspect(basket_rules[1:10]) ## lhs rhs ## [1] {BACK DOOR} => {KEY FOB} ## [2] {SET 3 RETROSPOT TEA} => {SUGAR} ## [3] {SUGAR} => {SET 3 RETROSPOT TEA} ## [4] {SET 3 RETROSPOT TEA} => {COFFEE} ## [5] {SUGAR} => {COFFEE} ## [6] {SHED} => {KEY FOB} ## [7] {SET 3 RETROSPOT TEA, ## SUGAR} => {COFFEE} ## [8] {COFFEE, ## SET 3 RETROSPOT TEA} => {SUGAR} ## [9] {COFFEE, ## SUGAR} => {SET 3 RETROSPOT TEA} ## [10] {PINK REGENCY TEACUP AND SAUCER, ## REGENCY CAKESTAND 3 TIER, ## ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND S 27
  • 28. 28
  • 29. 29
  • 30. 30
  • 31. Redundant Rules inspect(rules[is.redundant(rules)]) ## lhs rhs support ## [1] {SET 3 RETROSPOT TEA,SUGAR} => {COFFEE} 0.01436238 ## [2] {COFFEE,SET 3 RETROSPOT TEA} => {SUGAR} 0.01436238 ## [3] {COFFEE,SUGAR} => {SET 3 RETROSPOT TEA} 0.01436238 ## confidence lift count ## [1] 1 55.94168 372 ## [2] 1 69.62634 372 ## [3] 1 69.62634 372 31
  • 32. Non Redundant Rules inspect(rules[!is.redundant(rules)]) ## lhs rhs ## [1] {REGENCY TEA PLATE PINK} => {REGENCY TEA PLATE GREEN ## [2] {BACK DOOR} => {KEY FOB} ## [3] {SET 3 RETROSPOT TEA} => {SUGAR} ## [4] {SUGAR} => {SET 3 RETROSPOT TEA} ## [5] {SET 3 RETROSPOT TEA} => {COFFEE} ## [6] {COFFEE} => {SET 3 RETROSPOT TEA} ## [7] {SUGAR} => {COFFEE} ## [8] {COFFEE} => {SUGAR} ## [9] {REGENCY TEA PLATE GREEN} => {REGENCY TEA PLATE ROSES ## [10] {SHED} => {KEY FOB} ## [11] {SET/6 RED SPOTTY PAPER CUPS} => {SET/6 RED SPOTTY PAPER ## [12] {SET/20 RED RETROSPOT PAPER NAPKINS, ## SET/6 RED SPOTTY PAPER CUPS} => {SET/6 RED SPOTTY PAPER ## [13] {PINK REGENCY TEACUP AND SAUCER, ## ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AN ## [14] {GREEN REGENCY TEACUP AND SAUCER, ## PINK REGENCY TEACUP AND SAUCER} => {ROSES REGENCY TEACUP AN 32
  • 33. What influenced purchase of product X? sugar_rules <- apriori(basket_data, parameter = list(supp = 0.009, conf appearance = list(default = "lhs", rhs = "SUGAR")) ## Apriori ## ## Parameter specification: ## confidence minval smax arem aval originalSupport maxtime support mi ## 0.8 0.1 1 none FALSE TRUE 5 0.009 ## maxlen target ext ## 10 rules FALSE ## ## Algorithmic control: ## filter tree heap memopt load sort verbose ## 0.1 TRUE TRUE FALSE TRUE 2 TRUE ## ## Absolute minimum support count: 233 ## ## set item appearances ...[1 item(s)] done [0.00s]. ## set transactions ...[10085 item(s), 25901 transaction(s)] done [1.32s ## sorting and recoding items [508 item(s)] done [0 03s] 33
  • 34. What influenced purchase of product X? rules_sugar <- sort(sugar_rules, by = "confidence", decreasing = TRUE) inspect(rules_sugar) ## lhs rhs support confidence lif ## [1] {SET 3 RETROSPOT TEA} => {SUGAR} 0.01436238 1.0000000 69. ## [2] {COFFEE,SET 3 RETROSPOT TEA} => {SUGAR} 0.01436238 1.0000000 69. ## [3] {COFFEE} => {SUGAR} 0.01436238 0.8034557 55. ## count ## [1] 372 ## [2] 372 ## [3] 372 34
  • 35. What purchases did product X influence? sugar_rules <- apriori(basket_data, parameter = list(supp = 0.009, conf appearance = list(default = "rhs", lhs = "SUGAR")) ## Apriori ## ## Parameter specification: ## confidence minval smax arem aval originalSupport maxtime support mi ## 0.8 0.1 1 none FALSE TRUE 5 0.009 ## maxlen target ext ## 10 rules FALSE ## ## Algorithmic control: ## filter tree heap memopt load sort verbose ## 0.1 TRUE TRUE FALSE TRUE 2 TRUE ## ## Absolute minimum support count: 233 ## ## set item appearances ...[1 item(s)] done [0.00s]. ## set transactions ...[10085 item(s), 25901 transaction(s)] done [1.35s ## sorting and recoding items [508 item(s)] done [0 03s] 35
  • 36. What purchases did product X influence? rules_sugar <- sort(sugar_rules, by = "confidence", decreasing = TRUE) inspect(rules_sugar) ## lhs rhs support confidence lift c ## [1] {SUGAR} => {SET 3 RETROSPOT TEA} 0.01436238 1 69.62634 3 ## [2] {SUGAR} => {COFFEE} 0.01436238 1 55.94168 3 36
  • 37. Top Rules by Support supp_rules <- sort(rules, by = 'support', decreasing = TRUE) top_rules <- supp_rules[1:10] inspect(top_rules) ## lhs rhs ## [1] {PINK REGENCY TEACUP AND SAUCER, ## ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND S ## [2] {GREEN REGENCY TEACUP AND SAUCER, ## PINK REGENCY TEACUP AND SAUCER} => {ROSES REGENCY TEACUP AND S ## [3] {SET 3 RETROSPOT TEA} => {SUGAR} ## [4] {SUGAR} => {SET 3 RETROSPOT TEA} ## [5] {SET 3 RETROSPOT TEA} => {COFFEE} ## [6] {COFFEE} => {SET 3 RETROSPOT TEA} ## [7] {SUGAR} => {COFFEE} ## [8] {COFFEE} => {SUGAR} ## [9] {SET 3 RETROSPOT TEA, ## SUGAR} => {COFFEE} ## [10] {COFFEE, ## SET 3 RETROSPOT TEA} => {SUGAR} 37
  • 38. Top Rules by Confidence conf_rules <- sort(rules, by = 'confidence', decreasing = TRUE) top_rules <- conf_rules[1:10] inspect(top_rules) ## lhs rhs ## [1] {BACK DOOR} => {KEY FOB} ## [2] {SET 3 RETROSPOT TEA} => {SUGAR} ## [3] {SUGAR} => {SET 3 RETROSPOT TEA} ## [4] {SET 3 RETROSPOT TEA} => {COFFEE} ## [5] {SUGAR} => {COFFEE} ## [6] {SHED} => {KEY FOB} ## [7] {SET 3 RETROSPOT TEA, ## SUGAR} => {COFFEE} ## [8] {COFFEE, ## SET 3 RETROSPOT TEA} => {SUGAR} ## [9] {COFFEE, ## SUGAR} => {SET 3 RETROSPOT TEA} ## [10] {PINK REGENCY TEACUP AND SAUCER, ## REGENCY CAKESTAND 3 TIER, ## ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND S 38
  • 39. Top Rules by Lift lift_rules <- sort(rules, by = 'lift', decreasing = TRUE) top_rules <- lift_rules[1:10] inspect(top_rules) ## lhs rhs suppo ## [1] {REGENCY TEA PLATE PINK} => {REGENCY TEA PLATE GREEN} 0.0090344 ## [2] {SET 3 RETROSPOT TEA} => {SUGAR} 0.0143623 ## [3] {SUGAR} => {SET 3 RETROSPOT TEA} 0.0143623 ## [4] {COFFEE, ## SET 3 RETROSPOT TEA} => {SUGAR} 0.0143623 ## [5] {COFFEE, ## SUGAR} => {SET 3 RETROSPOT TEA} 0.0143623 ## [6] {BACK DOOR} => {KEY FOB} 0.0096135 ## [7] {SHED} => {KEY FOB} 0.0112736 ## [8] {REGENCY TEA PLATE GREEN} => {REGENCY TEA PLATE ROSES} 0.0103470 ## [9] {SET 3 RETROSPOT TEA} => {COFFEE} 0.0143623 ## [10] {COFFEE} => {SET 3 RETROSPOT TEA} 0.0143623 39
  • 40. 40
  • 41. 41
  • 42. 42
  • 43. 43
  • 44. 44
  • 45. 45
  • 46. Summary unsupervised data mining technique uncovers products frequently bought together creates if-then scenario rules cost-effective, insightful and actionable association rule mining has applications in several industries directionality of rule is lost while using lift confidence as a measure can be misleading • • • • • • • 46
  • 47. 47