SlideShare a Scribd company logo
AI & MACHINE LEARNING
MODULE 2 : EXPLORATORY DATA ANALYSIS
Basic R objects
•Variables
•Vector
•Array
•Matrices
•Data Frame
BASIC FUNCTIONS
# Load the built-in mtcars dataset into this session.
data()          # list the datasets that are available
data("mtcars")  # load mtcars by name

# Print the complete data frame.
mtcars

# Data dictionary: one line per variable with its type and leading values.
str(mtcars)
dim(mtcars)    # dimensions of the object (rows, columns)
names(mtcars)  # column names
class(mtcars)  # class of the object (numeric, matrix, data frame, etc.)

# Top and bottom records.
head(mtcars)          # top records of mtcars
head(mtcars, n = 10)  # first 10 rows
tail(mtcars)          # bottom records of mtcars

## Variable identification - inferences
nrow(mtcars)
ncol(mtcars)
mean(mtcars[["cyl"]])   # cyl is column 2 of mtcars
median(mtcars[["am"]])  # am is column 9 of mtcars
range(mtcars[["mpg"]])
mean(mtcars[["mpg"]])
1. UNIVARIATE ANALYSIS
# Interquartile range of mpg (75th percentile - 25th percentile)
IQR(mtcars$mpg)

## Univariate Analysis (Pattern Recognition)
summary(mtcars)  # complete summary of the dataset, column by column

# fivenum() works on a single numeric vector, so apply it to every column.
# Each column of the result holds: minimum, lower hinge, median,
# upper hinge, maximum.
vapply(mtcars, fivenum, numeric(5))
fivenum(mtcars$mpg)

# Edit the data for outliers.
# edit() opens an interactive data editor, so only call it when a user is
# present; in a batch run the dataset is left unchanged.
if (interactive()) {
  mtcars <- edit(mtcars)
}
mtcars
HISTOGRAM
# Histogram of miles per gallon
hist(mtcars$mpg)
hist(mtcars$mpg, col = "Red")

# Colored histogram with a different number of bins
hist(mtcars$mpg, breaks = 12, col = "red")

# Add labels to the graph.
# hist() also returns the "histogram" object it drew; it is kept in `h`.
x <- mtcars$mpg
h <- hist(x, breaks = 10, col = "red", xlab = "Miles Per Gallon",
          main = "Histogram")

## Histograms can be a poor method for determining the shape of a
## distribution because it is so strongly affected by the number of bins used.
## Kernel density plots are usually a much more effective way to view the
## distribution of a variable.

# Kernel density plot
d <- density(mtcars$mpg)  # returns the density data
plot(d)                   # plots the results

# Filled density plot (reuses the density estimate computed above)
plot(d, main = "Kernel Density of Miles Per Gallon")
polygon(d, col = "red", border = "blue")
BAR PLOT
# Simple bar plot: number of cars for each gear count
counts <- table(mtcars[["gear"]])
counts

# Default fill first, then the same chart with yellow bars
barplot(counts, xlab = "Number of Gears", main = "Car Distribution")
barplot(
  counts,
  xlab = "Number of Gears",
  main = "Car Distribution",
  col = "Yellow"
)

# Simple horizontal bar plot, then with descriptive bar labels
counts <- table(mtcars[["gear"]])
barplot(counts, horiz = TRUE, main = "Car Distribution")
barplot(
  counts,
  horiz = TRUE,
  main = "Car Distribution",
  names.arg = c("3 Gears", "4 Gears", "5 Gears")
)
BOX PLOT
# Box plots of miles per gallon: default, horizontal, colored, and titled
mileage <- mtcars$mpg
boxplot(mileage)
boxplot(mileage, horizontal = TRUE)
boxplot(mileage, col = "Pink", horizontal = TRUE)
boxplot(mileage, main = "Mileage", col = "Green", horizontal = TRUE)
2. BIVARIATE ANALYSIS
o Continuous Numerical Variables vs Continuous Numerical Variables
o Continuous Numerical Variables vs Discrete Numerical Variables
o Discrete Numerical Variables vs Categorical Variables
STACKED BAR PLOT
# Stacked bar plot with colors and legend.
# Rows of the table (vs) become the stacked segments, columns (gear) the bars.
counts <- table(mtcars$vs, mtcars$gear)
barplot(counts,
        main = "Car Distribution by Gears and VS",
        xlab = "Number of Gears",
        col = c("darkblue", "red"),
        legend.text = rownames(counts))  # full argument name, no partial match

# Same chart with cylinders (three levels) as the stacked segments.
counts <- table(mtcars$cyl, mtcars$gear)
barplot(counts,
        main = "Car Distribution by Gears and Cylinders",
        xlab = "Number of Gears",
        col = c("darkblue", "red", "Yellow"),
        legend.text = rownames(counts))
GROUPED BAR PLOT
# Grouped bar plot: beside = TRUE draws the table rows side by side
# instead of stacking them.
counts <- table(mtcars$vs, mtcars$gear)
barplot(counts,
        main = "Car Distribution by Gears and VS",
        xlab = "Number of Gears",
        col = c("darkblue", "red"),
        legend.text = rownames(counts),  # full argument name, no partial match
        beside = TRUE)

# Same chart grouped by number of cylinders.
counts <- table(mtcars$cyl, mtcars$gear)
barplot(counts,
        main = "Car Distribution by Gears and Cylinders",
        xlab = "Number of Gears",
        col = c("darkblue", "red", "Yellow"),
        legend.text = rownames(counts),
        beside = TRUE)
BOX PLOT
# Comparative boxplot: distribution of mpg for each value of vs
boxplot(mtcars$mpg ~ mtcars$vs, horizontal = TRUE, col = c("Red", "Green"))
# The title is a single-line string; a literal broken across two lines would
# embed a newline in the plot title.
boxplot(mtcars$mpg ~ mtcars$vs, horizontal = TRUE, col = c("Red", "Green"),
        main = "Comparative Boxplot")
3. MULTIVARIATE ANALYSIS
## To check the correlation among the variables
# Graphical representation: scatterplot matrix of the selected variables
pairs(~ mpg + disp + hp + wt + drat + qsec, data = mtcars)
# Correlation matrix across all columns of mtcars
cor(mtcars)
Artificial inteliggence and machine learning ppt
Artificial inteliggence and machine learning ppt

More Related Content

DOCX
Data visualization with R and ggplot2.docx
PDF
NCCU: Statistics in the Criminal Justice System, R basics and Simulation - Pr...
PPTX
R Programming.pptx
PDF
Data manipulation on r
PDF
R programming & Machine Learning
PPTX
Murtaugh 2022 Appl Comp Genomics Tidyverse lecture.pptx-1.pptx
PDF
Data Visualization With R
PDF
Unit---4.pdf how to gst du paper in this day and age
Data visualization with R and ggplot2.docx
NCCU: Statistics in the Criminal Justice System, R basics and Simulation - Pr...
R Programming.pptx
Data manipulation on r
R programming & Machine Learning
Murtaugh 2022 Appl Comp Genomics Tidyverse lecture.pptx-1.pptx
Data Visualization With R
Unit---4.pdf how to gst du paper in this day and age

Similar to Artificial inteliggence and machine learning ppt (20)

TXT
R console
PDF
Practical Data Science : Data Cleaning and Summarising
PPTX
Descriptive Statistics in R.pptx
PDF
Introduction to tibbles
PDF
Data Visualization With R: Introduction
PDF
Data Visualization With R: Learn To Modify Color Of Plots
PDF
Linear Model Selection and Regularization (Article 6 - Practical exercises)
PPTX
Python chart plotting using Matplotlib.pptx
PDF
MH prediction modeling and validation in r (1) regression 190709
PDF
Manipulating Data using base R package
PPTX
Using R for Building a Simple and Effective Dashboard
PDF
Regression and Classification with R
PDF
Q plot tutorial
PPTX
Data manipulation and visualization in r 20190711 myanmarucsy
PDF
Data manipulation with dplyr
PPTX
Data Science.pptx00000000000000000000000
PPTX
Income Qualification ppt.pptx
PPTX
R programming language
PPTX
Introduction to R
PDF
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
R console
Practical Data Science : Data Cleaning and Summarising
Descriptive Statistics in R.pptx
Introduction to tibbles
Data Visualization With R: Introduction
Data Visualization With R: Learn To Modify Color Of Plots
Linear Model Selection and Regularization (Article 6 - Practical exercises)
Python chart plotting using Matplotlib.pptx
MH prediction modeling and validation in r (1) regression 190709
Manipulating Data using base R package
Using R for Building a Simple and Effective Dashboard
Regression and Classification with R
Q plot tutorial
Data manipulation and visualization in r 20190711 myanmarucsy
Data manipulation with dplyr
Data Science.pptx00000000000000000000000
Income Qualification ppt.pptx
R programming language
Introduction to R
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Ad

Recently uploaded (20)

PDF
Technical Architecture - Chainsys dataZap
PDF
TyAnn Osborn: A Visionary Leader Shaping Corporate Workforce Dynamics
PDF
Tata consultancy services case study shri Sharda college, basrur
PDF
How to Get Business Funding for Small Business Fast
PDF
How to Get Approval for Business Funding
PPTX
2025 Product Deck V1.0.pptxCATALOGTCLCIA
PDF
Blood Collected straight from the donor into a blood bag and mixed with an an...
PDF
Ôn tập tiếng anh trong kinh doanh nâng cao
PDF
How to Get Funding for Your Trucking Business
PDF
Introduction to Generative Engine Optimization (GEO)
PDF
NEW - FEES STRUCTURES (01-july-2024).pdf
PPTX
svnfcksanfskjcsnvvjknsnvsdscnsncxasxa saccacxsax
PPTX
Board-Reporting-Package-by-Umbrex-5-23-23.pptx
PDF
SIMNET Inc – 2023’s Most Trusted IT Services & Solution Provider
PDF
BsN 7th Sem Course GridNNNNNNNN CCN.pdf
PDF
NISM Series V-A MFD Workbook v December 2024.khhhjtgvwevoypdnew one must use ...
PDF
Daniels 2024 Inclusive, Sustainable Development
PDF
Nante Industrial Plug Factory: Engineering Quality for Modern Power Applications
PDF
Booking.com The Global AI Sentiment Report 2025
PDF
Charisse Litchman: A Maverick Making Neurological Care More Accessible
Technical Architecture - Chainsys dataZap
TyAnn Osborn: A Visionary Leader Shaping Corporate Workforce Dynamics
Tata consultancy services case study shri Sharda college, basrur
How to Get Business Funding for Small Business Fast
How to Get Approval for Business Funding
2025 Product Deck V1.0.pptxCATALOGTCLCIA
Blood Collected straight from the donor into a blood bag and mixed with an an...
Ôn tập tiếng anh trong kinh doanh nâng cao
How to Get Funding for Your Trucking Business
Introduction to Generative Engine Optimization (GEO)
NEW - FEES STRUCTURES (01-july-2024).pdf
svnfcksanfskjcsnvvjknsnvsdscnsncxasxa saccacxsax
Board-Reporting-Package-by-Umbrex-5-23-23.pptx
SIMNET Inc – 2023’s Most Trusted IT Services & Solution Provider
BsN 7th Sem Course GridNNNNNNNN CCN.pdf
NISM Series V-A MFD Workbook v December 2024.khhhjtgvwevoypdnew one must use ...
Daniels 2024 Inclusive, Sustainable Development
Nante Industrial Plug Factory: Engineering Quality for Modern Power Applications
Booking.com The Global AI Sentiment Report 2025
Charisse Litchman: A Maverick Making Neurological Care More Accessible
Ad

Artificial inteliggence and machine learning ppt

  • 1. AI & MACHINE LEARNING
  • 2. MODULE 2 : EXPLORATORY DATA ANALYSIS Basic R objects •Variables •Vector •Array •Matrices •Data Frame
  • 3. BASIC FUNCTIONS # To pull mtcars data in this session data() data("mtcars") #To view the variables / data dictionary mtcars # print mydata #To view the variables / data dictionary str(mtcars) #To view the variables / data dictionary dim(mtcars) # dimensions of an object names(mtcars) class(mtcars) # class of an object (numeric, matrix, data frame, etc)
  • 4. head(mtcars) #To view top records of mtcars # print first 10 rows of mydata head(mtcars, n=10) tail(mtcars) #to view bottom records of mtcars ##Variable Identification - Inferences nrow(mtcars) ncol(mtcars) mean(mtcars[,2]) median(mtcars[,9]) range(mtcars$mpg) mean(mtcars$mpg)
  • 5. 1. UNIVARIATE ANALYSIS #Interquartile range (75 percentile - 25 percentile) IQR(mtcars$mpg) ## Univariate Analysis (Pattern Recognition) summary(mtcars) # Complete summary of the dataset fivenum(mtcars) fivenum(mtcars$mpg) # edit the the data for Outliers mtcars=edit(mtcars) mtcars
  • 6. HISTOGRAM #Histogram hist(mtcars$mpg) hist(mtcars$mpg,col = "Red") # Colored Histogram with Different Number of Bins hist(mtcars$mpg, breaks=12, col="red") # Add labels to the graph x <- mtcars$mpg h<-hist(x, breaks=10, col="red", xlab="Miles Per Gallon", main="Histogram")
  • 7. ##(Histograms can be a poor method for determining the shape of a distribution because it is so strongly affected by the number of bins used.) ##Kernal density plots are usually a much more effective way to view the distribution of a variable. # Kernel Density Plot d <- density(mtcars$mpg) # returns the density data plot(d) # plots the results # Filled Density Plot d <- density(mtcars$mpg) plot(d, main="Kernel Density of Miles Per Gallon") polygon(d, col="red", border="blue")
  • 8. BAR PLOT # Simple Bar Plot counts <- table(mtcars$gear) counts barplot(counts, main="Car Distribution", xlab="Number of Gears") barplot(counts,col="Yellow", main="Car Distribution", xlab="Number of Gears") # Simple Horizontal Bar Plot with Added Labels counts <- table(mtcars$gear) barplot(counts, main="Car Distribution", horiz=TRUE) barplot(counts, main="Car Distribution", horiz=TRUE, names.arg=c("3 Gears", "4 Gears", "5 Gears"))
  • 9. BOX PLOT #Boxplot boxplot(mtcars$mpg) boxplot(mtcars$mpg,horizontal = TRUE) boxplot(mtcars$mpg,horizontal = TRUE,col = "Pink") boxplot(mtcars$mpg,horizontal = TRUE,col = "Green",main="Mileage")
  • 10. 2. BIVARIATE ANALYSIS o Continuous Numerical Variables vs Continuous Numerical Variables o Continuous Numerical Variables vs Discrete Numerical Variables o Discrete Numerical Variables vs Categorical Variables
  • 11. STACKED BAR PLOT # Stacked Bar Plot with Colors and Legend counts <- table(mtcars$vs, mtcars$gear) barplot(counts, main="Car Distribution by Gears and VS", xlab="Number of Gears", col=c("darkblue","red"), legend = rownames(counts)) counts <- table(mtcars$cyl, mtcars$gear) barplot(counts, main="Car Distribution by Gears and Cylinders", xlab="Number of Gears", col=c("darkblue","red","Yellow"), legend = rownames(counts))
  • 12. GROUPED BAR PLOT # Grouped Bar Plot counts <- table(mtcars$vs, mtcars$gear) barplot(counts, main="Car Distribution by Gears and VS", xlab="Number of Gears", col=c("darkblue","red"), legend = rownames(counts), beside=TRUE) counts <- table(mtcars$cyl, mtcars$gear) barplot(counts, main="Car Distribution by Gears and Cylinders", xlab="Number of Gears", col=c("darkblue","red","Yellow"), legend = rownames(counts),beside = TRUE)
  • 14. 3. MULTIVARIATE ANALYSIS ## To check the correlation among the variables pairs(~ mpg + disp + hp + wt + drat + qsec , data = mtcars) #graphical representation cor(mtcars[,c(1,2,3,4,5,6,7,8,9,10,11)])