MACHINE LEARNING WITH GO
Golang Bristol - April 2020
James Bowman
@jamesebowman
MAGIC !!
MAGIC !! → MATHS & ALGORITHMS
Artificial Intelligence
Any technique which enables computers to mimic
human behaviour.
Machine Learning
Subset of AI techniques which use statistical methods
to enable machines to ‘learn’ how to carry out tasks
without being explicitly programmed how to do them.
Deep Learning
Subset of ML techniques using multi-layered neural
networks (algorithms inspired by the structure and
function of the human brain). Typically suited to self-
learning and feature extraction.
(Diagram: nested sets, Deep Learning within Machine Learning within Artificial Intelligence)
SO WHY GO?!?
• Relatively expressive and productive
• Strong typing (more explicit)
• Performant and Scalable
Machine Learning
• Supervised Learning
  – Classification: Image Recognition, Ad Click Prediction, Medical Diagnosis, Spam Filtering
  – Regression: House Price Prediction, Demand Forecasting
• Unsupervised Learning
  – Clustering: Customer Segmentation, Data Mining, Recommendations
  – Dimensionality Reduction: Visualisation, Feature Extraction, Compression
• Reinforcement Learning: Skill Acquisition, Control Systems, Game AI, Real-time Decisions
BASIC ML WORKFLOW
Historical Data is split into Training Data and Test Data. The Training Data is used to Train the Model; the Test Data is used to Evaluate the Model, producing Performance Metrics. The evaluated model is then Deployed/Used against Live Data to produce Predictions.
THE DIABETES DATASET
• The Pima are a group of Native Americans living in Arizona
• They have one of the highest recorded rates of obesity and diabetes
• A study conducted by the National Institute of Diabetes and Digestive
and Kidney Diseases collected diagnostic data on female Pima patients
with the aim of predicting diabetes.
Pregnancies | Glucose | Blood Pressure | Skin Thickness | Insulin | BMI | Diabetes Pedigree Function | Age | Outcome (Class Label)
6 | 148 | 72 | 35 | 0 | 33.6 | 0.627 | 50 | 1
1 | 85 | 66 | 29 | 0 | 26.6 | 0.351 | 31 | 0
https://www.kaggle.com/uciml/pima-indians-diabetes-database
INTUITIVELY
Patients with similar attributes tend to share the
same diagnosis
K-NEAREST NEIGHBOURS CLASSIFIER
Predicts the class (Y) as the average (mode) of the classes of the K most
similar (nearest) observations from the training data.
With K = 3: Y = mode of the K nearest observations = mode{0, 1, 1} = 1
(Diagram: a scatter of training points labelled 0 or 1; the unknown point Y is assigned the majority class of its 3 nearest neighbours.)
FEATURE VECTORS
• Observations (records) can be represented as n-dimensional numerical feature vectors
• Feature vectors can be thought of as points in Euclidean space
n = 2 (2D): P(x, y) = [p1, p2]
n = 3 (3D): P(x, y, z) = [p1, p2, p3]
In general (n dimensions): [p1, p2, …, pn]
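To make this concrete, a minimal sketch (not from the original slides) representing the first row of the diabetes dataset as an 8-dimensional feature vector, using the same gonum mat package the later code relies on:

package main

import (
	"fmt"

	"gonum.org/v1/gonum/mat"
)

func main() {
	// First row of the diabetes dataset as an 8-dimensional feature vector:
	// Pregnancies, Glucose, Blood Pressure, Skin Thickness, Insulin, BMI,
	// Diabetes Pedigree Function, Age (class label excluded).
	patient := mat.NewVecDense(8, []float64{6, 148, 72, 35, 0, 33.6, 0.627, 50})
	fmt.Println(mat.Formatted(patient))
}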
NEAREST NEIGHBOURS
• ‘Nearest’ = shortest distance
• Where ‘distance’ is a formal distance metric
• In n-dimensional Euclidean space, the distance between points p and q
is given by Pythagoras’ formula:
d(p, q) = √( Σ i=1..n (pi − qi)² ) = √( (p1 − q1)² + (p2 − q2)² + … + (pn − qn)² )
(Diagram: in 2D, d(p, q) is the hypotenuse of a right triangle with legs p1 − q1 and p2 − q2.)
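For example (a worked case not on the original slide): with p = (1, 2) and q = (4, 6), d(p, q) = √((1 − 4)² + (2 − 6)²) = √(9 + 16) = √25 = 5.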
LET'S GO IMPLEMENT IT
(pun intended)
LET'S BUILD A MODEL
// mat is gonum.org/v1/gonum/mat
type Predictor interface {
	Fit(X *mat.Dense, Y []string)
	Predict(X *mat.Dense) []string
}
1. Fit ‘trains’ the model using the training data
2. Predict infers the class for the test or live production data
EVALUATE WITH A SIMPLE HARNESS
1. Load the dataset from the CSV file
2. Split the data into training and test sets
3. Train the model with the training data
4. Predict classes for the test data
5. Compare predictions with test data labels to find model accuracy
func Evaluate(dsPath string, model Predictor) (float64, error) {
	records, err := loadFile(dsPath)
	if err != nil {
		return 0, err
	}
	trainData, trainLabels, testData, testLabels := split(true, records, 0.7)
	model.Fit(trainData, trainLabels)
	predictions := model.Predict(testData)
	return evaluate(predictions, testLabels), nil
}
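For context, a minimal sketch of how this harness might be invoked (the dataset filename is an illustrative assumption; KNNClassifier and EuclideanDistance are defined on the following slides; imports fmt and log assumed):

func main() {
	// K = 3 neighbours with Euclidean distance, matching the K = 3
	// example earlier in this talk.
	model := &KNNClassifier{K: 3, Distance: EuclideanDistance}
	accuracy, err := Evaluate("pima-indians-diabetes.csv", model) // hypothetical path
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("model accuracy: %.2f\n", accuracy)
}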
1. LOAD THE DATASET FROM THE CSV FILE
func loadFile(path string) ([][]string, error) {
	var records [][]string
	file, err := os.Open(path)
	if err != nil {
		return records, err
	}
	defer file.Close() // ensure the file is closed once reading completes
	reader := csv.NewReader(file)
	return reader.ReadAll()
}
2. SPLIT THE DATA INTO TRAINING AND TEST SETS
func split(header bool, records [][]string, trainProportion float64) (*mat.Dense, []string, *mat.Dense, []string) {
	if header {
		records = records[1:] // skip the column headings
	}
	datasetLength := len(records)
	indx := make([]int, int(float64(datasetLength)*trainProportion))
	// sample the training row indices without replacement, with a fixed
	// seed so the split is reproducible
	// (rnd is golang.org/x/exp/rand; sampleuv is gonum.org/v1/gonum/stat/sampleuv)
	r := rnd.New(rnd.NewSource(uint64(47)))
	sampleuv.WithoutReplacement(indx, datasetLength, r)
	sort.Ints(indx)
	cols := len(records[0]) - 1 // last column is the class label, not a feature
	trainData := mat.NewDense(len(indx), cols, nil)
	trainLabels := make([]string, len(indx))
	testData := mat.NewDense(len(records)-len(indx), cols, nil)
	testLabels := make([]string, len(records)-len(indx))
	var trainind, testind int
	for i, v := range records {
		if trainind < len(indx) && i == indx[trainind] {
			// training set
			readRecord(trainLabels, trainData, trainind, v)
			trainind++
		} else {
			// test set
			readRecord(testLabels, testData, testind, v)
			testind++
		}
	}
	return trainData, trainLabels, testData, testLabels
}
2. SPLIT THE DATA INTO TRAINING AND TEST SETS
func readRecord(labels []string, data *mat.Dense, recordNum int, record []string) {
	labels[recordNum] = record[len(record)-1] // last column is the class label
	for i, v := range record[:len(record)-1] {
		s, err := strconv.ParseFloat(v, 64)
		if err != nil {
			// replace invalid numbers with 0
			s = 0
		}
		data.Set(recordNum, i, s)
	}
}
3. TRAIN THE MODEL WITH THE TRAINING DATA
type KNNClassifier struct {
	K          int                           // number of neighbours to consult
	Distance   func(a, b mat.Vector) float64 // distance metric between two observations
	datapoints *mat.Dense                    // stored training observations
	classes    []string                      // stored training class labels
}

func (k *KNNClassifier) Fit(X *mat.Dense, Y []string) {
	k.datapoints = X
	k.classes = Y
}
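Note that Fit performs no computation at all: k-NN is a ‘lazy’ learner, so it simply memorises the training data and defers all the work to prediction time.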
4. PREDICT CLASSES FOR THE TEST DATA
func (k *KNNClassifier) Predict(X *mat.Dense) []string {
	r, _ := X.Dims()
	targets := make([]string, r)
	distances := make([]float64, len(k.classes))
	inds := make([]int, len(k.classes))
	for i := 0; i < r; i++ {
		votes := make(map[string]float64)
		// distance from this observation to every training observation
		for j := 0; j < len(k.classes); j++ {
			distances[j] = k.Distance(k.datapoints.RowView(j), X.RowView(i))
		}
		// sort the distances, tracking their original indices
		// (floats is gonum.org/v1/gonum/floats)
		floats.Argsort(distances, inds)
		// tally the classes of the K nearest neighbours
		for n := 0; n < k.K; n++ {
			votes[k.classes[inds[n]]]++
		}
		// the class with the most votes wins
		var winningCount float64
		for class, count := range votes {
			if count > winningCount {
				targets[i] = class
				winningCount = count
			}
		}
	}
	return targets
}
1. For each observation to predict for (row in the matrix):
2. Calculate the distance to every training observation
3. Sort the distances
4. Count the frequency of each class corresponding to the top K closest
5. Determine the highest frequency class
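This is the flip side of the trivial Fit: each prediction computes the distance to all n training observations, so prediction costs O(n·d) per query for d features.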
4. PREDICT CLASSES FOR THE TEST DATA
func EuclideanDistance(a, b mat.Vector) float64 {
	var v mat.VecDense
	v.SubVec(a, b)                    // v = a − b
	return math.Sqrt(mat.Dot(&v, &v)) // √(v · v) = √Σ(aᵢ − bᵢ)²
}
d(p, q) = √( (p1 − q1)² + (p2 − q2)² + … + (pn − qn)² )
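A minimal sanity check for this function (a hypothetical test, not from the original slides, assuming imports of testing and gonum's mat), using the 3-4-5 right triangle from the worked example earlier:

func TestEuclideanDistance(t *testing.T) {
	a := mat.NewVecDense(2, []float64{1, 2})
	b := mat.NewVecDense(2, []float64{4, 6})
	// √(3² + 4²) = 5 exactly, so a direct comparison is safe here
	if got := EuclideanDistance(a, b); got != 5 {
		t.Errorf("want 5, got %v", got)
	}
}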
5. COMPARE PREDICTIONS WITH TEST DATA LABELS TO FIND MODEL ACCURACY
func evaluate(predictions, labels []string) float64 {
	var correct float64
	for i, v := range labels {
		if predictions[i] == v {
			correct++
		}
	}
	// accuracy = fraction of predictions that match the true labels
	return correct / float64(len(labels))
}
PERFORMANCE
Accuracy: 0.69 (the model correctly classifies 69% of the held-out test set)
QUESTIONS?
@jamesebowman