Alexander Ioffe

@deusaquilus
Quill + Spark =
Better Together
Quill + Spark = Better Together
Quill + Spark = Better Together
Quill + Spark = Better Together
So What’s The Difference?
• Abstraction

• Encapsulation

• Error Handling

• Good Control Flow

• Performance
Application Development
Languages
Data Retrieval 

Languages
• Natural Expression

• Possible Optimization

• Good Control Flow

• Performance
They Make Different Tradeoffs!
Abstraction Power
Possible Optimizations
Data Retrieval

Languages
Application Development
Languages
Example Please???
CREATE VIEW HelloAmerican AS
SELECT 'Hello ' || t.firstName + ' ' || t.lastName + ' of ' || a.city
FROM Americans t
JOIN Addresses a on t.address_id == a.id
-- Hello John James of New York
CREATE VIEW HelloCanadian AS
SELECT 'Hello ' + t.name + ' ' + t.surname + ' of ' + a.city
FROM Canadians t
JOIN Addresses a on t.residence_id == a.id
-- Hello Jim Jones of Toronto
CREATE VIEW HelloYeti AS
SELECT 'Hello ' + t.gruntingSound + ' ' + t.roaringSound + ' of ' + a.city
FROM AbominableShowmen t
JOIN Addresses a on t.cave_id == a.id
-- Hello Aaargalah Gralala of Kholat Syakhl
CREATE FUNCTION AddressToSomeone (
@humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id)
)
SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
FROM @humanoidLivingSomewhere t
JOIN Addresses a on t.whereHeLives_id == a.id
CREATE VIEW AmericanClients
AS SELECT * from AddressToSomeone(
SELECT t.firstName as called, t.lastName as alsoCalled, a.address_id as whereHeLives_id
FROM Americans
)
CREATE VIEW CanadianClients
AS SELECT * from AddressToSomeone(
SELECT t.name as called, t.surname as alsoCalled, a.residence_id as whereHeLives_id
FROM Canadians
)
CREATE VIEW YetiClients
AS SELECT * from AddressToSomeone(
SELECT t.gruntingSound as called, t.roaringSound as alsoCalled,
a.cave_id as whereHeLives_id
FROM AbominableShowmen
)
CREATE FUNCTION concatName (
@called VARCHAR; @alsoCalled VARCHAR; @whereHeLives_id)
)
AS 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
SELECT concatName(t.firstName, t.lastName, a.city)
FROM American t
JOIN Addresses a on t.whereHeLives_id == a.id
SELECT concatName(t.name, t.surname, a.city)
FROM American t
JOIN Addresses a on t.whereHeLives_id == a.id
SELECT concatName(t.gruntingSound, t.roaringSound, a.city)
FROM American t
JOIN Addresses a on t.whereHeLives_id == a.id
CREATE FUNCTION concatName (
@called VARCHAR; @alsoCalled VARCHAR; @whereHeLives_id)
)
AS 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
CREATE FUNCTION AddressToSomeone (
@humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR;
whereHeLives_id)
)
SELECT
'Hello ' ||t .called || ' ' || t.alsoCalled || ' of ' || a.city,
CASE
WHEN zd.zone_type = 'K' THEN 'StandardCategory'
WHEN zd.zone_type = 'N' AND rc.barbaz = 'GT' THEN 'NonStandardCategory'
ELSE 'UnknownCategory'
END as zoning_category1,
CASE
WHEN ru.kdd = 'IK' THEN 'Insanity'
WHEN zd.kdd = 'N' AND rc.barbaz = 'GTT' THEN 'MoreInsanity'
ELSE 'I_Dont_Even_Know_What_Goes_Here'
END as zoning_category2
FROM @humanoidLivingSomewhere t
JOIN Addresses a on t.whereHeLives_id = a.id
JOIN ResidenceUnit ru on a.rid = ru.id
JOIN ResidenceClass rc on ru.class_id = rc.class_id
JOIN ZoningDesignation zd on ru.zone_id = zd.rzid and zd.cid = rc.class_id
SELECT concatName(t.firstName, t.lastName, a.city)
FROM American t
JOIN Addresses a on t.whereHeLives_id == a.id
SELECT concatName(t.name, t.surname, a.city)
FROM American t
JOIN Addresses a on t.whereHeLives_id == a.id
SELECT concatName(t.gruntingSound, t.roaringSound, a.city)
FROM American t
JOIN Addresses a on t.whereHeLives_id == a.id
CREATE FUNCTION AddressToSomeone (
@humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id)
)
SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
FROM @humanoidLivingSomewhere t
JOIN Addresses a on t.whereHeLives_id == a.id
CREATE FUNCTION AddressToSomeone (
@humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id)
)
SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
FROM @humanoidLivingSomewhere t
JOIN Addresses a on t.whereHeLives_id == a.id
WHERE a.current = true
DataFrame Can!
def addressToSomeone(df: DataFrame) = {
df.as("t")
.join(addresses.as("a"), $"whereHeLivesId" === $"id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
}
DataFrame Can!
addressToSomeone(
americans.select($"firstName" as "called", $"lastName" as "alsoCalled",
$"address_id" as "whereHeLives_id")
)
addressToSomeone(
canadians.select($"name" as "called", $"surname" as "alsoCalled",
$"residence_id" as "whereHeLives_id")
)
addressToSomeone(
yeti.select($"gruntSound" as "called", $"roarSound" as "alsoCalled",
$"cave_id" as "whereHeLives_id")
)
def addressToSomeone(df: DataFrame) = {
df.as("t")
.join(addresses.as("a"), $"id" === $"whereHeLives_id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
}
DataFrame Can…
addressToSomeone(
americans.select($"firstName" as "called", $"lastName" as "alsoCalled",
$"address_id" as "whereHeLives_id")
)
addressToSomeone(
canadians.select($"name" as "called", $"surname" as "alsoCalled",
$"residence_id" as "whereHeLives_id")
)
addressToSomeone(
yeti.select($"gruntSound" as "called", $"roarSound" as "alsoCalled",
$"cave_id" as "whereHeLives_id")
)
def addressToSomeone(df: DataFrame) = {
df.as("t")
.join(addresses.as("a"), $"whereHeLives_id" === $"id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
}
… Hurt!
def insaneJoin(df: DataFrame) =
df.as("t")
.join(addresses.as("a"), $"t.whereHeLives_id" === $"a.id")
.join(residenceUnit.as("ru"), $"a.rid" === $"ru.id")
.join(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id")
.join(zoningDesignation.as("zd"),
($"ru.zone_id" === "zd.rid") &&
($"zd.cid" === $"rc.class_id")
)
.select(
concat(
lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
lit(" of "), $"a.city"),
when($"zd.zone_type" === lit("K"), "StandardCategory")
.when(($"zd.zone_type" === lit("N")) && ($"rc.barbaz" === lit("GT")),
"NonStandardCategory")
.otherwise("UnknownCategory")
.as("zoning_category1"),
when($"ru.kdd" === lit("IK"), "Insanity")
.when(($"zd.kdd" === lit("N")) && ($"rc.barbaz" === lit("GTT")),
"MoreInsanity")
.otherwise("I_Dont_Even_Know_What_Goes_Here")
.as("zoning_category2")
)
… Hurt!
def insaneJoin(df: DataFrame) =
df.as("t")
.join(addresses.as("a"), $"t.whereHeLives_id" === $"a.id")
.join(residenceUnit.as("ru"), $"a.rid" === $"ru.id")
.join(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id")
.join(zoningDesignation.as("zd"),
($"ru.zone_id" === "zd.rzid") &&
($"zd.cid" === $"rc.class_id")
)
.select(
concat(
lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
lit(" of "), $"a.city"),
when($"zd.zone_type" === lit("K"), "StandardCategory")
.when(($"zd.zone_type" === lit("N")) && ($"rc.barbaz" === lit("GT")),
"NonStandardCategory")
.otherwise("UnknownCategory")
.as("zoning_category1"),
when($"ru.kdd" === lit("IK"), "Insanity")
.when(($"zd.kdd" === lit("N")) && ($"rc.barbaz" === lit("GTT")),
"MoreInsanity")
.otherwise("I_Dont_Even_Know_What_Goes_Here")
.as("zoning_category2")
)
Quill + Spark = Better Together
case class HumanoidLivingSomewhere(
called:String, alsoCalled: String, whereHeLives_id:Int
)
Wait… Dataset?
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
humanoidsLivingSomewhere.as("t")
.joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
.joinWith(residenceUnit.as("ru"), $"a.rid" === $"ru.id")
.joinWith(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id")
.joinWith(zoningDesignation.as("zd"),
($"ru.zone_id" === "zd.rzid") &&
($"zd.cid" === $"ru.class_id")
)
.map { case ((((t, a), ru), rc), zd) => (
s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT")
"NonStandardCategory"
else
"UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GT")
"MoreInsanity"
else
"I_Dont_Even_Know_What_Goes_Here"
)
}
Wait… Dataset?
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
humanoidsLivingSomewhere.as("t")
.joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
.joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
.joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
.joinWith(zoningDesignation.as("zd"),
($"_1._2.zone_id" === "zd.rzid") &&
($"zd.cid" === $"_1._2.class_id")
)
.map { case ((((t, a), ru), rc), zd) => (
s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT")
"NonStandardCategory"
else
"UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GT")
"MoreInsanity"
else
"I_Dont_Even_Know_What_Goes_Here"
)
}
Wait… Dataset?
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
humanoidsLivingSomewhere.as("t")
.joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
.joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
.joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
.joinWith(zoningDesignation.as("zd"),
($"_1._2.zone_id" === "zd.rzid") &&
($"zd.cid" === $"_1._2.class_id")
)
.map { case ((((t, a), ru), rc), zd) => (
s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT")
"NonStandardCategory"
else
"UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GT")
"MoreInsanity"
else
"I_Dont_Even_Know_What_Goes_Here"
)
}
Dataset[(HumanoidLivingSomewhere, Address)]
Wait… Dataset?
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
humanoidsLivingSomewhere.as("t")
.joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
.joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
.joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
.joinWith(zoningDesignation.as("zd"),
($"_1._2.zone_id" === "zd.rzid") &&
($"zd.cid" === $"_1._2.class_id")
)
.map { case ((((t, a), ru), rc), zd) => (
s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT")
"NonStandardCategory"
else
"UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GT")
"MoreInsanity"
else
"I_Dont_Even_Know_What_Goes_Here"
)
}
Dataset[
(((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass)
]
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
humanoidsLivingSomewhere.as("t")
.joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
.joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
.joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
.joinWith(zoningDesignation.as("zd"),
($"_1._2.zone_id" === "zd.rzid") &&
($"zd.cid" === $"_1._2.class_id")
)
.map { case ((((t, a), ru), rc), zd) => (
s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT")
"NonStandardCategory"
else
"UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GT")
"MoreInsanity"
else
"I_Dont_Even_Know_What_Goes_Here"
)
}
case class American(
firstName:String, lastName:String, address_id:Int,
irrelevantP1:String... irrelevantP100:String
)
case class Canadian(
name:String, surname:String, residence_id:Int,
irrelevantP1:String... irrelevantP100:String
)
case class Yeti(
gruntingSound:String, roaringSound:String, address_id:Int,
irrelevantP1:String... irrelevantP100:String
)
Say There's Stuff We Don't Care About
case class Address(
id:Int, street:String, city:String, current: Boolean
irrelevantA1:String... irrelevantA100:String
)
case class HumanoidLivingSomewhere(
called:String, alsoCalled: String, whereHeLives_id:Int
)
Let's Plug it In!
def addressToSomeone(humanoidLivingSomewhere: DataFrame) = {
humanoidLivingSomewhere.as("t")
.join(addresses.as("a"), $"whereHeLivesId" === $"id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
}
addressToSomeone(
americans.select(
$"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
)
americans
.select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
.as("t")
.join(addresses.as("a"), $"whereHeLivesId" === $"id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
Explain This!
*(5) Project [concat(Hello , called, , alsoCalled, of , city)]
+- *(5) SortMergeJoin [whereHeLives_id], [id], Inner
+- Exchange hashpartitioning(whereHeLives_id)
+- *(1) Project [firstName AS called, ... AS whereHeLives_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)],
americans
.select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
.as("t")
.join(addresses.as("a"), $"whereHeLivesId" === $"id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
.explain()
SQL Does the Same Thing
spark.sql(
"""
|select concat('Hello ', t.called, ' ', t.alsoCalled, ' of ', a.city) as _1
|from (
| select firstName as called, lastName as alsoCalled, address_id as whereHeLives_id
| from americans
|) as t
|join addresses a on (t.whereHeLives_id = a.id)
|where a.current = true
|""".stripMargin
)
.explain()
*(5) Project [concat(Hello , called, , alsoCalled, of , city)]
+- *(5) SortMergeJoin [whereHeLives_id], [id], Inner
+- Exchange hashpartitioning(whereHeLives_id)
+- *(1) Project [firstName AS called, ... AS whereHeLives_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)],
*(5) Project [concat(Hello , called, , alsoCalled, of , city)]
+- *(5) SortMergeJoin [whereHeLives_id], [id], Inner
+- Exchange hashpartitioning(whereHeLives_id)
+- *(1) Project [firstName AS called, ... AS whereHeLives_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)],
americans
.select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
.as("t")
.join(addresses.as("a"), $"whereHeLivesId" === $"id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
.explain()
Explain This!
In English Please?
*(🤞) Gimme My Result! [concat(Hello , called, , alsoCalled, of , city)]
+- *(💂) We're Joining! Huzzah! [whereHeLives_id], [id], Inner
+- Join Key for the Left Side! (whereHeLives_id)
+- *(1) Rename these like I said! Pronto! [firstName as Called... ]
+- *(😇) I'm a smart format, load only: [firstName,lastName,address_id]
+- Join Key for the Right Side! (id)
+- *(😇) I'm a smart format, load only: [id,city,current]
Read only current addr. from the file! 😎: [EqualTo(current,true)],
americans
.select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
.as("t")
.join(addresses.as("a"), $"whereHeLivesId" === $"id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
.explain()
How About Dataset?
def addressToSomeone(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = {
humanoidsLivingSomewhere
.joinWith(addresses, $"id" === $"whereHeLivesId")
.filter(ta => ta._2.current == true)
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
}
val americanClients =
addressToSomeone(
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId))
)
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId))
.joinWith(addresses, $"id" === $"whereHeLivesId")
.filter(ta => ta._2.current == true)
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
Explain This... Please!
*(3) SerializeFromObject [UTF8String]
+- *(3) MapElements java.lang.String
+- DeserializeToObject newInstance(Tuple2)
+- SortMergeJoin [_1.whereHeLives_id], [_2.id], Inner
+- Exchange hashpartitioning(_1.whereHeLives_id)
+- *(1) Project [called, alsoCalled, whereHeLives_id]
+- *(1) SerializeFromObject [UTF8String]
+- *(1) MapElements HumanoidLivingSomewhere
+- DeserializeToObject newInstance(American)
+- FileScan parquet [firstName,lastName,address_id,irrelevantP1,irrelevantP2,i
+- Exchange hashpartitioning(_2.id)
+- FileScan parquet [id,street,city,current,irrelevantA1,irrelevantA2,irrelevantA3,irrel
PushedFilters: []
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId))
.joinWith(addresses, $"id" === $"whereHeLivesId")
.filter(ta => ta._2.current == true)
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
.explain()
*(🤮) Serialize Back Into a String Expensive!
+- *(3) Do the Outer Map that we Invoked
+- (🤮) Deserialize Tuple2 Expensive!
+- & We're Joining! Huzzah! [_1.whereHeLives_id], [_2.id], Inner
+- Join Key for the Left Side (_1.whereHeLives_id)
+- *(1) Project [called, alsoCalled, whereHeLives_id]
+- *(🤮) Serialize the Join Key. Expensive!
+- *(1) MapElements HumanoidLivingSomewhere
+- (🤮) Deserialize into a JVM Object (i.e. class American)
+- Scan All 'American' Columns Including 100 irrelevant ones!😱
+- Join Key for the Right Side (_2.id)
+- Scan All 'Address' Columns Including 100 irrelevant ones! 😱
We Need to Read The Entire Dataset! No Excluding Non-Current Addresses 😢
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId))
.joinWith(addresses, $"id" === $"whereHeLivesId")
.filter(ta => ta._2.current == true)
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
.explain()
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId))
.joinWith(addresses, $"id" === $"whereHeLivesId")
.filter(ta => ta._2.current == true)
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
Which Columns are
we using in here?
Which Columns are
we using in here?
Which Columns are
we using in here?
⏸
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
I Guess We Need

All Of Them!
I Guess We Need

All Of Them!
I Guess We Need

All Of Them!
⏸
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, ¯_(ツ)_/¯)
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
⏸
What columns
am I joining by???
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map( (🖼 American) => HumanoidLivingSomewhere 🖼 ) 🤮
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { (🖼 HumanoidLivingSomewhere) => Boolean } 🤮
.map { (🖼 HumanoidLivingSomewhere) => String 🖼 } 🤮
→ →
→
→ →
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId))
.joinWith(addresses, $"id" === $"whereHeLivesId")
.filter(ta => ta._2.current == true)
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
val joined = humanoid
.joinInner(addresses) { humanoid('where) === addresses('id) }
joined.select(concat(
lit("Hello "), joined.colMany('_1, 'called), lit(" "),
joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city)))
}
What About Frameless?
addressToSomeone(
americans.select(americans('firstName), americans('lastName), americans('addressId))
.deserialized.map{ case (name, age, whereHeLives_id ) =>
HumanoidLivingSomewhere(
name.asInstanceOf[String],
age.asInstanceOf[String],
whereHeLives_id.asInstanceOf[Int])
}
)
What About Frameless?
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
val joined = humanoid
.joinInner(addresses) { humanoid('where) === addresses('id) }
joined.select(concat(
lit("Hello "), joined.colMany('_1, 'called), lit(" "),
joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city)))
}
addressToSomeone(
americans.select(americans('firstName), americans('lastName), americans('addressId))
.deserialized.map{ case (name, age, whereHeLives_id ) =>
HumanoidLivingSomewhere(
name.asInstanceOf[String],
age.asInstanceOf[String],
whereHeLives_id.asInstanceOf[Int])
}
)
What About Frameless?
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
val joined = humanoid
.joinInner(addresses) { humanoid('where) === addresses('id) }
joined.select(concat(
lit("Hello "), joined.colMany('_1, 'called), lit(" "),
joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city)))
}
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) }
val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) }
val j4 = j3.joinInner(zoningDesignation) {
(j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) &&
(zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id))
}
type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation)
j4.select(
concat(
lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "),
j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city)
),
when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory"))
.when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"),
lit("NonStandardCategory"))
.otherwise( lit("UnknownCategory")),
when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity"))
.when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"),
lit("MoreInsanity"))
.otherwise(lit("I_Dont_Even_Know_What_Goes_Here"))
)
}
What Sub-Tuple is class_id inside of?
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) }
val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) }
val j4 = j3.joinInner(zoningDesignation) {
(j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) &&
(zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id))
}
type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation)
j4.select(
concat(
lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "),
j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city)
),
when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory"))
.when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"),
lit("NonStandardCategory"))
.otherwise( lit("UnknownCategory")),
when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity"))
.when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"),
lit("MoreInsanity"))
.otherwise(lit("I_Dont_Even_Know_What_Goes_Here"))
)
}
What Sub-Tuple is class_id inside of?
Dataset[
(((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass)
]
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) }
val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) }
val j4 = j3.joinInner(zoningDesignation) {
(j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) &&
(zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id))
}
type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation)
j4.select(
concat(
lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "),
j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city)
),
when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory"))
.when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"),
lit("NonStandardCategory"))
.otherwise( lit("UnknownCategory")),
when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity"))
.when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"),
lit("MoreInsanity"))
.otherwise(lit("I_Dont_Even_Know_What_Goes_Here"))
)
}
Also... What's This???
[error] found : frameless.TypedColumn[Nothing,String]
[error] required:
frameless.AbstractTypedColumn[((((org.ctl.complex.HumanoidLivingSomewhere,
org.ctl.complex.Address), org.ctl.complex.ResidenceUnit),
org.ctl.complex.ResidenceClass), org.ctl.complex.ZoningDesignation),String]
[error] Note: Nothing <: ((((org.ctl.complex.HumanoidLivingSomewhere,
org.ctl.complex.Address), org.ctl.complex.ResidenceUnit),
org.ctl.complex.ResidenceClass), org.ctl.complex.ZoningDesignation), but class
AbstractTypedColumn is invariant in type T.
[error] You may wish to define T as +T instead. (SLS 4.5)
[error] when(j4.colMany('_2, 'zone_type) === "K", lit("StandardCategory"))
DataFrame/SQL Untyped 😢
Column Pruning
Filter Pushdown 😎
Dataset Almost Typed 😕 Extra Serialization 🤮
Frameless Typed 😃
Very Complex if you
don't know Shapeless.
Typechecking Power
Possible Optimizations
DataFrame/
Scala Code
(i.e. Dataset[T])
SQL
Quill + Spark = Better Together
?
Scala Code SQL DataFrame
?
Scala Code SQL DataFrame
Typechecking Power
Possible Optimizations
Scala Code SQL
Quill
DataFrame
quote {
scala-syntax-tree
}
AST
Macro
Scala Code SQL
Quill
quote {
scala-syntax-tree
}
AST
Macro
Query[R] SQL
Quill
quote { Query[R] }
AST
Macro
Quoted[Query[R]]
Quill + Spark = Better Together
Quill + Spark = Better Together
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[?]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
Query [Humanoid]
Humanoid
Query [Address]
Address
⏸
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses if (
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
LEFT JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.leftJoin(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
LEFT JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.leftJoin(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
Address
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
LEFT JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.leftJoin(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
Address
Option[Address] Address
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
String
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Query[String]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Query[Humanoid] => Query[String]
Query[String]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[String]]
Query[Humanoid] => Query[String]
Query[String]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield h
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
h.*
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[Humanoid]]
Query[Humanoid] => Query[Humanoid]
Query[Humanoid]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield a
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
a.*
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[Address]]
Query[Humanoid] => Query[Address]
Query[Address]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (h, a)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
h.*, a.*
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[(Humanoid, Address)]]
Query[Humanoid] => Query[(Humanoid, Address)]
Query[(Humanoid, Address)]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (Foobar(h, a))
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
?? I don't understand objects ??
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[Foobar]]
Query[Humanoid] => Query[Foobar]
Query[Foobar]
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
▶
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.r && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.r && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.r && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
[error] quillspark-examples/src/main/scala/Main.scala:107:28:
value rid is not a member of org.ctl.complex.ZoningDesignation
[error] ru.zone_id == zd.rid && zd.cid == rc.class_id
[error] ^
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
⏸
humanoidsLivingSomewhere.as("t")
.joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
.joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
.joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
.joinWith(zoningDesignation.as("zd"),
($"_1._2.zone_id" === "zd.rzid") &&
($"zd.cid" === $"_1._2.class_id")
)
.map { case ((((t, a), ru), rc), zd) => (
s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT")
"NonStandardCategory"
else
"UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GT")
"MoreInsanity"
else
"I_Dont_Even_Know_What_Goes_Here"
)
}
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
▶
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
▶
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
▶
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
quote { addressToSomeone(americans.map(am => 

Humanoid(am.firstName, am.lastName, am.address_id))) }
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
quote {addressToSomeone(canadians.map(am => 

Humanoid(am.name, am.surname, am.residence_id)))}
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
quote {addressToSomeone(yeti.map(am => 

Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))}
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
▶
quote {addressToSomeone(yeti.map(am => 

Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))}
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
DataFrame
quote {addressToSomeone(yeti.map(am => 

Humanoid(am.gruntingSound,
am.roaringSound, am.cave_id)))}
quote {addressToSomeone(canadians.map(am => 

Humanoid(am.name, am.surname,
am.residence_id)))}
quote { addressToSomeone(americans.map(am => 

Humanoid(am.firstName, am.lastName,
am.address_id))) }
run(Query[String]) run(Query[String]) run(Query[String])
DataFrame

Dataset[String]
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
run(Query[String]) run(Query[String]) run(Query[String])
DataFrame

Dataset[Humanoid]
SELECT
h
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
SELECT
h
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
h
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
run(Query[Humanoid]) run(Query[Humanoid]) run(Query[Humanoid])
DataFrame

Dataset[T]
run(Query[T]) run(Query[T]) run(Query[T])
SELECT
?
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
SELECT
?
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
?
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
= Dataset[T]
run(Query[T])
= Dataset[T]
run(Query[T])
Dataset[T] DataFrame
Easy! Just '.toDF'
Harder! '.as[DoIReallyKnowItsThis?]'
run(Query[T])
val spark = SparkSession.builder()
.appName("SparkQuillExample")
.enableHiveSupport()
.getOrCreate()
implicit val sqlContext = spark.sqlContext
import sqlContext.implicits._
import QuillSparkContext._
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
Dataset[Yeti]
Dataset[Address]
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
val yeti = quote { liftQuery(yetiDS) }
val addresses = quote { liftQuery(addressesDS) }
Quoted[Query[Yeti]]
Quoted[Query[Address]]
Dataset[Yeti]
Dataset[Address]
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
val yeti = quote { liftQuery(yetiDS) }
val addresses = quote { liftQuery(addressesDS) }
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val yeti = quote { liftQuery(yetiDS) }
val addresses = quote { liftQuery(addressesDS) }
val yetiOfSomeplace: Dataset[String] = run(output)
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val yeti = quote { liftQuery(yetiDS) }
val addresses = quote { liftQuery(addressesDS) }
val yetiOfSomeplace: Dataset[String] = run(output)
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
Run This Query:
Then Give Me Back
My Dataset!!!
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val yetiOfSomeplace: Dataset[String] = run(output)
*(5) Project [concat(Hello , firstName, , lastName, of , city)]
+- *(5) SortMergeJoin [address_id], [id], Inner
+- Exchange hashpartitioning(address_id)
+- *(1) Project [firstName, lastName, address_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)]
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
val yetiOfSomeplace: Dataset[String] = run(output)
*(5) Project [concat(Hello , firstName, , lastName, of , city)]
+- *(5) SortMergeJoin [address_id], [id], Inner
+- Exchange hashpartitioning(address_id)
+- *(1) Project [firstName, lastName, address_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)]
Optimized + No Serialization 😎
Optimized + No Serialization 😎
val yetiOfSomeplace: Dataset[String] = run(output)
val addressToSomeone = quote { Quill Magic! }
▶
val output = quote { Quill Magic! }
val yetiDS: Dataset[Yeti] = parquet("output/yeti").as[Yeti]
val addressesDS: Dataset[Address] = parquet("output/addresses").as[Address]
*(5) Project [concat(Hello , firstName, , lastName, of , city)]
+- *(5) SortMergeJoin [address_id], [id], Inner
+- Exchange hashpartitioning(address_id)
+- *(1) Project [firstName, lastName, address_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)]
▶
val yetiOfSomeplace: Dataset[String] = run(output)
val addressToSomeone = quote { Quill Magic! }
val output = quote { Quill Magic! }
val yetiDS: Dataset[Yeti] = parquet("output/yeti").as[Yeti]
val addressesDS: Dataset[Address] = parquet("output/addresses").as[Address]
// Applicative Joins
yeti.join(addresses).on(_.caveId == _.id)
yeti.leftJoin(addresses).on(_.caveId == _.id)
// Implicit Joins
for {
y <- yeti
a <- addresses if (y.caveId == a.id)
} yield (y, a)
// Semi-Joins
val cavelessYeti = quote {
yeti.filter(y => !addresses.map(_.id).contains(y.caveId))
}
Some other stuff we can do...
Some other stuff we can do...
// Group-By
orders.groupBy(_.sku).map {
case (sku, orders) => (sku, orders.map(_.price).avg)
}
// Concat-Map
val nodesChildren = quote {
(ns: Query[Node]) => ns.concatMap(n => n.children)
}
// Union/UnionAll
val americansAndCanadians = quote {
americans.map(_.firstName) unionAll canadians.map(_.surname)
}
Some other stuff we can do...
// User Defined Aggregation Functions (UDAFs)
spark.udf.register("geomMean", new GeometricMean)
val geomMean = quote {
(q: Query[BigDecimal]) => infix"geomMean(${q})".as[BigDecimal]
}
orders.groupBy(_.sku).map {
case (sku, orders) => (sku, geomMean(orders.map(_.price)))
}
// Using Spark UDFs
spark.udf.register("businessLogicUdf", (str:String) => str + "-suffix")
val businessLogicUdf = quote {
(str: String) => infix"businessLogicUdf(${str})".as[String]
}
quote {
yeti.map(y => businessLogicUdf(y.gruntingSound))
}
Quill + Spark = Better Together
https://getquill.io/

https://github.com/getquill/quill

https://gitter.im/getquill/quill
...Try It Out!
libraryDependencies ++= Seq(
"io.getquill" %% "quill-spark" % "3.4.10"
)
<dependency>
<groupId>io.getquill</groupId>
<artifactId>quill-spark_2.12</artifactId>
<version>3.4.10</version>
</dependency>

More Related Content

PDF
Get started with Reason
PDF
Devs for Leokz e 7Masters - WTF Oriented Programming
PDF
Taking Perl to Eleven with Higher-Order Functions
PDF
WordPress 3.1 at DC PHP
PPT
An Elephant of a Different Colour: Hack
PDF
Writing Sensible Code
PDF
(안드로이드 개발자를 위한) 오픈소스 라이브러리 사용 가이드
PDF
Everything you always wanted to know about forms* *but were afraid to ask
Get started with Reason
Devs for Leokz e 7Masters - WTF Oriented Programming
Taking Perl to Eleven with Higher-Order Functions
WordPress 3.1 at DC PHP
An Elephant of a Different Colour: Hack
Writing Sensible Code
(안드로이드 개발자를 위한) 오픈소스 라이브러리 사용 가이드
Everything you always wanted to know about forms* *but were afraid to ask

What's hot (14)

PDF
Leveraging Symfony2 Forms
DOC
CBSE Class XII Comp sc practical file
PDF
COMP2021 Final Project - LightHTML
PDF
Reason - introduction to language and its ecosystem | Łukasz Strączyński
PDF
레진코믹스가 코틀린으로 간 까닭은?
PDF
Kotlin: Let's Make Android Great Again
PDF
WTF Oriented Programming, com Fabio Akita
PDF
Tablas, Codigos De Base De Datos
PDF
Sql commands
PDF
Functional Error Handling with Cats
PPTX
Hacking Your Way To Better Security - Dutch PHP Conference 2016
PDF
PHP object calisthenics
KEY
Datamapper @ Railsconf2010
PPTX
Working With JQuery Part1
Leveraging Symfony2 Forms
CBSE Class XII Comp sc practical file
COMP2021 Final Project - LightHTML
Reason - introduction to language and its ecosystem | Łukasz Strączyński
레진코믹스가 코틀린으로 간 까닭은?
Kotlin: Let's Make Android Great Again
WTF Oriented Programming, com Fabio Akita
Tablas, Codigos De Base De Datos
Sql commands
Functional Error Handling with Cats
Hacking Your Way To Better Security - Dutch PHP Conference 2016
PHP object calisthenics
Datamapper @ Railsconf2010
Working With JQuery Part1
Ad

Similar to Quill + Spark = Better Together (20)

PDF
Perl6 grammars
PPTX
CS 542 Controlling Database Integrity and Performance
PPTX
CS 542 Database Index Structures
PDF
Kotlin for Android Developers
PPTX
Php functions
PDF
Functional Principles for OO Developers
PDF
Feature-Engineering-Earth-Advocacy-Project-2015
PDF
The Art of Transduction
PPTX
Using R for Building a Simple and Effective Dashboard
PDF
Ruby Language - A quick tour
KEY
Writeable ct es_pgcon_may_2011
PPTX
[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...
KEY
(Ab)Using the MetaCPAN API for Fun and Profit
PPTX
JSON + MariaDB: Hybrid Model Best Practices
KEY
Metaprogramming in Haskell
PPTX
Connect() Mini 2016
PDF
DataMapper @ RubyEnRails2009
ODP
Beginning Scala Svcc 2009
PDF
SQL FILE FROM MOODLEUSE [master]GO Object Databa.pdf
Perl6 grammars
CS 542 Controlling Database Integrity and Performance
CS 542 Database Index Structures
Kotlin for Android Developers
Php functions
Functional Principles for OO Developers
Feature-Engineering-Earth-Advocacy-Project-2015
The Art of Transduction
Using R for Building a Simple and Effective Dashboard
Ruby Language - A quick tour
Writeable ct es_pgcon_may_2011
[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...
(Ab)Using the MetaCPAN API for Fun and Profit
JSON + MariaDB: Hybrid Model Best Practices
Metaprogramming in Haskell
Connect() Mini 2016
DataMapper @ RubyEnRails2009
Beginning Scala Svcc 2009
SQL FILE FROM MOODLEUSE [master]GO Object Databa.pdf
Ad

Recently uploaded (20)

PDF
Autodesk AutoCAD Crack Free Download 2025
PDF
DNT Brochure 2025 – ISV Solutions @ D365
PDF
AI-Powered Threat Modeling: The Future of Cybersecurity by Arun Kumar Elengov...
PPTX
AMADEUS TRAVEL AGENT SOFTWARE | AMADEUS TICKETING SYSTEM
PDF
Ableton Live Suite for MacOS Crack Full Download (Latest 2025)
PPTX
Log360_SIEM_Solutions Overview PPT_Feb 2020.pptx
PPTX
Introduction to Windows Operating System
PDF
Designing Intelligence for the Shop Floor.pdf
PDF
EaseUS PDF Editor Pro 6.2.0.2 Crack with License Key 2025
PPTX
Cybersecurity: Protecting the Digital World
PDF
Visual explanation of Dijkstra's Algorithm using Python
PDF
Time Tracking Features That Teams and Organizations Actually Need
PPTX
"Secure File Sharing Solutions on AWS".pptx
PDF
How Tridens DevSecOps Ensures Compliance, Security, and Agility
PPTX
assetexplorer- product-overview - presentation
PPTX
WiFi Honeypot Detecscfddssdffsedfseztor.pptx
PDF
How AI/LLM recommend to you ? GDG meetup 16 Aug by Fariman Guliev
PDF
CCleaner 6.39.11548 Crack 2025 License Key
PPTX
GSA Content Generator Crack (2025 Latest)
PPTX
Why Generative AI is the Future of Content, Code & Creativity?
Autodesk AutoCAD Crack Free Download 2025
DNT Brochure 2025 – ISV Solutions @ D365
AI-Powered Threat Modeling: The Future of Cybersecurity by Arun Kumar Elengov...
AMADEUS TRAVEL AGENT SOFTWARE | AMADEUS TICKETING SYSTEM
Ableton Live Suite for MacOS Crack Full Download (Latest 2025)
Log360_SIEM_Solutions Overview PPT_Feb 2020.pptx
Introduction to Windows Operating System
Designing Intelligence for the Shop Floor.pdf
EaseUS PDF Editor Pro 6.2.0.2 Crack with License Key 2025
Cybersecurity: Protecting the Digital World
Visual explanation of Dijkstra's Algorithm using Python
Time Tracking Features That Teams and Organizations Actually Need
"Secure File Sharing Solutions on AWS".pptx
How Tridens DevSecOps Ensures Compliance, Security, and Agility
assetexplorer- product-overview - presentation
WiFi Honeypot Detecscfddssdffsedfseztor.pptx
How AI/LLM recommend to you ? GDG meetup 16 Aug by Fariman Guliev
CCleaner 6.39.11548 Crack 2025 License Key
GSA Content Generator Crack (2025 Latest)
Why Generative AI is the Future of Content, Code & Creativity?

Quill + Spark = Better Together

  • 5. So What’s The Difference? • Abstraction • Encapsulation • Error Handling • Good Control Flow • Performance Application Development Languages Data Retrieval 
 Languages • Natural Expression • Possible Optimization • Good Control Flow • Performance
  • 6. They Make Different Tradeoffs! Abstraction Power Possible Optimizations Data Retrieval
 Languages Application Development Languages
  • 7. Example Please??? CREATE VIEW HelloAmerican AS SELECT 'Hello ' || t.firstName + ' ' || t.lastName + ' of ' || a.city FROM Americans t JOIN Addresses a on t.address_id == a.id -- Hello John James of New York CREATE VIEW HelloCanadian AS SELECT 'Hello ' + t.name + ' ' + t.surname + ' of ' + a.city FROM Canadians t JOIN Addresses a on t.residence_id == a.id -- Hello Jim Jones of Toronto CREATE VIEW HelloYeti AS SELECT 'Hello ' + t.gruntingSound + ' ' + t.roaringSound + ' of ' + a.city FROM AbominableShowmen t JOIN Addresses a on t.cave_id == a.id -- Hello Aaargalah Gralala of Kholat Syakhl
  • 8. CREATE FUNCTION AddressToSomeone ( @humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id) ) SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city FROM @humanoidLivingSomewhere t JOIN Addresses a on t.whereHeLives_id == a.id CREATE VIEW AmericanClients AS SELECT * from AddressToSomeone( SELECT t.firstName as called, t.lastName as alsoCalled, a.address_id as whereHeLives_id FROM Americans ) CREATE VIEW CanadianClients AS SELECT * from AddressToSomeone( SELECT t.name as called, t.surname as alsoCalled, a.residence_id as whereHeLives_id FROM Canadians ) CREATE VIEW YetiClients AS SELECT * from AddressToSomeone( SELECT t.gruntingSound as called, t.roaringSound as alsoCalled, a.cave_id as whereHeLives_id FROM AbominableShowmen )
  • 9. CREATE FUNCTION concatName ( @called VARCHAR; @alsoCalled VARCHAR; @whereHeLives_id) ) AS 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city SELECT concatName(t.firstName, t.lastName, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id SELECT concatName(t.name, t.surname, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id SELECT concatName(t.gruntingSound, t.roaringSound, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id
  • 10. CREATE FUNCTION concatName ( @called VARCHAR; @alsoCalled VARCHAR; @whereHeLives_id) ) AS 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city CREATE FUNCTION AddressToSomeone ( @humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id) ) SELECT 'Hello ' ||t .called || ' ' || t.alsoCalled || ' of ' || a.city, CASE WHEN zd.zone_type = 'K' THEN 'StandardCategory' WHEN zd.zone_type = 'N' AND rc.barbaz = 'GT' THEN 'NonStandardCategory' ELSE 'UnknownCategory' END as zoning_category1, CASE WHEN ru.kdd = 'IK' THEN 'Insanity' WHEN zd.kdd = 'N' AND rc.barbaz = 'GTT' THEN 'MoreInsanity' ELSE 'I_Dont_Even_Know_What_Goes_Here' END as zoning_category2 FROM @humanoidLivingSomewhere t JOIN Addresses a on t.whereHeLives_id = a.id JOIN ResidenceUnit ru on a.rid = ru.id JOIN ResidenceClass rc on ru.class_id = rc.class_id JOIN ZoningDesignation zd on ru.zone_id = zd.rzid and zd.cid = rc.class_id SELECT concatName(t.firstName, t.lastName, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id SELECT concatName(t.name, t.surname, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id SELECT concatName(t.gruntingSound, t.roaringSound, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id
  • 11. CREATE FUNCTION AddressToSomeone ( @humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id) ) SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city FROM @humanoidLivingSomewhere t JOIN Addresses a on t.whereHeLives_id == a.id
  • 12. CREATE FUNCTION AddressToSomeone ( @humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id) ) SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city FROM @humanoidLivingSomewhere t JOIN Addresses a on t.whereHeLives_id == a.id WHERE a.current = true DataFrame Can! def addressToSomeone(df: DataFrame) = { df.as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) }
  • 13. DataFrame Can! addressToSomeone( americans.select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") ) addressToSomeone( canadians.select($"name" as "called", $"surname" as "alsoCalled", $"residence_id" as "whereHeLives_id") ) addressToSomeone( yeti.select($"gruntSound" as "called", $"roarSound" as "alsoCalled", $"cave_id" as "whereHeLives_id") ) def addressToSomeone(df: DataFrame) = { df.as("t") .join(addresses.as("a"), $"id" === $"whereHeLives_id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) }
  • 14. DataFrame Can… addressToSomeone( americans.select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") ) addressToSomeone( canadians.select($"name" as "called", $"surname" as "alsoCalled", $"residence_id" as "whereHeLives_id") ) addressToSomeone( yeti.select($"gruntSound" as "called", $"roarSound" as "alsoCalled", $"cave_id" as "whereHeLives_id") ) def addressToSomeone(df: DataFrame) = { df.as("t") .join(addresses.as("a"), $"whereHeLives_id" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) }
  • 15. … Hurt! def insaneJoin(df: DataFrame) = df.as("t") .join(addresses.as("a"), $"t.whereHeLives_id" === $"a.id") .join(residenceUnit.as("ru"), $"a.rid" === $"ru.id") .join(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id") .join(zoningDesignation.as("zd"), ($"ru.zone_id" === "zd.rid") && ($"zd.cid" === $"rc.class_id") ) .select( concat( lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"), when($"zd.zone_type" === lit("K"), "StandardCategory") .when(($"zd.zone_type" === lit("N")) && ($"rc.barbaz" === lit("GT")), "NonStandardCategory") .otherwise("UnknownCategory") .as("zoning_category1"), when($"ru.kdd" === lit("IK"), "Insanity") .when(($"zd.kdd" === lit("N")) && ($"rc.barbaz" === lit("GTT")), "MoreInsanity") .otherwise("I_Dont_Even_Know_What_Goes_Here") .as("zoning_category2") )
  • 16. … Hurt! def insaneJoin(df: DataFrame) = df.as("t") .join(addresses.as("a"), $"t.whereHeLives_id" === $"a.id") .join(residenceUnit.as("ru"), $"a.rid" === $"ru.id") .join(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id") .join(zoningDesignation.as("zd"), ($"ru.zone_id" === "zd.rzid") && ($"zd.cid" === $"rc.class_id") ) .select( concat( lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"), when($"zd.zone_type" === lit("K"), "StandardCategory") .when(($"zd.zone_type" === lit("N")) && ($"rc.barbaz" === lit("GT")), "NonStandardCategory") .otherwise("UnknownCategory") .as("zoning_category1"), when($"ru.kdd" === lit("IK"), "Insanity") .when(($"zd.kdd" === lit("N")) && ($"rc.barbaz" === lit("GTT")), "MoreInsanity") .otherwise("I_Dont_Even_Know_What_Goes_Here") .as("zoning_category2") )
  • 18. case class HumanoidLivingSomewhere( called:String, alsoCalled: String, whereHeLives_id:Int )
  • 19. Wait… Dataset? def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"a.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"ru.zone_id" === "zd.rzid") && ($"zd.cid" === $"ru.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) }
  • 20. Wait… Dataset? def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) }
  • 21. Wait… Dataset? def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) } Dataset[(HumanoidLivingSomewhere, Address)]
  • 22. Wait… Dataset? def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) } Dataset[ (((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass) ]
  • 23. def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) }
  • 24. case class American( firstName:String, lastName:String, address_id:Int, irrelevantP1:String... irrelevantP100:String ) case class Canadian( name:String, surname:String, residence_id:Int, irrelevantP1:String... irrelevantP100:String ) case class Yeti( gruntingSound:String, roaringSound:String, address_id:Int, irrelevantP1:String... irrelevantP100:String ) Say There's Stuff We Don't Care About case class Address( id:Int, street:String, city:String, current: Boolean, irrelevantA1:String... irrelevantA100:String ) case class HumanoidLivingSomewhere( called:String, alsoCalled: String, whereHeLives_id:Int )
  • 25. Let's Plug it In! def addressToSomeone(humanoidLivingSomewhere: DataFrame) = { humanoidLivingSomewhere.as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) } addressToSomeone( americans.select( $"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") ) americans .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") .as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true))
  • 26. Explain This! *(5) Project [concat(Hello , called, , alsoCalled, of , city)] +- *(5) SortMergeJoin [whereHeLives_id], [id], Inner +- Exchange hashpartitioning(whereHeLives_id) +- *(1) Project [firstName AS called, ... AS whereHeLives_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)], americans .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") .as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) .explain()
  • 27. SQL Does the Same Thing spark.sql( """ |select concat('Hello ', t.called, ' ', t.alsoCalled, ' of ', a.city) as _1 |from ( | select firstName as called, lastName as alsoCalled, address_id as whereHeLives_id | from americans |) as t |join addresses a on (t.whereHeLives_id = a.id) |where a.current = true |""".stripMargin ) .explain() *(5) Project [concat(Hello , called, , alsoCalled, of , city)] +- *(5) SortMergeJoin [whereHeLives_id], [id], Inner +- Exchange hashpartitioning(whereHeLives_id) +- *(1) Project [firstName AS called, ... AS whereHeLives_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)],
  • 28. *(5) Project [concat(Hello , called, , alsoCalled, of , city)] +- *(5) SortMergeJoin [whereHeLives_id], [id], Inner +- Exchange hashpartitioning(whereHeLives_id) +- *(1) Project [firstName AS called, ... AS whereHeLives_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)], americans .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") .as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) .explain() Explain This!
  • 29. In English Please? *(🤞) Gimme My Result! [concat(Hello , called, , alsoCalled, of , city)] +- *(💂) We're Joining! Huzzah! [whereHeLives_id], [id], Inner +- Join Key for the Left Side! (whereHeLives_id) +- *(1) Rename these like I said! Pronto! [firstName as Called... ] +- *(😇) I'm a smart format, load only: [firstName,lastName,address_id] +- Join Key for the Right Side! (id) +- *(😇) I'm a smart format, load only: [id,city,current] Read only current addr. from the file! 😎: [EqualTo(current,true)], americans .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") .as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) .explain()
  • 30. How About Dataset? def addressToSomeone(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = { humanoidsLivingSomewhere .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } } val americanClients = addressToSomeone( americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) ) americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
  • 31. Explain This... Please! *(3) SerializeFromObject [UTF8String] +- *(3) MapElements java.lang.String +- DeserializeToObject newInstance(Tuple2) +- SortMergeJoin [_1.whereHeLives_id], [_2.id], Inner +- Exchange hashpartitioning(_1.whereHeLives_id) +- *(1) Project [called, alsoCalled, whereHeLives_id] +- *(1) SerializeFromObject [UTF8String] +- *(1) MapElements HumanoidLivingSomewhere +- DeserializeToObject newInstance(American) +- FileScan parquet [firstName,lastName,address_id,irrelevantP1,irrelevantP2,i +- Exchange hashpartitioning(_2.id) +- FileScan parquet [id,street,city,current,irrelevantA1,irrelevantA2,irrelevantA3,irrel PushedFilters: [] americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } .explain()
  • 32. *(🤮) Serialize Back Into a String Expensive! +- *(3) Do the Outer Map that we Invoked +- (🤮) Deserialize Tuple2 Expensive! +- (💂) We're Joining! Huzzah! [_1.whereHeLives_id], [_2.id], Inner +- Join Key for the Left Side (_1.whereHeLives_id) +- *(1) Project [called, alsoCalled, whereHeLives_id] +- *(🤮) Serialize the Join Key. Expensive! +- *(1) MapElements HumanoidLivingSomewhere +- (🤮) Deserialize into a JVM Object (i.e. class American) +- Scan All 'American' Columns Including 100 irrelevant ones! 😱 +- Join Key for the Right Side (_2.id) +- Scan All 'Address' Columns Including 100 irrelevant ones! 😱 We Need to Read The Entire Dataset! No Excluding Non-Current Addresses 😢 americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } .explain()
  • 33. americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
  • 34. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String }
  • 35. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String } Which Columns are we using in here? Which Columns are we using in here? Which Columns are we using in here? ⏸
  • 36. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String } I Guess We Need
 All Of Them! I Guess We Need
 All Of Them! I Guess We Need
 All Of Them! ⏸
  • 37. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String }
  • 38. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, ¯_(ツ)_/¯) .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String } ⏸ What columns am I joining by???
  • 39. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map( (🖼 American) => HumanoidLivingSomewhere 🖼 ) 🤮 .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { (🖼 HumanoidLivingSomewhere) => Boolean } 🤮 .map { (🖼 HumanoidLivingSomewhere) => String 🖼 } 🤮 → → → → →
  • 40. americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
  • 41. def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val joined = humanoid .joinInner(addresses) { humanoid('where) === addresses('id) } joined.select(concat( lit("Hello "), joined.colMany('_1, 'called), lit(" "), joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city))) } What About Frameless? addressToSomeone( americans.select(americans('firstName), americans('lastName), americans('addressId)) .deserialized.map{ case (name, age, whereHeLives_id ) => HumanoidLivingSomewhere( name.asInstanceOf[String], age.asInstanceOf[String], whereHeLives_id.asInstanceOf[Int]) } )
  • 42. What About Frameless? def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val joined = humanoid .joinInner(addresses) { humanoid('where) === addresses('id) } joined.select(concat( lit("Hello "), joined.colMany('_1, 'called), lit(" "), joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city))) } addressToSomeone( americans.select(americans('firstName), americans('lastName), americans('addressId)) .deserialized.map{ case (name, age, whereHeLives_id ) => HumanoidLivingSomewhere( name.asInstanceOf[String], age.asInstanceOf[String], whereHeLives_id.asInstanceOf[Int]) } )
  • 43. What About Frameless? def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val joined = humanoid .joinInner(addresses) { humanoid('where) === addresses('id) } joined.select(concat( lit("Hello "), joined.colMany('_1, 'called), lit(" "), joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city))) }
  • 44. def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) } val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) } val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) } val j4 = j3.joinInner(zoningDesignation) { (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) && (zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id)) } type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation) j4.select( concat( lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "), j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city) ), when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory")) .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"), lit("NonStandardCategory")) .otherwise( lit("UnknownCategory")), when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity")) .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"), lit("MoreInsanity")) .otherwise(lit("I_Dont_Even_Know_What_Goes_Here")) ) } What Sub-Tuple is class_id inside of?
  • 45. def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) } val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) } val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) } val j4 = j3.joinInner(zoningDesignation) { (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) && (zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id)) } type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation) j4.select( concat( lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "), j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city) ), when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory")) .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"), lit("NonStandardCategory")) .otherwise( lit("UnknownCategory")), when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity")) .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"), lit("MoreInsanity")) .otherwise(lit("I_Dont_Even_Know_What_Goes_Here")) ) } What Sub-Tuple is class_id inside of? Dataset[ (((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass) ]
  • 46. def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) } val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) } val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) } val j4 = j3.joinInner(zoningDesignation) { (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) && (zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id)) } type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation) j4.select( concat( lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "), j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city) ), when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory")) .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"), lit("NonStandardCategory")) .otherwise( lit("UnknownCategory")), when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity")) .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"), lit("MoreInsanity")) .otherwise(lit("I_Dont_Even_Know_What_Goes_Here")) ) } Also... What's This???
  • 47. [error] found : frameless.TypedColumn[Nothing,String] [error] required: frameless.AbstractTypedColumn[((((org.ctl.complex.HumanoidLivingSomewhere, org.ctl.complex.Address), org.ctl.complex.ResidenceUnit), org.ctl.complex.ResidenceClass), org.ctl.complex.ZoningDesignation),String] [error] Note: Nothing <: ((((org.ctl.complex.HumanoidLivingSomewhere, org.ctl.complex.Address), org.ctl.complex.ResidenceUnit), org.ctl.complex.ResidenceClass), org.ctl.complex.ZoningDesignation), but class AbstractTypedColumn is invariant in type T. [error] You may wish to define T as +T instead. (SLS 4.5) [error] when(j4.colMany('_2, 'zone_type) === "K", lit("StandardCategory"))
  • 48. DataFrame/SQL Untyped 😢 Column Pruning Filter Pushdown 😎 Dataset Almost Typed 😕 Extra Serialization 🤮 Frameless Typed 😃 Very Complex if you don't know Shapeless.
  • 51. ? Scala Code SQL DataFrame
  • 52. ? Scala Code SQL DataFrame TypecheckingPower Possible Optimizations
  • 53. Scala Code SQL Quill DataFrame quote { scala-syntax-tree } AST Macro
  • 54. Scala Code SQL Quill quote { scala-syntax-tree } AST Macro
  • 55. Query[R] SQL Quill quote { Query[R] } AST Macro Quoted[Query[R]]
  • 58. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[?]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 59. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 60. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } Query [Humanoid] Humanoid Query [Address] Address ⏸
  • 61. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 62. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 63. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses if ( a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 64. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 65. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h LEFT JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.leftJoin(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 66. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h LEFT JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.leftJoin(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } Address
  • 67. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h LEFT JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.leftJoin(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } Address Option[Address] Address
  • 68. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 69. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true String
  • 70. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Query[String]
  • 71. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Query[Humanoid] => Query[String] Query[String]
  • 72. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[String]] Query[Humanoid] => Query[String] Query[String]
  • 73. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield h } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT h.* FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[Humanoid]] Query[Humanoid] => Query[Humanoid] Query[Humanoid]
  • 74. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield a } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT a.* FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[Address]] Query[Humanoid] => Query[Address] Query[Address]
  • 75. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield (h, a) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT h.*, a.* FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[(Humanoid, Address)]] Query[Humanoid] => Query[(Humanoid, Address)] Query[(Humanoid, Address)]
  • 76. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield (Foobar(h, a)) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT ?? I don't understand objects ?? FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[Foobar]] Query[Humanoid] => Query[Foobar] Query[Foobar]
  • 77. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) ▶
  • 78. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 79. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 80. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 81. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 82. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 83. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.r && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory" )
  • 84. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.r && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" )
  • 85. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.r && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" )
  • 86. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) [error] quillspark-examples/src/main/scala/Main.scala:107:28: value rid is not a member of org.ctl.complex.ZoningDesignation [error] ru.zone_id == zd.rid && zd.cid == rc.class_id [error] ^
  • 87. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) ⏸
  • 88. humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) }
  • 89. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" )
  • 90. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" )
  • 91. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory" )
  • 92. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 93. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 94. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 95. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 96. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 97. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) ▶
  • 98. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) ▶
  • 99. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true ▶ case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 100. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true quote { addressToSomeone(americans.map(am => 
 Humanoid(am.firstName, am.lastName, am.address_id))) }
  • 101. SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } quote {addressToSomeone(canadians.map(am => 
 Humanoid(am.name, am.surname, am.residence_id)))} SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true
  • 102. SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } quote {addressToSomeone(yeti.map(am => 
 Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))} SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true
  • 103. SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true ▶ quote {addressToSomeone(yeti.map(am => 
 Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))}
  • 104. SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true DataFrame quote {addressToSomeone(yeti.map(am => 
 Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))} quote {addressToSomeone(canadians.map(am => 
 Humanoid(am.name, am.surname, am.residence_id)))} quote { addressToSomeone(americans.map(am => 
 Humanoid(am.firstName, am.lastName, am.address_id))) } run(Query[String]) run(Query[String]) run(Query[String])
  • 105. DataFrame
 Dataset[String] SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true run(Query[String]) run(Query[String]) run(Query[String])
  • 106. DataFrame
 Dataset[Humanoid] SELECT h FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true SELECT h FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT h FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true run(Query[Humanoid]) run(Query[Humanoid]) run(Query[Humanoid])
  • 107. DataFrame
 Dataset[T] run(Query[T]) run(Query[T]) run(Query[T]) SELECT ? FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true SELECT ? FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT ? FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true
  • 110. Dataset[T] DataFrame Easy! Just '.toDF' Harder! '.as[DoIReallyKnowItsThis?]' run(Query[T])
  • 111. val spark = SparkSession.builder() .appName("SparkQuillExample") .enableHiveSupport() .getOrCreate() implicit val sqlContext = spark.sqlContext import sqlContext.implicits._ import QuillSparkContext._ val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] Dataset[Yeti] Dataset[Address]
  • 112. val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] val yeti = quote { liftQuery(yetiDS) } val addresses = quote { liftQuery(addressesDS) } Quoted[Query[Yeti]] Quoted[Query[Address]] Dataset[Yeti] Dataset[Address]
  • 113. val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) } val yeti = quote { liftQuery(yetiDS) } val addresses = quote { liftQuery(addressesDS) }
  • 114. val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val yeti = quote { liftQuery(yetiDS) } val addresses = quote { liftQuery(addressesDS) } val yetiOfSomeplace: Dataset[String] = run(output) val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) }
  • 115. val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val yeti = quote { liftQuery(yetiDS) } val addresses = quote { liftQuery(addressesDS) } val yetiOfSomeplace: Dataset[String] = run(output) val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) } Run This Query: Then Give Me Back My Dataset!!! SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true
  • 116. val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val yetiOfSomeplace: Dataset[String] = run(output) *(5) Project [concat(Hello , firstName, , lastName, of , city)] +- *(5) SortMergeJoin [address_id], [id], Inner +- Exchange hashpartitioning(address_id) +- *(1) Project [firstName, lastName, address_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)] val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) }
  • 117. val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) } val yetiOfSomeplace: Dataset[String] = run(output) *(5) Project [concat(Hello , firstName, , lastName, of , city)] +- *(5) SortMergeJoin [address_id], [id], Inner +- Exchange hashpartitioning(address_id) +- *(1) Project [firstName, lastName, address_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)] Optimized + No Serialization 😎
  • 118. val yetiOfSomeplace: Dataset[String] = run(output) val addressToSomeone = quote { Quill Magic! } ▶ val output = quote { Quill Magic! } val yetiDS: Dataset[Yeti] = parquet("output/yeti").as[Yeti] val addressesDS: Dataset[Address] = parquet("output/addresses").as[Address] *(5) Project [concat(Hello , firstName, , lastName, of , city)] +- *(5) SortMergeJoin [address_id], [id], Inner +- Exchange hashpartitioning(address_id) +- *(1) Project [firstName, lastName, address_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)]
  • 119. ▶ val yetiOfSomeplace: Dataset[String] = run(output) val addressToSomeone = quote { Quill Magic! } val output = quote { Quill Magic! } val yetiDS: Dataset[Yeti] = parquet("output/yeti").as[Yeti] val addressesDS: Dataset[Address] = parquet("output/addresses").as[Address]
  • 120. // Applicative Joins yeti.join(addresses).on(_.caveId == _.id) yeti.leftJoin(addresses).on(_.caveId == _.id) // Implicit Joins for { y <- yeti a <- addresses if (y.caveId == a.id) } yield (y, a) // Semi-Joins val cavelessYeti = quote { yeti.filter(y => !addresses.map(_.id).contains(y.caveId)) } Some other stuff we can do...
  • 121. Some other stuff we can do... // Group-By orders.groupBy(_.sku).map { case (sku, orders) => (sku, orders.map(_.price).avg) } // Concat-Map val nodesChildren = quote { (ns: Query[Node]) => ns.concatMap(n => n.children) } // Union/UnionAll val americansAndCanadians = quote { americans.map(_.firstName) unionAll canadians.map(_.surname) }
  • 122. Some other stuff we can do... // User Defined Aggregation Functions (UDAFs) spark.udf.register("geomMean", new GeometricMean) val geomMean = quote { (q: Query[BigDecimal]) => infix"geomMean(${q})".as[BigDecimal] } orders.groupBy(_.sku).map { case (sku, orders) => (sku, geomMean(orders.map(_.price))) } // Using Spark UDFs spark.udf.register("businessLogicUdf", (str:String) => str + "-suffix") val businessLogicUdf = quote { (str: String) => infix"businessLogicUdf(${str})".as[String] } quote { yeti.map(y => businessLogicUdf(y.gruntingSound)) }
  • 124. https://getquill.io/ https://github.com/getquill/quill https://gitter.im/getquill/quill ...Try It Out! libraryDependencies ++= Seq( "io.getquill" %% "quill-spark" % "3.4.10" ) <dependency> <groupId>io.getquill</groupId> <artifactId>quill-spark_2.12</artifactId> <version>3.4.10</version> </dependency>