Cassandra 3.0 - JSON at scale - StampedeCon 2015

©2014 DataStax Confidential. Do not distribute without consent.
CTO, DataStax
Jonathan Ellis
Project Chair, Apache Cassandra
Cassandra 3.0: JSON at Scale

SELECT offices.name, MAX(orders.created_at)
FROM offices NATURAL JOIN orders
GROUP BY offices.name;

"glossary": {
    "title": "example glossary",
    "GlossDiv": {
        "title": "S",
        "GlossList": {
            "GlossEntry": {
                "ID": "SGML",
                "SortAs": "SGML",
                "GlossTerm": "Standard Generalized Markup Language",
                "Acronym": "SGML",
                "Abbrev": "ISO 8879:1986",
                "GlossDef": {
                    "para": "A meta-markup language, used to create markup languages such as DocBook.",
                    "GlossSeeAlso": ["GML", "XML"]
                },
                "GlossSee": "markup"
            }
        }
    }
}

CQL
CREATE TABLE users (
id uuid PRIMARY KEY,
name text,
state text,
birth_date int
);
CREATE INDEX ON users(state);
SELECT * FROM users
WHERE state='Texas'
AND birth_date > 1950;

Collections
CREATE TABLE example (
    id int PRIMARY KEY,
    tupleval tuple<int, text>,
    numbers set<int>,
    words list<text>
);
INSERT INTO example (id, tupleval, numbers, words)
VALUES (0, (1, 'foo'), {1, 2, 3, 6}, ['the', 'quick', 'brown', 'fox']);

User-defined types (UDT)
CREATE TYPE address (number int, street text);
CREATE TABLE users (
id int PRIMARY KEY,
street_address frozen<address>
);
INSERT INTO users (id, street_address)
VALUES (1, {number: 123, street: 'Cassandra Ave'});

JSON
INSERT INTO example JSON
'{"id": 0,
"tupleval": [1, "foo"],
"numbers": [1, 2, 3, 6],
"words": ["the", "quick", "brown", "fox"]}';
INSERT INTO users JSON
'{"id": 1,
"street_address": {"number": 1,
"street": "Cassandra Ave"}}';

Nested
CREATE TYPE address (
street text,
city text,
zip_code int,
phones set<text>
);
CREATE TABLE users (
id uuid PRIMARY KEY,
name text,
addresses map<text, frozen<address>>
);

Nested
INSERT INTO users JSON
'{"id": "0514e410-2a9f-11e5-a2cb-0800200c9a66",
"name": "jbellis",
"addresses": {"home": {"street": "9920 Cassandra Ave",
"city": "Austin",
"zip_code": 78700,
"phones": ["1238614789"]}}}';

What about schemaless documents?

{"userid": "2452347",
"name": "jbellis",
... }
{"userid": 2452348,
"name": "jhaddad",
... }
{"user_id": 2452349,
"name": "jlacefield",
... }

Performance and scale
read-mostly

read-mostly balanced

write-mostly

write-mostly op/analytic

Latency
balanced
write-mostly op/analytic
read-mostly

See also
•The myth of schema-less:
http://rustyrazorblade.com/2014/07/the-myth-of-schema-less/
•Schema-less is (usually) a lie:
https://www.compose.io/articles/schema-less-is-usually-a-lie/
•Schemaless databases don’t exist:
https://vividcortex.com/blog/2015/02/24/schemaless-databases-dont-exist/

Lightweight transactions
INSERT INTO users
(username, name, email,
password, created_date)
VALUES ('pmcfadin',
'Patrick McFadin',
['patrick@datastax.com'],
'ba27e03fd9...',
'2011-06-20 13:50:00')
IF NOT EXISTS;

INSERT INTO users
VALUES ('pmcfadin',
'Patrick McFadin',
'ba27e03fd9...',
'2011-06-20 13:50:00')
IF NOT EXISTS;
INSERT INTO users
VALUES ('pmcfadin',
'Patrick McFadin',
'ea24e13ad9...',
'2011-06-20 13:50:01')
IF NOT EXISTS;

INSERT INTO users
VALUES ('pmcfadin',
'Patrick McFadin',
'ba27e03fd9...',
'2011-06-20 13:50:00')
IF NOT EXISTS;
[applied]
-----------
True
INSERT INTO users
VALUES ('pmcfadin',
'Patrick McFadin',
'ea24e13ad9...',
'2011-06-20 13:50:01')
IF NOT EXISTS;

[applied] | username | created_date | name
-----------+----------+----------------+----------------
False | pmcfadin | 2011-06-20 ... | Patrick McFadin
INSERT INTO users
VALUES ('pmcfadin',
'Patrick McFadin',
'ba27e03fd9...',
'2011-06-20 13:50:00')
IF NOT EXISTS;
[applied]
-----------
True
INSERT INTO users
VALUES ('pmcfadin',
'Patrick McFadin',
'ea24e13ad9...',
'2011-06-20 13:50:01')
IF NOT EXISTS;

Static columns
CREATE TABLE bills (
user text,
balance int static,
expense_id int,
amount int,
description text,
paid boolean,
PRIMARY KEY (user, expense_id)
);

Static columns + LWT
CREATE TABLE bills (
user text,
balance int static,
expense_id int,
amount int,
description text,
paid boolean,
PRIMARY KEY (user, expense_id)
);
BEGIN BATCH
UPDATE bills SET balance = -116 WHERE user='user1' IF balance = 84;
INSERT INTO bills (user, expense_id, amount, description, paid)
VALUES ('user1', 2, 200, 'hotel room', false);
APPLY BATCH;

Role-based authorization
CREATE ROLE manager
WITH PASSWORD 'foo' LOGIN;
GRANT authorize TO manager;
GRANT manager TO jbellis;

Hinted handoff improvements
CREATE TABLE system.hints (
target_id uuid,
hint_id timeuuid,
message_version int,
mutation blob,
PRIMARY KEY (target_id, hint_id, message_version)
) WITH COMPACT STORAGE
AND CLUSTERING ORDER BY (hint_id ASC, message_version ASC)

SSTable-based hints
Hint
Commitlog

SSTable-based hints
Hint
Commitlog
Memtable

SSTable-based hints
Hint
Commitlog
Memtable
SSTable

SSTable-based hints
Hint
Commitlog
Memtable
SSTable
Tombstone

SSTable-based hints
Hint
Commitlog
Memtable
SSTable
Tombstone
Commitlog

SSTable-based hints
Hint
Commitlog
Memtable
SSTable
Memtable
Tombstone
Commitlog

SSTable-based hints
Hint
Commitlog
Memtable
SSTable
Memtable
SSTable
Tombstone
Commitlog

SSTable-based hints
Hint
Commitlog
Memtable
SSTable
Memtable
SSTable
Tombstone
Commitlog
Compacted

File-based hints
Hint
.168.101

File-based hints
Hint
.168.101
Hint

File-based hints
Hint
.168.101
Hint
Hint

File-based hints
Hint
.168.101
Hint
Hint
Hint

File-based hints
Hint
.168.101
Hint
Hint
Hint
Hint

File-based hints
Hint
.168.101
Hint
Hint
Hint
Hint
Hint

File-based hints
Hint
.168.101
Hint
Hint
Hint
Hint
Hint
Hint

File-based hints
Hint
.168.101
Hint
Hint
Hint
Hint
Hint
Hint
Hint

File-based hints
Hint
.168.101
Hint
Hint
Hint
Hint
Hint
Hint
Hint
.168.104
Hint
Hint
Hint
Hint
Hint
Hint
Hint
Hint

File-based hints
Hint
.168.101
Hint
Hint
Hint
Hint
Hint
Hint
Hint
.168.104
Hint
Hint
Hint
Hint
Hint
Hint
Hint
Hint
.168.112
Hint
Hint
Hint
Hint
Hint
Hint
Hint
Hint

File-based hints
.168.104
Hint
Hint
Hint
Hint
Hint
Hint
Hint
Hint
.168.112
Hint
Hint
Hint
Hint
Hint
Hint
Hint
Hint

User-defined functions
CREATE FUNCTION my_sin (input double)
RETURNS double LANGUAGE java
AS '
return input == null
? null
: Double.valueOf(Math.sin(input.doubleValue()));
';
SELECT key, my_sin(value) FROM my_table WHERE key IN (1, 2, 3);
also aggregates
http://www.slideshare.net/RobertStupp/user-definedfunctionscassandrasummiteu2014
[robert stupp user defined functions]

Materialized views
CREATE MATERIALIZED VIEW songs_by_album AS
SELECT * FROM songs
WHERE album IS NOT NULL
PRIMARY KEY (album, id);
SELECT * FROM songs_by_album
WHERE album = 'Tres Hombres';

Indexes
CREATE TABLE songs (
id uuid PRIMARY KEY,
title text,
album text,
artist text
);
CREATE INDEX songs_by_album on songs(album);
insert into songs (id, title, artist, album)
values ('a3e64f8f...', 'La Grange', 'ZZ Top', 'Tres Hombres');
insert into songs (id, title, artist, album)
values ('8a172618...', 'Waitin for the Bus', 'ZZ Top', 'Tres Hombres');
insert into songs (id, title, artist, album)
values ('2b09185b...', 'Outside Woman Blues', 'Back Door Slam', 'Roll Away');
SELECT * FROM songs
WHERE album = 'Tres Hombres';

Local indexes
client
title artist album
La Grange ZZ Top Tres Hombres
title artist album
Outside... Back Door Slam Roll Away
title artist album
Waitin... ZZ Top Tres Hombres

Materialized Views
client
album id
Tres
Hombres
a3e64f8f
Tres
Hombres
8a172618
album id
Roll Away 2b09185b

Upcoming releases
•2.2: July 20th

Upcoming releases
•2.2: July 20th
•3.0: Late September

Upcoming releases
•2.2: July 20th
•3.1: November

Upcoming releases
•2.2: July 20th
•3.1: November
•3.2: December

Cassandra 3.0 - JSON at scale - StampedeCon 2015

More Related Content

What's hot (20)

Viewers also liked (20)

Similar to Cassandra 3.0 - JSON at scale - StampedeCon 2015 (20)

More from StampedeCon (20)

Recently uploaded (20)

Cassandra 3.0 - JSON at scale - StampedeCon 2015