SlideShare a Scribd company logo
APIs de Visualização em
Python
TDC 2014
Wilson Freitas
http://aboutwilson.net
@aboutwilson
Visualização
APIs de Visualização em Python
Visualização + Python
seaborn
bokeh
ggplot
prettyplotlib
Vincent
Vega
(Trifacta)
JavaScript Support
Pandas Support
ggplot
seaborn
● Biblioteca de visualização baseada no matplotlib
" seaborn is a library for making attractive and
informative statistical graphics in Python "
● Boa para análise exploratória de dados
● Fortemente integrada com pandas e numpy
" Bringing Matplotlib to the Browser "
● Integra matplotlib com D3js
● Cria visualização de dados interativa no
navegador
mpld3
Vincent
" A Python to Vega translator "
" Vega is a visualization grammar, a declarative
format for creating, saving and sharing visualization
designs. "
● Estruturas de dados Python
● Visualização em JavaScript
● Integração com pandas
Bokeh
● Biblioteca de visualização interativa
● Integração com pandas
● Foco: explorar a visualização dos navegadores
dados: Top 100 Wines of the
World 2014
DataFrame pandas: wty
Quantos vinhos por País?
# Bar chart with one bar per value of the "Country" column of `wty`.
# The whole expression is parenthesized so the "+" chain may span several
# lines; a line ending in a bare "+" is a SyntaxError in Python.
(ggplot(wty, aes("Country"))
 + geom_bar(fill="red", colour="black")
 # Rotate the x tick labels so the country names do not overlap.
 + theme(axis_text_x=element_text(angle=90)))
Quantos vinhos por País? (com estilo)
# Same per-country bar chart, with the xkcd theme applied on top.
# The whole expression is parenthesized so the "+" chain may span several
# lines; a line ending in a bare "+" is a SyntaxError in Python.
(ggplot(wty, aes("Country"))
 + geom_bar(fill="red", colour="black")
 + theme_xkcd()
 # Rotate the x tick labels so the country names do not overlap.
 + theme(axis_text_x=element_text(angle=90)))
Qual a relação entre Pontos e Prêmios?
# Scatter plot of the `wty` rows: "Prizes" on the x axis, "Points" on the y axis.
(
    ggplot(wty, aes(x='Prizes', y='Points'))
    + geom_point()
)
Qual a relação entre Pontos e Prêmios?
# Scatter plot of Points against Prizes with a smoothing layer fitted using
# method 'lm', drawn in red.
# The whole expression is parenthesized so the "+" chain may span several
# lines; a line ending in a bare "+" is a SyntaxError in Python.
(ggplot(wty, aes(y="Points", x="Prizes"))
 + geom_point(size=60, shape=6)
 + stat_smooth(method='lm', colour='red'))
Qual a relação entre Pontos e Prêmios por País?
# Boolean mask selecting the rows of `wty` that belong to the five countries
# compared on this slide. `isin` replaces the chain of "==" tests joined by
# "|", which also could not legally continue across lines without
# parentheses.
idx = wty.Country.isin(['AUSTRALIA', 'FRANCE', 'SPAIN', 'USA', 'ARGENTINA'])

# Points-vs-Prizes scatter with an 'lm' smoothing layer, one facet per
# country, all facets sharing the same scales.
# Fixes: the call was misspelled `gplot` (NameError), and the "+" chain is
# now parenthesized so it can span several lines.
(ggplot(wty[idx], aes(x="Prizes", y="Points"))
 + geom_point()
 + stat_smooth(method='lm', color='blue')
 + facet_wrap("Country", scales="fixed"))
Como se distribuem os prêmios por País?
# Distribution of Prizes per country, drawn as violin plots.
# The Prizes values are first grouped by the Country column.
wty_country_prizes = wty['Prizes'].groupby(wty['Country'])
# Boolean Series indexed by country: True where the country has exactly one
# Prizes entry.
idx = wty_country_prizes.count() == 1
# Keep only the rows whose country is NOT one of those single-entry
# countries, and only the Country and Prizes columns.
# NOTE(review): `.ix` and `Series.sort(inplace=False)` are 2014-era pandas
# APIs that later pandas releases removed -- confirm the installed version
# before reusing this snippet.
wty_t = wty.ix[[c not in idx[idx].index for c in wty.Country], 
['Country','Prizes']]
# Mean Prizes per remaining country.
wty_t_country_mean = wty_t.groupby(wty_t.Country).mean()
# Country names ordered by their mean Prizes (presumably ascending, the
# pandas default -- TODO confirm); used as the violin order below.
country_names = wty_t_country_mean.Prizes.sort(inplace=False).index
import seaborn as sb
sb.set(style="ticks")
f, ax = plt.subplots()
sb.offset_spines()
# One violin of Prizes per Country, in the order computed above.
sb.violinplot(wty_t['Prizes'], wty_t['Country'], order=country_names)
# Rotate the x tick labels by 45 degrees.
locs, labels = plt.xticks()
plt.setp(labels, rotation=45)
sb.despine(trim=True)
# Four bar-chart panels (2x2 grid) summarising Prizes per country:
# count, mean, max and min.
wty_country = wty.groupby('Country')
# Number of 'Wine' entries per country, sorted in place.
country_count = wty_country['Wine'].aggregate(len)
country_count.sort(inplace=True)

fig, ax = plt.subplots(figsize=(12,8))
# NOTE(review): these spine/tick settings are applied to `ax`, not to the
# four axes selected by the plt.subplot(...) calls below -- confirm intent.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()

# Panel 221: count of Prizes entries per country.
plt.subplot(221)
g = wty_country['Prizes'].count()
x = np.arange(len(g))
countries = list(g.index.values)
y = g.values
plt.bar(x+1, y, align='center', width=0.5, alpha=0.4)
# Blank tick labels on the top row. One label per tick position: the
# original hard-coded 10 labels, which only matches when there are exactly
# 10 countries.
plt.xticks(x+1, [' ']*len(x), rotation=90)
plt.title('Contagem')
plt.ylabel('Prizes')

# Panel 222: mean Prizes per country.
plt.subplot(222)
g = wty_country['Prizes'].mean()
y = g.values
plt.bar(x+1, y, align='center', color='red', width=0.5, alpha=0.4)
plt.xticks(x+1, [' ']*len(x), rotation=90)
plt.title(u'Média')

# Panel 223: maximum Prizes per country; the bottom row shows country names.
plt.subplot(223)
g = wty_country['Prizes'].max()
y = g.values
plt.bar(x+1, y, align='center', color='darkgreen', width=0.5, alpha=0.4)
plt.xticks(x+1, countries, rotation=90)
plt.title(u'Máx')
plt.ylabel('Prizes')

# Panel 224: minimum Prizes per country.
plt.subplot(224)
g = wty_country['Prizes'].min()
y = g.values
plt.bar(x+1, y, align='center', color='magenta', width=0.5, alpha=0.4)
plt.xticks(x+1, countries, rotation=90)
plt.title(u'Mín')
Mas qual País possui o melhor vinho?
>>> mpld3.save_html(fig, 'fig.html')
<div id="fig_el672345700473765832459779"></div>
<script>
function mpld3_load_lib(url, callback){
var s = document.createElement('script');
s.src = url;
s.async = true;
s.onreadystatechange = s.onload = callback;
s.onerror = function(){console.warn("failed to load library " + url);};
document.getElementsByTagName("head")[0].appendChild(s);
}
if(typeof(mpld3) !== "undefined" && mpld3._mpld3IsLoaded){
// already loaded: just create the figure
!function(mpld3){
...
mpld3 salva fig em html
import Quandl
# Quandl dataset codes to download, in the order of the column labels
# assigned further down.
tickers = [
    "GOOG/BVMF_BOVA11.4",
    "YAHOO/SA_PETR3.6",
    "GOOG/BVMF_VALE5.4",
    "GOOG/BVMF_BBDC4.4",
    "GOOG/BVMF_BBAS3.4",
    "GOOG/BVMF_ITUB4.4",
]

# Fetch all codes in one call, limited to calendar year 2013.
series = Quandl.get(
    tickers,
    trim_start="2013-01-01",
    trim_end="2013-12-31",
)

# Replace the returned column names with short labels, one per ticker.
series.columns = [
    "BOVESPA",
    "PETROBRAS",
    "VALE",
    "BRADESCO",
    "BB",
    "ITAU",
]

# Drop, in place, every row that has a missing value in any column.
series.dropna(inplace=True)
dados: Ações (Quandl)
>>> print series
BOVESPA PETROBRAS VALE BRADESCO BB ITAU
Date
2013-01-02 61.27 19.00 42.60 36.02 25.80 31.3
2013-01-03 61.92 19.70 42.09 38.12 26.31 32.2
2013-01-04 61.32 19.77 41.17 37.45 26.00 31.9
2013-01-07 60.75 19.51 40.35 37.29 26.15 31.5
2013-01-08 59.91 18.96 40.10 37.42 26.45 31.6
2013-01-09 60.25 19.15 40.30 37.90 26.70 32.1
2013-01-10 60.49 19.27 40.57 37.40 26.30 31.7
2013-01-11 60.30 19.32 39.53 37.27 25.98 31.8
2013-01-14 60.75 19.21 39.77 37.88 26.93 32.2
2013-01-15 60.52 19.27 39.65 37.30 26.70 31.8
2013-01-16 60.54 19.34 39.68 37.49 26.86 32.1
2013-01-17 61.01 19.21 39.75 38.11 26.81 32.6
2013-01-18 60.79 19.05 39.20 38.25 26.20 32.7
2013-01-21 60.75 18.87 39.15 38.09 25.93 32.5
2013-01-22 60.55 19.12 39.59 38.40 25.64 32.9
...
Visualização de Séries de Preço
import vincent as v
# Build a vincent Line chart from the `series` DataFrame, then set the axis
# titles and the legend title.
line = v.Line(series)
line.axis_titles(x='Date', y='Price')
line.legend(title='Series')
Exporta a Gramática de Gráficos
>>> print line.to_json()
{
"axes": [
{ "scale": "x", "title": "Date", "type": "x" },
{ "scale": "y", "title": "Price", "type": "y" }
],
"data": [
{
"name": "table",
"values": [
{ "col": "BOVESPA", "idx": 1357092000000, "val": 61.27 },
{ "col": "PETROBRAS","idx": 1357092000000, "val": 19.0 },
{ "col": "VALE", "idx": 1357092000000, "val": 42.6 },
...
Variação acumulada
# Log returns: difference between consecutive rows of log(series); rows
# containing NaN (such as the first row produced by diff) are dropped.
logreturns = np.log(series).diff().dropna(inplace=False)
# Cumulative variation: running product of exp(log return) down each column.
cumreturns = np.cumprod(np.exp(logreturns))
# Line chart of the cumulative variation; the y axis title is left empty.
line = v.Line(cumreturns)
line.axis_titles(x='Date', y='')
line.legend(title='Series')
Séries de Preço Empilhadas
# Build a vincent StackedArea chart from the `series` DataFrame, then set
# the axis titles, the legend title and the 'Spectral' color brew.
stacked = v.StackedArea(series)
stacked.axis_titles(x='Date', y='Price')
stacked.legend(title='Series')
stacked.colors(brew='Spectral')
Qual a correlação entre séries?
# with seaborn
# Correlation plot computed on the log returns of `series` (difference of
# consecutive log values, NaN rows dropped).
sb.corrplot(np.log(series).diff().dropna())
Mais no detalhe
# Joint plots of the "VALE" and "BOVESPA" columns of `series`: first with
# kind="kde", then with kind="reg".
# The trailing semicolons are kept as in the slides (presumably to suppress
# the notebook's echo of the returned object -- TODO confirm).
sb.jointplot("VALE", "BOVESPA",
series, kind="kde");
sb.jointplot("VALE", "BOVESPA",
series, kind="reg");
Rentabilidade de ações
● Qual a rentabilidade de cada ação por mês?
● Como visualizar tudo no mesmo gráfico?
● Como fazer isso higienicamente?
● Vamos considerar 18 ações no ano de 2013
Rentabilidade de ações
# Heat map of monthly returns (in percent) for 18 stocks during 2013,
# drawn with bokeh: one rectangle per (stock, month), colored by the return.
from bokeh.plotting import *
from bokeh.objects import HoverTool
import os
import numpy as np
import Quandl
import pandas as pd
from collections import OrderedDict

output_notebook()

# Quandl dataset codes, in the order of the column labels assigned below.
# Fix: the 'GOOG/BVMF_CIEL3.4' literal had been split across two lines.
tickers = [
    "GOOG/BVMF_BOVA11.4", "GOOG/BVMF_PETR3.4", "GOOG/BVMF_VALE5.4",
    'GOOG/BVMF_BBDC4.4', 'GOOG/BVMF_BBAS3.4', 'GOOG/BVMF_ITUB4.4',
    'GOOG/BVMF_USIM5.4', 'GOOG/BVMF_CIEL3.4', 'GOOG/BVMF_CMIG4.4',
    'GOOG/BVMF_CTIP3.4', 'GOOG/BVMF_CPLE6.4', 'GOOG/BVMF_ELET6.4',
    'GOOG/BVMF_GGBR4.4', 'GOOG/BVMF_GOLL4.4', 'GOOG/BVMF_HGTX3.4',
    'GOOG/BVMF_JBSS3.4', 'GOOG/BVMF_MRFG3.4', 'GOOG/BVMF_PDGR3.4',
]

# The API token is read from the environment instead of being hard-coded in
# the source. NOTE(review): when QUANDL_AUTH_TOKEN is unset this passes
# authtoken=None -- presumably Quandl then falls back to its default
# (unauthenticated) access; confirm against the Quandl client in use.
series = Quandl.get(tickers, trim_start="2013-01-01", trim_end="2013-12-31",
                    authtoken=os.environ.get("QUANDL_AUTH_TOKEN"))

# Short display labels, one per ticker and in the same order.
# Fixes: ELET6 is Eletrobras (was labelled 'ELETROPAULO') and MRFG3 is
# Marfrig (was misspelled 'MAFRIG').
series.columns = [
    'BOVESPA', 'PETROBRAS', 'VALE', 'BRADESCO', 'BB', 'ITAU',
    'USIMINAS', 'CIELO', 'CEMIG', 'CETIP', 'COPEL', 'ELETROBRAS',
    'GERDAU', 'GOL', 'HERING', 'JBS', 'MARFRIG', 'PDGR',
]
series.dropna(inplace=True)

# Log returns between consecutive rows; rows containing NaN are dropped.
logreturns = np.log(series).diff().dropna()

months = ["Jan", "Fev", "Mar", "Abr", "Mai", "Jun", "Jul", "Ago", "Set", "Out", "Nov", "Dez"]

# Monthly return per stock: sum of the log returns inside each month,
# multiplied by 100.
series_mr = logreturns.resample('M', how=sum)*100
# Re-index the monthly frame by the month abbreviation instead of the date.
series_mr['Month'] = [months[d.month-1] for d in series_mr.index]
series_mr.set_index('Month', inplace=True)

# From here on `months` holds the month labels actually present in the data.
months = list(series_mr.index)
stocks = list(series_mr.columns)

import brewer2mpl
# Three-color diverging 'RdYlGn' palette, as hex strings.
bmap = brewer2mpl.get_map('RdYlGn', 'diverging', 3)
colors = bmap.hex_colors
# Thresholds used to pick one of the three colors (see loop below).
levels = np.array([0, 1])

# Flatten the (stock, month) grid into four parallel lists, one entry per
# rectangle of the heat map.
month = []
stock = []
color = []
rate = []
for y in stocks:
    for m in months:
        month.append(m)
        stock.append(y)
        monthly_rate = series_mr[y][m]
        rate.append(monthly_rate)
        # Color index = number of thresholds strictly below the rate:
        # 0 when rate <= 0, 1 when 0 < rate <= 1, 2 when rate > 1.
        color.append(colors[sum(levels < monthly_rate)])

# NOTE: the stock labels are stored under the key 'year'; rect() below
# refers to that same key for its x coordinate.
source = ColumnDataSource(data=dict(month=month, year=stock, color=color, rate=rate, ))

figure()
# Fix: the title literal had been split across two lines.
rect('year', 'month', 0.95, 0.95, source=source,
     x_range=stocks, y_range=list(reversed(months)),
     color='color', line_color=None,
     tools="resize,hover,save", title="Rentabilidade de ações",
     plot_width=300, plot_height=400)

# Strip grid lines, axis lines and tick marks; shrink and rotate the labels.
grid().grid_line_color = None
axis().axis_line_color = None
axis().major_tick_line_color = None
axis().major_label_text_font_size = "5pt"
axis().major_label_standoff = 0
xaxis().location = "top"
xaxis().major_label_orientation = np.pi/3

# Configure the tooltip of the first HoverTool attached to the current plot.
hover = [t for t in curplot().tools if isinstance(t, HoverTool)][0]
hover.tooltips = OrderedDict([('date', '@month'), ('rate', '@rate'),])

show()
Rentabilidade de ações
Esse gráfico já não é tão simples!
;-)
Referências
● seaborn (github)
● vincent (github)
○ Trifacta - vega
● ggplot (github)
○ ggplot tutorial
○ ggplot2 no R
● AboutWilson.net: Refinando o estilo do matplotlib
● AboutWilson.net: Refinando o estilo do matplotlib com seaborn
● EuroPython 2014: Combining the powerful worlds of Python and R
● EuroPython 2014: Scientific Visualization with GR
● bokeh (github)
● mpld3 (github)
● prettyplotlib (github)
● plot.ly
● Quandl
● brewer2mpl (github)
https://github.com/wilsonfreitas/tdc-2014
Wilson Freitas
http://aboutwilson.net
@aboutwilson

More Related Content

PPTX
PDF
3 Polynomials Feb16
PPTX
Data and time
PDF
Ensaio sobre testes automatizados
PDF
Processamento de tweets em tempo real com Python, Django e Celery - TDC 2014
PDF
Python, the next Brazilian generation
PDF
Quokka CMS - Content Management with Flask and Mongo #tdc2014
PDF
Finanças Quantitativas com python
3 Polynomials Feb16
Data and time
Ensaio sobre testes automatizados
Processamento de tweets em tempo real com Python, Django e Celery - TDC 2014
Python, the next Brazilian generation
Quokka CMS - Content Management with Flask and Mongo #tdc2014
Finanças Quantitativas com python

Viewers also liked (7)

PDF
Web Crawling Modeling with Scrapy Models #TDC2014
ODP
(2014-08-09) [TDC] AudioLazy 0.6 will robotize you!
PDF
Algoritmos Genéticos
PDF
Testando Aplicações Django: Quando, Como e Onde?
PDF
TDD com Python
PDF
import pybr12: experiencias de inclusión en la última PyCon Brazil
PDF
Curso de Python e Django
Web Crawling Modeling with Scrapy Models #TDC2014
(2014-08-09) [TDC] AudioLazy 0.6 will robotize you!
Algoritmos Genéticos
Testando Aplicações Django: Quando, Como e Onde?
TDD com Python
import pybr12: experiencias de inclusión en la última PyCon Brazil
Curso de Python e Django
Ad

Similar to APIs de Visualização em Python (20)

PDF
Code Management
PDF
Getting more out of Matplotlib with GR
PDF
DevFest Kuala Lumpur - Implementing Google Analytics - 2011-09-29.ppt
PPTX
Digital analytics with R - Sydney Users of R Forum - May 2015
PDF
GTUG Philippines - Implementing Google Analytics - 2011-10-11
PPTX
Google Optimize for testing and personalization
PDF
Supercharge your data analytics with BigQuery
PDF
Business Dashboards using Bonobo ETL, Grafana and Apache Airflow
PDF
implemetning google analytics - 2011-09-24 Google Devfest Chiangmai
PDF
DevFest Chiang Mai - Implementing Google Analytics - 2011-09-24.ppt
PPTX
A Big (Query) Frog in a Small Pond, Jakub Motyl, BuffPanel
PDF
Google BigQuery for Everyday Developer
PPTX
Apache Pinot Meetup Sept02, 2020
PPTX
Behaviour Driven Development
PDF
EP2016 - Moving Away From Nodejs To A Pure Python Solution For Assets
PPTX
Criteo Infrastructure (Platform) Meetup
PPTX
How to Realize an Additional 270% ROI on Snowflake
PDF
web design company in bangalore
PPTX
Real time ecommerce analytics with MongoDB at Gilt Groupe (Michael Bryzek & M...
PDF
Google Analytics for Beginners - Training
Code Management
Getting more out of Matplotlib with GR
DevFest Kuala Lumpur - Implementing Google Analytics - 2011-09-29.ppt
Digital analytics with R - Sydney Users of R Forum - May 2015
GTUG Philippines - Implementing Google Analytics - 2011-10-11
Google Optimize for testing and personalization
Supercharge your data analytics with BigQuery
Business Dashboards using Bonobo ETL, Grafana and Apache Airflow
implemetning google analytics - 2011-09-24 Google Devfest Chiangmai
DevFest Chiang Mai - Implementing Google Analytics - 2011-09-24.ppt
A Big (Query) Frog in a Small Pond, Jakub Motyl, BuffPanel
Google BigQuery for Everyday Developer
Apache Pinot Meetup Sept02, 2020
Behaviour Driven Development
EP2016 - Moving Away From Nodejs To A Pure Python Solution For Assets
Criteo Infrastructure (Platform) Meetup
How to Realize an Additional 270% ROI on Snowflake
web design company in bangalore
Real time ecommerce analytics with MongoDB at Gilt Groupe (Michael Bryzek & M...
Google Analytics for Beginners - Training
Ad

More from Wilson Freitas (7)

PDF
bizdays: Dias Úteis em Qualquer Calendário
PDF
Análise dos campeões da Corrida de São Silvestre com Python
PDF
Um modelo de formação de preços para um mercado artificial com redes neurais ...
PDF
Sobre o comportamento endógeno do mercado de ações: simulações e experimentos
PDF
Apreçando Opções Utilizando a Função Característica
PDF
Redes neurais em finanças
PDF
Expansão em caos polinomial
bizdays: Dias Úteis em Qualquer Calendário
Análise dos campeões da Corrida de São Silvestre com Python
Um modelo de formação de preços para um mercado artificial com redes neurais ...
Sobre o comportamento endógeno do mercado de ações: simulações e experimentos
Apreçando Opções Utilizando a Função Característica
Redes neurais em finanças
Expansão em caos polinomial

Recently uploaded (20)

PDF
Fluorescence-microscope_Botany_detailed content
PPTX
IB Computer Science - Internal Assessment.pptx
PPTX
Business Ppt On Nestle.pptx huunnnhhgfvu
PDF
TRAFFIC-MANAGEMENT-AND-ACCIDENT-INVESTIGATION-WITH-DRIVING-PDF-FILE.pdf
PDF
22.Patil - Early prediction of Alzheimer’s disease using convolutional neural...
PPTX
oil_refinery_comprehensive_20250804084928 (1).pptx
PPT
Reliability_Chapter_ presentation 1221.5784
PPTX
MODULE 8 - DISASTER risk PREPAREDNESS.pptx
PPTX
Introduction to machine learning and Linear Models
PPT
Miokarditis (Inflamasi pada Otot Jantung)
PPTX
AI Strategy room jwfjksfksfjsjsjsjsjfsjfsj
PDF
168300704-gasification-ppt.pdfhghhhsjsjhsuxush
PPT
ISS -ESG Data flows What is ESG and HowHow
PPTX
ALIMENTARY AND BILIARY CONDITIONS 3-1.pptx
PPTX
Qualitative Qantitative and Mixed Methods.pptx
PPTX
Introduction-to-Cloud-ComputingFinal.pptx
PDF
Recruitment and Placement PPT.pdfbjfibjdfbjfobj
PPTX
STUDY DESIGN details- Lt Col Maksud (21).pptx
PPTX
Introduction to Firewall Analytics - Interfirewall and Transfirewall.pptx
Fluorescence-microscope_Botany_detailed content
IB Computer Science - Internal Assessment.pptx
Business Ppt On Nestle.pptx huunnnhhgfvu
TRAFFIC-MANAGEMENT-AND-ACCIDENT-INVESTIGATION-WITH-DRIVING-PDF-FILE.pdf
22.Patil - Early prediction of Alzheimer’s disease using convolutional neural...
oil_refinery_comprehensive_20250804084928 (1).pptx
Reliability_Chapter_ presentation 1221.5784
MODULE 8 - DISASTER risk PREPAREDNESS.pptx
Introduction to machine learning and Linear Models
Miokarditis (Inflamasi pada Otot Jantung)
AI Strategy room jwfjksfksfjsjsjsjsjfsjfsj
168300704-gasification-ppt.pdfhghhhsjsjhsuxush
ISS -ESG Data flows What is ESG and HowHow
ALIMENTARY AND BILIARY CONDITIONS 3-1.pptx
Qualitative Qantitative and Mixed Methods.pptx
Introduction-to-Cloud-ComputingFinal.pptx
Recruitment and Placement PPT.pdfbjfibjdfbjfobj
STUDY DESIGN details- Lt Col Maksud (21).pptx
Introduction to Firewall Analytics - Interfirewall and Transfirewall.pptx

APIs de Visualização em Python

  • 1. APIs de Visualização em Python TDC 2014 Wilson Freitas http://aboutwilson.net @aboutwilson
  • 6. seaborn ● Biblioteca de visualização baseada no matplotlib " seaborn is a library for making attractive and informative statistical graphics in Python " ● Boa para análise exploratória de dados ● Fortemente integrada com pandas e numpy
  • 7. " Bringing Matplotlib to the Browser " ● Integra matplotlib com D3js ● Cria visualização de dados interativa no navegador mpld3
  • 8. Vincent " A Python to Vega translator " " Vega is a visualization grammar, a declarative format for creating, saving and sharing visualization designs. " ● Estruturas de dados Python ● Visualização em JavaScript ● Integração com pandas
  • 9. Bokeh ● Biblioteca de visualização interativa ● Integração com pandas ● Foco: explorar a visualização dos navegadores
  • 10. dados: Top 100 Wines of the World 2014
  • 12. Quantos vinhos por País? ggplot(wty, aes("Country")) + geom_bar(fill="red", colour="black") + theme(axis_text_x=element_text(angle=90))
  • 13. Quantos vinhos por País? (com estilo) ggplot(wty, aes("Country")) + geom_bar(fill="red", colour="black") + theme_xkcd() + theme(axis_text_x=element_text(angle=90))
  • 14. Qual a relação entre Pontos e Prêmios? ggplot(wty, aes(x='Prizes', y='Points')) + geom_point()
  • 15. Qual a relação entre Pontos e Prêmios? ggplot(wty, aes(y="Points", x="Prizes")) + geom_point(size=60, shape=6) + stat_smooth(method='lm', colour='red')
  • 16. Qual a relação entre Pontos e Prêmios por País? idx = (wty.Country == 'AUSTRALIA') | (wty.Country == 'FRANCE') | (wty.Country == 'SPAIN') | (wty.Country == 'USA') | (wty.Country == 'ARGENTINA') ggplot(wty[idx], aes(x="Prizes", y="Points")) + geom_point() + stat_smooth(method='lm', color='blue') + facet_wrap("Country", scales="fixed")
  • 17. Como se distribuem os prêmios por País? wty_country_prizes = wty['Prizes'].groupby(wty['Country']) idx = wty_country_prizes.count() == 1 wty_t = wty.ix[[c not in idx[idx].index for c in wty.Country], ['Country','Prizes']] wty_t_country_mean = wty_t.groupby(wty_t.Country).mean() country_names = wty_t_country_mean.Prizes.sort(inplace=False).index import seaborn as sb sb.set(style="ticks") f, ax = plt.subplots() sb.offset_spines() sb.violinplot(wty_t['Prizes'], wty_t['Country'], order=country_names) locs, labels = plt.xticks() plt.setp(labels, rotation=45) sb.despine(trim=True)
  • 18. wty_country = wty.groupby('Country') country_count = wty_country['Wine'].aggregate(len) country_count.sort(inplace=True) fig, ax = plt.subplots(figsize=(12,8)) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.spines['left'].set_visible(False) ax.get_xaxis().tick_bottom() ax.get_yaxis().tick_left() plt.subplot(221) g = wty_country['Prizes'].count() x = np.arange(len(g)) countries = list(g.index.values) y = g.values plt.bar(x+1, y, align='center', width=0.5, alpha=0.4) plt.xticks(x+1, [' ']*10, rotation=90) plt.title('Contagem') plt.ylabel('Prizes') plt.subplot(222) g = wty_country['Prizes'].mean() y = g.values plt.bar(x+1, y, align='center', color='red', width=0.5, alpha=0.4) plt.xticks(x+1, [' ']*10, rotation=90); plt.title(u'Média') plt.subplot(223) g = wty_country['Prizes'].max() y = g.values plt.bar(x+1, y, align='center', color='darkgreen', width=0.5, alpha=0.4) plt.xticks(x+1, countries, rotation=90); plt.title(u'Máx') plt.ylabel('Prizes') plt.subplot(224) g = wty_country['Prizes'].min() y = g.values plt.bar(x+1, y, align='center', color='magenta', width=0.5, alpha=0.4) plt.xticks(x+1, countries, rotation=90); plt.title(u'Mín') Mas qual País possui o melhor vinho?
  • 19. >>> mpld3.save_html(fig, 'fig.html') <div id="fig_el672345700473765832459779"></div> <script> function mpld3_load_lib(url, callback){ var s = document.createElement('script'); s.src = url; s.async = true; s.onreadystatechange = s.onload = callback; s.onerror = function(){console.warn("failed to load library " + url);}; document.getElementsByTagName("head")[0].appendChild(s); } if(typeof(mpld3) !== "undefined" && mpld3._mpld3IsLoaded){ // already loaded: just create the figure !function(mpld3){ ... mpld3 salva fig em html
  • 20. import Quandl tickers = [ "GOOG/BVMF_BOVA11.4", "YAHOO/SA_PETR3.6", "GOOG/BVMF_VALE5.4", "GOOG/BVMF_BBDC4.4", "GOOG/BVMF_BBAS3.4", "GOOG/BVMF_ITUB4.4"] series = Quandl.get(tickers, trim_start="2013-01-01", trim_end="2013-12-31") series.columns = [ "BOVESPA", "PETROBRAS", "VALE", "BRADESCO", "BB", "ITAU"] series.dropna(inplace=True) dados: Ações (Quandl) >>> print series BOVESPA PETROBRAS VALE BRADESCO BB ITAU Date 2013-01-02 61.27 19.00 42.60 36.02 25.80 31.3 2013-01-03 61.92 19.70 42.09 38.12 26.31 32.2 2013-01-04 61.32 19.77 41.17 37.45 26.00 31.9 2013-01-07 60.75 19.51 40.35 37.29 26.15 31.5 2013-01-08 59.91 18.96 40.10 37.42 26.45 31.6 2013-01-09 60.25 19.15 40.30 37.90 26.70 32.1 2013-01-10 60.49 19.27 40.57 37.40 26.30 31.7 2013-01-11 60.30 19.32 39.53 37.27 25.98 31.8 2013-01-14 60.75 19.21 39.77 37.88 26.93 32.2 2013-01-15 60.52 19.27 39.65 37.30 26.70 31.8 2013-01-16 60.54 19.34 39.68 37.49 26.86 32.1 2013-01-17 61.01 19.21 39.75 38.11 26.81 32.6 2013-01-18 60.79 19.05 39.20 38.25 26.20 32.7 2013-01-21 60.75 18.87 39.15 38.09 25.93 32.5 2013-01-22 60.55 19.12 39.59 38.40 25.64 32.9 ...
  • 21. Visualização de Séries de Preço import vincent as v line = v.Line(series) line.axis_titles(x='Date', y='Price') line.legend(title='Series')
  • 22. Exporta a Gramática de Gráficos >>> print line.to_json() { "axes": [ { "scale": "x", "title": "Date", "type": "x" }, { "scale": "y", "title": "Price", "type": "y" } ], "data": [ { "name": "table", "values": [ { "col": "BOVESPA", "idx": 1357092000000, "val": 61.27 }, { "col": "PETROBRAS","idx": 1357092000000, "val": 19.0 }, { "col": "VALE", "idx": 1357092000000, "val": 42.6 }, ...
  • 23. Variação acumulada logreturns = np.log(series).diff().dropna(inplace=False) cumreturns = np.cumprod(np.exp(logreturns)) line = v.Line(cumreturns) line.axis_titles(x='Date', y='') line.legend(title='Series')
  • 24. Séries de Preço Empilhadas stacked = v.StackedArea(series) stacked.axis_titles(x='Date', y='Price') stacked.legend(title='Series') stacked.colors(brew='Spectral')
  • 25. Qual a correlação entre séries? # com seaborn sb.corrplot(np.log(series).diff().dropna())
  • 26. Mais no detalhe sb.jointplot("VALE", "BOVESPA", series, kind="kde"); sb.jointplot("VALE", "BOVESPA", series, kind="reg");
  • 27. Rentabilidade de ações ● Qual a rentabilidade de cada ação por mês? ● Como visualizar tudo no mesmo gráfico? ● Como fazer isso higienicamente? ● Vamos considerar 18 ações no ano de 2013
  • 29. from bokeh.plotting import * output_notebook() import numpy as np import Quandl import pandas as pd from collections import OrderedDict tickers = ["GOOG/BVMF_BOVA11.4", "GOOG/BVMF_PETR3.4", "GOOG/BVMF_VALE5.4", 'GOOG/BVMF_BBDC4.4', 'GOOG/BVMF_BBAS3.4', 'GOOG/BVMF_ITUB4.4', 'GOOG/BVMF_USIM5.4', 'GOOG/BVMF_CIEL3. 4', 'GOOG/BVMF_CMIG4.4', 'GOOG/BVMF_CTIP3.4', 'GOOG/BVMF_CPLE6.4', 'GOOG/BVMF_ELET6.4', 'GOOG/BVMF_GGBR4.4', 'GOOG/BVMF_GOLL4.4', 'GOOG/BVMF_HGTX3.4', 'GOOG/BVMF_JBSS3.4', 'GOOG/BVMF_MRFG3.4', 'GOOG/BVMF_PDGR3.4'] series = Quandl.get(tickers, trim_start="2013-01-01", trim_end="2013-12-31", authtoken="nJ1NhTYdEs2p3MsS4CVd") series.columns = ['BOVESPA', 'PETROBRAS', 'VALE', 'BRADESCO', 'BB', 'ITAU', 'USIMINAS', 'CIELO', 'CEMIG', 'CETIP', 'COPEL', 'ELETROPAULO', 'GERDAU', 'GOL', 'HERING', 'JBS', 'MAFRIG', 'PDGR'] series.dropna(inplace=True) logreturns = np.log(series).diff().dropna() months = ["Jan", "Fev", "Mar", "Abr", "Mai", "Jun", "Jul", "Ago", "Set", "Out", "Nov", "Dez"] series_mr = logreturns.resample('M', how=sum)*100 series_mr['Month'] = [months[d.month-1] for d in series_mr.index] series_mr.set_index('Month', inplace=True) months = list(series_mr.index) stocks = list(series_mr.columns) import brewer2mpl bmap = brewer2mpl.get_map('RdYlGn', 'diverging', 3) colors = bmap.hex_colors levels = np.array([0, 1]) month = [] stock = [] color = [] rate = [] for y in stocks: for m in months: month.append(m) stock.append(y) monthly_rate = series_mr[y][m] rate.append(monthly_rate) color.append(colors[sum(levels < monthly_rate)]) source = ColumnDataSource(data=dict(month=month, year=stock, color=color, rate=rate, )) figure() rect('year', 'month', 0.95, 0.95, source=source, x_range=stocks, y_range=list(reversed(months)), color='color', line_color=None, tools="resize,hover,save", title="Rentabilidade de ações", plot_width=300, plot_height=400) grid().grid_line_color = None axis().axis_line_color = None axis().major_tick_line_color = None 
axis().major_label_text_font_size = "5pt" axis().major_label_standoff = 0 xaxis().location = "top" xaxis().major_label_orientation = np.pi/3 from bokeh.objects import HoverTool hover = [t for t in curplot().tools if isinstance(t, HoverTool)][0] hover.tooltips = OrderedDict([('date', '@month'), ('rate', '@rate'),]) show() Rentabilidade de ações Esse gráfico já não é tão simples! ;-)
  • 30. Referências ● seaborn (github) ● vincent (github) ○ Trifacta - vega ● ggplot (github) ○ ggplot tutorial ○ ggplot2 no R ● AboutWilson.net: Refinando o estilo do matplotlib ● AboutWilson.net: Refinando o estilo do matplotlib com seaborn ● EuroPython 2014: Combining the powerful worlds of Python and R ● EuroPython 2014: Scientific Visualization with GR ● bokeh (github) ● mpld3 (github) ● prettyplotlib (github) ● plot.ly ● Quandl ● brewer2mpl (github)