bokeh 1st: fundamentals python data visualization

Let’s dive into the simple but powerful Bokeh—-create sophisticated D3.js like graphs with few Python codes!

myscatter.py example

 

bokeh_new

What are Glyphs

  • Visual shapes
    • circles, squares, triangles
    • rectangles, lines, wedges
  • with properties attached to data
    • coordinates (x,y)
    • size, color, transparency

Glyphs properties

  • lists, arrays, sequences of values
  • single fixed values
In [1]:
from bokeh.io import output_file, output_notebook, show
from bokeh.plotting import figure
output_notebook()
Loading BokehJS ...
In [2]:
plot = figure(plot_width=300,plot_height=300, tools='pan, box_zoom')
plot.circle([1,2,3,4,5],[4,2,5,2.7,5,3,7],size=7,color='red')
show(plot)
In [3]:
# single fixed values
plot=figure(plot_width=300,plot_height=300)
plot.circle_cross(x=10, y=[2,5,8,12],size=[10,20,30,40],alpha=.4)
show(plot)

how to desplay html

<div align='left'></div>
Markers
asterisk() circle() circle_cross() circle_x() cross() diamond() diamond_cross()
inverted_triangle() square() square_cross() xquare_x() trangle() x()
In [4]:
import numpy as np
In [5]:
N=777
x_ran=np.random.random(size=N)*100
y_ran=np.random.random(size=N)*100
radii = np.random.random(size=N) * 17
TOOLS="hover,crosshair,pan,wheel_zoom,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"
colors = [
    "#%02x%02x%02x" % (int(r), int(g), 150) for r, g in zip(50+2*x_ran, 30+2*y_ran)
]
In [6]:
p = figure(x_axis_label='test between 1 and 10 ', y_axis_label='test value between 1 and 100',\
           plot_width=500,plot_height=500, tools=TOOLS)
p.diamond_cross(x_ran, y_ran, size=radii, color=colors,alpha=.3)

show(p)
# output_file('file_to_save.html')
In [7]:
p2 = figure(x_axis_label='test2x ', y_axis_label='test2y',\
           plot_width=500,plot_height=500)
p2.circle(np.random.random(size=100),np.random.random(size=200),color='red',size=6,alpha=.5)
p2.x(np.random.random(size=200),np.random.random(size=100),color='silver',size=15)
show(p2)

draw lines

In [8]:
p = figure(x_axis_label='line_x ', y_axis_label='line_y',\
           plot_width=500,plot_height=500)
p.line([1,2,3,4,5,6,7],np.random.random(size=20)*1.5,color='purple',line_width=3,alpha=.5)
p.line([2,4,6,8],np.random.random(size=10),color='gold',line_width=2)
show(p)

Patches

  • list of lists
    • one list of lists is the for X coordinate
    • the other is for the Y coordinate
      • each sublist contains the X or Y coordinate for one patch
      • the corresponding sublist for each individual patch must match in length
      • the sequence of each dot is clock-wise
In [9]:
xs=[[1,1,3,1.5],[2,3,7],[2.5,2.5,4.5,4.5]]
ys=[[2,5,5,3],[3,5,5],[2,4,4,2.5]]
plot=figure(x_axis_label='X_axis ', y_axis_label='Y_axis',\
           plot_width=350,plot_height=350)
plot.patches(xs,ys,fill_color=['red','blue','green'],line_color='silver', alpha=.6)
show(plot)
In [10]:
from yahoo_finance import Share
apple=Share('BTE')
In [11]:
data=apple.get_historical('2012-01-01','2017-01-01')
In [12]:
print type(data),len(data)
print data[0]
<type 'list'> 1258
{'Volume': '1722400', 'Symbol': 'BTE', 'Adj_Close': '4.88', 'High': '4.98', 'Low': '4.86', 'Date': '2016-12-30', 'Close': '4.88', 'Open': '4.91'}
  • convert string to datatime
In [13]:
import pandas as pd
df=pd.DataFrame(data)
df.Date=pd.to_datetime(df.Date)
df.index=df.Date
print df.info()
df.head()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1258 entries, 2016-12-30 to 2012-01-03
Data columns (total 8 columns):
Adj_Close    1258 non-null object
Close        1258 non-null object
Date         1258 non-null datetime64[ns]
High         1258 non-null object
Low          1258 non-null object
Open         1258 non-null object
Symbol       1258 non-null object
Volume       1258 non-null object
dtypes: datetime64[ns](1), object(7)
memory usage: 88.5+ KB
None
Out[13]:
Adj_Close Close Date High Low Open Symbol Volume
Date
2016-12-30 4.88 4.88 2016-12-30 4.98 4.86 4.91 BTE 1722400
2016-12-29 4.92 4.92 2016-12-29 4.98 4.89 4.98 BTE 1657900
2016-12-28 4.99 4.99 2016-12-28 5.05 4.91 4.95 BTE 2300800
2016-12-27 4.96 4.96 2016-12-27 4.97 4.87 4.89 BTE 1452800
2016-12-23 4.82 4.82 2016-12-23 4.90 4.79 4.86 BTE 2243500
In [14]:
indexes,values = df.resample('M')['Close'].max().index,df.resample('M')['Close'].max().values
df.resample('M')['Close'].max().head()
Out[14]:
Date
2012-01-31        58.73
2012-02-29        58.50
2012-03-31    58.279999
2012-04-30        53.66
2012-05-31    53.959999
Freq: M, Name: Close, dtype: object
  • pass x_axis_type="datetime" to figure()
In [15]:
plot=figure(x_axis_label='Date', y_axis_label='Price',\
           plot_width=750,plot_height=400, x_axis_type="datetime")
plot.line(df.Date, df.Close,line_width=1.5,color='red',alpha=.5)
plot.circle(indexes, values, fill_color='silver',size=7,alpha=.5)
show(plot)

use Numpy Arrays

In [16]:
import numpy as np

numpy.linspace

  • linspace(start,end,count)
  • if you want each step equals to (end-start)/N, you should set N=count+1
In [17]:
# for example, I want each step = 2, so N=(100-0)/2+1 = 51
np.linspace(0,100,51)
Out[17]:
array([   0.,    2.,    4.,    6.,    8.,   10.,   12.,   14.,   16.,
         18.,   20.,   22.,   24.,   26.,   28.,   30.,   32.,   34.,
         36.,   38.,   40.,   42.,   44.,   46.,   48.,   50.,   52.,
         54.,   56.,   58.,   60.,   62.,   64.,   66.,   68.,   70.,
         72.,   74.,   76.,   78.,   80.,   82.,   84.,   86.,   88.,
         90.,   92.,   94.,   96.,   98.,  100.])
In [18]:
x = np.linspace(0,10,1001)
y = np.sin(x)+np.random.random(1001)*0.2
plot = figure(plot_width=400,plot_height=200)
plot.circle(np.linspace(0,10,51),np.cos(np.linspace(0,10,51)),color='red',alpha=.5,size=7)
plot.line(x,y,line_width=1.5,alpha=.5,color='green')
show(plot)

use Pandas

In [19]:
import pandas as pd
In [20]:
from bokeh.sampledata.iris import flowers
print type(flowers)
flowers.head()
<class 'pandas.core.frame.DataFrame'>
Out[20]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
In [21]:
p = figure(plot_width=300,plot_height=300)
p.circle(flowers.petal_length, flowers['sepal_length'],size=5,alpha=.6)
show(p)

Column Data Sourse

In [22]:
from bokeh.models import ColumnDataSource
In [23]:
ex1=ColumnDataSource(data={'x':[1,2,3,4,5],'y':[7,5,6,2,7]})
ex1.data
Out[23]:
{'x': [1, 2, 3, 4, 5], 'y': [7, 5, 6, 2, 7]}
In [24]:
p = figure(plot_width=300,plot_height=300)
p.line('x','y',source=ex1,line_width=3,color='orange',alpha=.7)
show(p)
In [25]:
ex2=ColumnDataSource(df)
df.head(1)
Out[25]:
Adj_Close Close Date High Low Open Symbol Volume
Date
2016-12-30 4.88 4.88 2016-12-30 4.98 4.86 4.91 BTE 1722400
In [26]:
plot=figure(x_axis_label='Date', y_axis_label='Price',\
           plot_width=750,plot_height=400, x_axis_type="datetime")
plot.line(x='Date', y='Close',source=ex2, line_width=1.5,color='cyan',alpha=.4)
show(plot)

Customizing Glyphs

  • figure(tools='a,b,c,...')
    • tools
      • box_select, lasso_select
    • plot=figure(...)
      • plot.circle(...,
        selection_color='red',
        nonselection_fill_alpha=0.2,
        nonselection_fill_color='grey',...)
In [27]:
p = figure(plot_width=400,plot_height=400,tools='box_select, lasso_select')
p.circle(flowers.petal_length, flowers['sepal_length'],size=5,alpha=.6,\
        selection_color='red',nonselection_fill_alpha=.2,nonselection_fill_color='grey')
show(p)

Hover appearance

In [28]:
from bokeh.models import HoverTool
In [29]:
H = HoverTool(tooltips=None, mode='vline')
In [30]:
p = figure(plot_width=400,plot_height=400,tools=[H,'crosshair'])
p.circle(flowers.petal_length, flowers['sepal_length'],size=10,alpha=.6,\
        hover_color='green')
show(p)

color mapping

In [31]:
colormap = {'setosa': 'red', 'versicolor': 'green', 'virginica': 'blue'}
In [32]:
flowers.species.value_counts()
Out[32]:
setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64
In [33]:
flow=ColumnDataSource(flowers)
flowers.head(1)
Out[33]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
In [34]:
colors = [colormap[x] for x in flowers['species']]
len(colors)
Out[34]:
150
In [36]:
plot=figure(plot_width=400,plot_height=300,title='assign title')
plot.xaxis.axis_label='X_lable'
plot.yaxis.axis_label='Y_lable'

plot.circle('petal_length','sepal_length',source=flow,\
            color=colors,size=10,alpha=.5)
show(plot)
In [ ]:
 
In [ ]:
 

Leave a Reply

Your email address will not be published.