Create Waterfall chart
Tags: #matplotlib #chart #waterfall #dataviz #snippet
Author: Jeremy Ravenel

Input

Import library

1
import numpy as np
2
import pandas as pd
3
import matplotlib.pyplot as plt
4
from matplotlib.ticker import FuncFormatter
Copied!

Model

Create the waterfall chart

# Use python 2.7+ syntax to format currency
def money(x, pos):
    """Format a tick value as a whole-dollar string with thousands separators.

    The two args are the value and tick position. ``pos`` is accepted so the
    function can be handed to ``FuncFormatter``, but it is not used.

    Example: ``money(350000, 0)`` returns ``"$350,000"``.
    """
    return "${:,.0f}".format(x)
# Wrap money() in a FuncFormatter so it can be installed on an axis
# as the tick-label formatter.
formatter = FuncFormatter(money)

# Data to plot. Do not include a total; it is calculated below.
index = ['sales', 'returns', 'credit fees', 'rebates', 'late charges', 'shipping']
data = {'amount': [350000, -30000, -7500, -25000, 95000, -7000]}

# Store the amounts in a frame and build the "blank" series: the running
# total *before* each transaction, used as the base each bar is drawn from.
trans = pd.DataFrame(data, index=index)
running_total = trans['amount'].cumsum()
blank = running_total.shift(1).fillna(0)

# Append the net total as the final element of the waterfall.
total = trans['amount'].sum()
trans.loc['net'] = total
blank.loc['net'] = total

# The steps graphically show the levels between bars. Each level is
# repeated three times and shifted by one; every middle point is then
# blanked out with NaN so the plotted line breaks between segments.
step = blank.reset_index(drop=True)
step = step.repeat(3)
step = step.shift(-1)
step[1::3] = np.nan

# The last element should be shown as a full bar, so its base is reset to 0.
blank.loc['net'] = 0

# Plot and label: draw the amounts as bars whose bottoms are taken from
# `blank`, then overlay the step series as a black line.
my_plot = trans.plot(
    kind='bar',
    stacked=True,
    bottom=blank,
    legend=None,
    figsize=(10, 5),
    title="2014 Sales Waterfall",
)
connector_x = step.index
connector_y = step.values
my_plot.plot(connector_x, connector_y, 'k')
my_plot.set_xlabel("Transaction Types")

# Format the y axis for dollars
my_plot.yaxis.set_major_formatter(formatter)

# Get the y-axis position for the labels: the running total before each bar.
# `trans` already contains the "net" row here, so the entry for "net" equals
# the sum of all the transactions before it.
y_height = trans["amount"].cumsum().shift(1).fillna(0)

# Get an offset so labels don't sit right on top of the bar.
# The largest amount is taken as a scalar (not a one-element Series) so the
# offsets and the label coordinates are plain numbers, and the builtin `max`
# is left untouched.
max_amount = trans["amount"].max()
neg_offset = max_amount / 25
pos_offset = max_amount / 50
plot_offset = int(max_amount / 15)

# Start label loop: one annotation per bar, addressed by bar position.
for position, (label, amount) in enumerate(trans["amount"].items()):
    base = y_height.iloc[position]
    # For the last item ("net") we don't want to double count: its base
    # already equals the total. It is identified by label rather than by
    # value, so a transaction that happens to equal the total is not
    # mistaken for it.
    y = base if label == "net" else base + amount
    # Positive amounts are labelled above the bar end, the rest below it.
    if amount > 0:
        y += pos_offset
    else:
        y -= neg_offset
    my_plot.annotate("{:,.0f}".format(amount), (position, y), ha="center")
Copied!

Output

Display result

# Scale up the y axis so there is room for the labels.
# The tallest bar tops out at either the largest bar base or the net total
# (the "net" base was reset to 0, so blank.max() alone can miss the total
# when the last transaction is positive).
# np.maximum is used instead of the builtin `max`, a name that is easily
# shadowed in notebook code.
y_top = np.maximum(blank.max(), total)
my_plot.set_ylim(0, y_top + int(plot_offset))
# Rotate the labels so the category names read horizontally
my_plot.set_xticklabels(trans.index, rotation=0)
# Write the finished chart to disk
my_plot.get_figure().savefig("waterfall.png", dpi=200, bbox_inches='tight')
Copied!
Copy link
Edit on GitHub