%matplotlib inline

import pandas as pd
from collections import OrderedDict
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import seaborn as sns
sns.set_style('whitegrid')
# start with some dummy data
df = pd.DataFrame({
    'date':['2019-08-01','2019-09-01'],
    'website_traffic':[530771,558652],
    'new_accounts':[15720,15900],
    'account_verify':[12670,13884],
    'product_activated':[10812,12909],
    'paid_conversion':[654,908],
    'some_other_metric':[13270,13678]
})
df
date website_traffic new_accounts account_verify product_activated paid_conversion some_other_metric
0 2019-08-01 530771 15720 12670 10812 654 13270
1 2019-09-01 558652 15900 13884 12909 908 13678
df['date'] = pd.to_datetime(df['date'],format='%Y-%m-%d')
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   date               2 non-null      datetime64[ns]
 1   website_traffic    2 non-null      int64         
 2   new_accounts       2 non-null      int64         
 3   account_verify     2 non-null      int64         
 4   product_activated  2 non-null      int64         
 5   paid_conversion    2 non-null      int64         
 6   some_other_metric  2 non-null      int64         
dtypes: datetime64[ns](1), int64(6)
memory usage: 240.0 bytes
# define only the KPIs we want to see (since there are cases where other metrics are present in the dataframe)
all_kpis = ['website_traffic','new_accounts','account_verify','product_activated','paid_conversion']
all_names = tuple([x.replace('_','\n').title() for x in all_kpis])
def get_it_in_order(df,mask,month):
    """
    Biggest assumption: there is a 'date' column (not index!) in the dataframe that is *literally named* 'date'
    Accepts entire dataframe
    Supply a column mask
    Supply a list of column names
    """
    kpis = OrderedDict()
    frame_2_dict = df.set_index('date').loc[month,mask].to_dict()
    for i in mask:
        kpis[i] = frame_2_dict[i]
    return kpis
def add_arrow(x_adjust,y_adjust,ratio):
    bbox_props = dict(boxstyle="rarrow,pad=0.5", facecolor="grey", edgecolor="grey", alpha=0.75, lw=1)
    return plt.text(x_adjust,y_adjust,'{0:.1f}%'.format(ratio * 100),
            verticalalignment='center',
            horizontalalignment='left',
            fontsize=16,
            weight='black',
            color='white',
            bbox=bbox_props)
def plot_acquisition_funnel(df, month, names, metrics):
    data = get_it_in_order(df, metrics, month)
    count = range(len(data))
    fig, ax = plt.subplots(figsize=(10, 6))
    rects = ax.bar(count, data.values(), width=.5, align='center', color=sns.color_palette("Blues", len(data)))
    plt.xticks(count, names)
    plt.ylim(bottom=0, top=data[metrics[1]]*2)
    plt.title('{0} Acquisition Funnel'.format(month), fontsize=16)
    plt.tick_params(labelsize=16)
    add_arrow(0.27, data[metrics[3]]/1.7, data[metrics[1]]/data[metrics[0]])
    add_arrow(1.27, data[metrics[3]]/1.7, data[metrics[2]]/data[metrics[1]])
    add_arrow(2.27, data[metrics[3]]/1.7, data[metrics[3]]/data[metrics[2]])
    add_arrow(3.27, data[metrics[3]]/1.7, data[metrics[4]]/data[metrics[3]])
    for rect, val in zip(rects,data.values()):
        height = rect.get_height()
        if height > 250:
            label_color = 'grey'
        else:
            label_color = 'white'
        ax.text(rect.get_x() + rect.get_width()/2,250,'{:,}'.format(val),ha='center', va='bottom',fontsize=12,weight='bold',color=label_color)
    plt.grid(False)
    plt.show()
plot_acquisition_funnel(df,'2019-08-01',all_names,all_kpis)