Arguments in Pandas `pipe`-lines

Arguments in Pandas `pipe`-lines

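Pandas `pipe`-lines let you pass extra arguments through `.pipe()` to the functions that make up the pipeline. This adds a whole lot of flexibility, because each step can be configured at the point where it is used.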

This is the final pipeline from the video.

import pandas as pd

# NOTE(review): the path passed to read_csv is an empty string, so no dataset
# is actually referenced here. The functions below read the columns `date`,
# `currency_code` and `name` -- TODO fill in the CSV location used in the video.
df = pd.read_csv('')

def set_dtypes(dataf):
    """Parse the ``date`` column and put the rows in a stable order.

    Returns a new frame in which ``date`` has been run through
    ``pd.to_datetime`` and the rows are sorted by ``currency_code`` and
    then by ``date``. The input frame is left untouched.
    """
    parsed_dates = pd.to_datetime(dataf['date'])
    with_dates = dataf.assign(date=parsed_dates)
    return with_dates.sort_values(by=['currency_code', 'date'])

def remove_outliers(dataf, min_row_country=32):
    """Drop the rows of currencies that have too few observations.

    The non-null ``name`` entries are counted per ``currency_code``; only
    rows whose currency reaches ``min_row_country`` entries are kept.

    Args:
        dataf: DataFrame with at least the columns ``currency_code`` and
            ``name``.
        min_row_country: Minimum (inclusive) number of ``name`` entries a
            currency needs for its rows to be kept.

    Returns:
        The subset of ``dataf`` belonging to the retained currencies, with
        the original row order and index labels.
    """
    # Count per currency first; the surviving group labels (the index of the
    # aggregated frame) are the currency codes we want to keep.
    countries = (dataf
                 .groupby('currency_code')
                 .agg(n=('name', 'count'))
                 .loc[lambda d: d['n'] >= min_row_country]
                 .index)
    return (dataf
            .loc[lambda d: d['currency_code'].isin(countries)])

  .pipe(remove_outliers, min_row_country=32))