The Python code at the beginning of this notebook is:
import patsy as ps
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from sklearn.linear_model import LinearRegression
# Load the raw birthdays data; the relative path means birthdays.csv must be
# in the current working directory.
df = pd.read_csv("birthdays.csv")
def clean_data(dataf: pd.DataFrame) -> pd.DataFrame:
    """Aggregate raw birth records into one row per (date, wday, month).

    Parameters
    ----------
    dataf : pd.DataFrame
        Raw frame containing at least the columns ``date``, ``wday``,
        ``month`` and ``births``. An ``Unnamed: 0`` column is dropped when
        present and is not required.

    Returns
    -------
    pd.DataFrame
        A new frame with the columns ``date`` (parsed to datetime), ``wday``,
        ``month``, ``n_born`` (sum of ``births`` within each group) and
        ``yday`` (day of the year derived from ``date``).
    """
    return (
        dataf
        # errors="ignore" keeps this step from raising KeyError when the
        # column is absent; it never reaches the output either way.
        .drop(columns=["Unnamed: 0"], errors="ignore")
        # Parse the dates up front so grouping and .dt access work on
        # real datetimes rather than strings.
        .assign(date=lambda d: pd.to_datetime(d["date"]))
        .groupby(["date", "wday", "month"])
        .agg(n_born=("births", "sum"))
        # Turn the group keys back into ordinary columns.
        .reset_index()
        .assign(yday=lambda d: d["date"].dt.dayofyear)
    )
# Apply clean_data to the raw frame; df.pipe(f) is equivalent to f(df).
df_clean = df.pipe(clean_data)
To run the patsy code, run:
import patsy as ps
# Build design matrices from a formula: n_born (left of "~") is the response,
# wday and yday are the predictors. dmatrices returns the (response, predictor)
# matrices in that order.
y, X = ps.dmatrices("n_born ~ wday + yday", df_clean)
# Show the first five rows of the predictor matrix.
print(X[:5])