To use scikit-lego, you'll need to install it first:
pip install scikit-lego
You can now use it in the pipeline.
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklego.preprocessing import PatsyTransformer
import matplotlib.pylab as plt
# Training frame: the first 2000 rows of df_clean, restricted to the rows
# whose 'n_born' exceeds 2000, plus a 'num_date' column produced by passing
# the 'date' column through date_to_num.
X = df_clean.head(2000)
X = X.loc[X['n_born'] > 2000]
X = X.assign(num_date=date_to_num(X['date']))

# The target is the 'n_born' column of that same frame. The column also stays
# in X, but the formula below only names yday, wday and num_date.
y = X['n_born']

# Three stages: patsy formula expansion -> standardisation -> linear model.
# NOTE(review): in patsy, cc(...) is presumably the cyclic cubic spline basis
# and **2 presumably adds pairwise interactions -- confirm in the patsy docs.
pipe = Pipeline(steps=[
    ("patsy", PatsyTransformer("(cc(yday, df=12) + wday + num_date)**2")),
    ("scale", StandardScaler()),
    ("model", LinearRegression()),
])

# Fit on (X, y) and predict on the very same rows, so the value below is an
# in-sample mean absolute error. It is left as the final bare expression so
# that a notebook cell displays it.
in_sample_pred = pipe.fit(X, y).predict(X)
np.mean(np.abs(in_sample_pred - y))
Feel free to check out the documentation for more information about the patsy component.