scikit prep: predict

# Calmcode - scikit prep: Predict

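Here's all the code you need to repeat the exercise. It assumes the dataset has already been loaded into a pandas dataframe named `df` with the columns `x`, `y` and `z`.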

``````python
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

# `df` is not created in this snippet; it must already exist with the
# columns "x", "y" and "z".

# Feature matrix: the "x" and "y" columns as a plain NumPy array.
X = df[["x", "y"]].to_numpy()
# Boolean target: True where column "z" equals "a".
y = df["z"].eq("a")
``````

This is the code that handles all the plotting.

``````def plot_output(scaler):
pipe = Pipeline([
("scale", scaler),
("model", KNeighborsClassifier(n_neighbors=20, weights='distance'))
])

pred = pipe.fit(X, y).predict(X)

plt.figure(figsize=(9, 3))
plt.subplot(131)
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.title("Original Data")
plt.subplot(132)
X_tfm = scaler.transform(X)
plt.scatter(X_tfm[:, 0], X_tfm[:, 1], c=y)
plt.title("Transformed Data")
plt.subplot(133)
X_new = np.concatenate([
np.random.uniform(0, X[:, 0].max(), (5000, 1)),
np.random.uniform(0, X[:, 1].max(), (5000, 1))
], axis=1)
y_proba = pipe.predict_proba(X_new)
plt.scatter(X_new[:, 0], X_new[:, 1], c=y_proba[:, 1], alpha=0.7)
plt.title("Predicted Data")
``````

You can see the effect of both scalers by running:

``````plot_output(scaler=QuantileTransformer(n_quantiles=100))
plot_output(scaler=StandardScaler())
``````