Here's all the code you need to repeat the exercise.
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
# Load the dataset from drawndata1.csv (path is relative to the current
# working directory).
df = pd.read_csv("drawndata1.csv")

# Feature matrix: the "x" and "y" columns as a plain NumPy array.
X = df[["x", "y"]].values

# Boolean target: True for rows whose "z" column equals "a".
y = df["z"].eq("a")
This is the code that handles all the plotting.
def plot_output(scaler):
    """Fit a scaled k-NN classifier and draw a three-panel comparison figure.

    The panels show, from left to right:

    1. the raw data, coloured by label;
    2. the same points after ``scaler`` has transformed them;
    3. the classifier's predicted probability at randomly sampled points in
       the original (untransformed) feature space.

    The function reads the module-level ``X`` (indexed as a 2-D array with at
    least two columns) and ``y`` (the labels). It creates a new matplotlib
    figure but does not call ``plt.show()``; displaying the figure is left to
    the caller or the notebook front end.

    Parameters
    ----------
    scaler
        Transformer used as the first pipeline step. It is used again after
        fitting to transform ``X`` for the middle panel, so the code relies on
        ``pipe.fit`` having fitted this very object.

    Returns
    -------
    None
    """
    pipe = Pipeline([
        ("scale", scaler),
        ("model", KNeighborsClassifier(n_neighbors=20, weights='distance')),
    ])
    # Only the fitted pipeline is needed below; predictions on the training
    # set are not used anywhere, so none are computed.
    pipe.fit(X, y)

    plt.figure(figsize=(9, 3))

    # Panel 1: the data exactly as loaded.
    plt.subplot(131)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.title("Original Data")

    # Panel 2: the data as the classifier sees it, after scaling.
    plt.subplot(132)
    X_tfm = scaler.transform(X)
    plt.scatter(X_tfm[:, 0], X_tfm[:, 1], c=y)
    plt.title("Transformed Data")

    # Panel 3: predicted probabilities at 5000 points drawn uniformly from the
    # rectangle [0, max x] x [0, max y]. The lower bound is fixed at 0, so
    # data with negative coordinates would fall outside the sampled region.
    plt.subplot(133)
    X_new = np.concatenate([
        np.random.uniform(0, X[:, 0].max(), (5000, 1)),
        np.random.uniform(0, X[:, 1].max(), (5000, 1)),
    ], axis=1)
    y_proba = pipe.predict_proba(X_new)
    # Column 1 is the probability of the second class (True when y is boolean).
    plt.scatter(X_new[:, 0], X_new[:, 1], c=y_proba[:, 1], alpha=0.7)
    plt.title("Predicted Data")
You can see the effect of both scalers by running:
# Draw one figure per scaler so the two results can be compared.
for scaler in (QuantileTransformer(n_quantiles=100), StandardScaler()):
    plot_output(scaler=scaler)