The code below also includes the code from the previous video, so running it should give you the correct chart. Be mindful that the simulation might take a while to run.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the raw birth records; the code below reads the `date` and `births` columns.
df = pd.read_csv("birthdays.csv")

# Sum the births per day-of-year and normalise the totals into an empirical
# probability distribution (column `p`) indexed by `day_of_year`.
# NOTE(review): `dt.dayofyear` is the ordinal day within each year, so if the
# data spans leap years the same calendar date after February maps to two
# different ordinals — confirm this is acceptable for the chart.
plot_df = (
    df
    .assign(date=lambda d: pd.to_datetime(d['date']))
    .assign(day_of_year=lambda d: d['date'].dt.dayofyear)
    .groupby('day_of_year')
    .agg(n_births=('births', 'sum'))
    .assign(p=lambda d: d['n_births'] / d['n_births'].sum())
)

# `sim_real_once` samples from a Series named `probabilities` (index = possible
# birthdays, values = their probabilities); bind it here so it is defined
# before the simulation runs.
probabilities = plot_df['p']
def sim_real_once(room=20, probs=None):
    """Simulate one room and report whether any birthday is shared.

    Args:
        room: Number of people (birthdays) to draw.
        probs: Optional pandas Series whose index holds the possible
            birthdays and whose values are the matching probabilities
            (they must sum to 1). When omitted, the module-level
            ``probabilities`` Series is used.

    Returns:
        True when at least two of the drawn birthdays coincide, else False.
    """
    if probs is None:
        # Fall back to the module-level distribution so existing callers
        # that pass only `room` keep working.
        probs = probabilities
    birthdays = np.random.choice(probs.index, p=probs, size=room)
    # Fewer distinct birthdays than people means at least one collision.
    return np.unique(birthdays).shape[0] != room
def simulate_real(room=20, n_sim=1000):
    """Estimate how often a room of `room` people contains a shared birthday.

    Runs `sim_real_once(room)` `n_sim` times and returns the mean of the
    boolean outcomes, i.e. the fraction of simulated rooms with a collision.
    """
    outcomes = [sim_real_once(room) for _ in range(n_sim)]
    return np.mean(outcomes)
# Room sizes to evaluate. They are passed as explicit x-values: without them
# matplotlib plots against the list index (0..33), so every point would sit
# one position to the left of its actual room size (1..34).
room_sizes = range(1, 35)

# NOTE(review): `calculate` is not defined in this file; it is expected to be
# in scope already — confirm it is available before running.
plt.plot(room_sizes, [calculate(r) for r in room_sizes], label="calculated")
plt.plot(
    room_sizes,
    [simulate_real(room=i, n_sim=10_000) for i in room_sizes],
    label="simulated_real",
)
plt.xlabel("people in the room")
plt.ylabel("probability of a shared birthday")
# Trailing semicolon suppresses the Legend repr when run in a notebook cell.
plt.legend();