This is the code that we start with.
import polars as pl
# Load the raw CSV eagerly, with automatic date parsing switched off.
df = pl.read_csv("wowah_data.csv", parse_dates=False)
# Strip the spaces out of every column name.
df.columns = [name.replace(" ", "") for name in df.columns]
# Continue with a lazy frame from here on.
df = df.lazy()
def set_types(dataf):
    """Give the ``guild`` and ``timestamp`` columns more useful types.

    Args:
        dataf: A polars frame with a ``guild`` column and a string
            ``timestamp`` column.

    Returns:
        The frame with ``guild`` replaced by the boolean ``guild != -1``
        and ``timestamp`` parsed into ``pl.Datetime``.
    """
    # NOTE(review): -1 is presumably the "no guild" marker in the raw data;
    # confirm against the dataset description.
    in_guild = pl.col("guild") != -1
    # Timestamps are expected to look like "01/31/08 23:59:46".
    parsed_timestamp = pl.col("timestamp").str.strptime(
        pl.Datetime, fmt="%m/%d/%y %H:%M:%S"
    )
    return dataf.with_columns([in_guild, parsed_timestamp])
# set_types takes a frame and returns a frame, so it can be dropped into
# any pipeline via .pipe(); .collect() then runs the lazy query.
(
    df
    .pipe(set_types)
    .collect()
)
Sorting via .sort
Polars can also sort the dataframe. Let's sort the dataset by `char` and then by `timestamp`.
# Order the typed rows by "char" first, then by "timestamp".
(
    df
    .pipe(set_types)
    .sort(["char", "timestamp"])
    .collect()
)
Create a subset with .filter
We can also limit our view to a subset of the rows. The `.filter` method does that; here we keep only the rows where `char` equals 21.
# Keep only the rows where "char" is 21, then apply the types and sort.
(
    df
    .filter(pl.col("char") == 21)
    .pipe(set_types)
    .sort(["char", "timestamp"])
    .collect()
)