76 lines
2 KiB
Python
76 lines
2 KiB
Python
from datetime import timedelta
|
|
import polars as pl
|
|
|
|
|
|
def transform_prices(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` with its ``timestamp`` column as UTC millisecond datetimes.

    The ``timestamp`` column is cast to ``Datetime("ms")`` and then tagged
    with the ``"UTC"`` time zone via ``dt.replace_time_zone``. The result
    overwrites the column of the same name; all other columns are kept.

    Args:
        df: Frame containing a ``timestamp`` column.
            NOTE(review): the incoming dtype is not visible here -- confirm
            it is something that casts cleanly to ``Datetime("ms")``.

    Returns:
        A new frame with the converted ``timestamp`` column.
    """
    utc_timestamp = (
        pl.col("timestamp")
        .cast(pl.Datetime("ms"))
        .dt.replace_time_zone("UTC")
    )
    return df.with_columns([utc_timestamp.alias("timestamp")])
|
|
|
|
|
|
def transform_weather(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` with its string ``timestamp`` column parsed to UTC datetimes.

    Values are parsed with the format ``%Y-%m-%dT%H:%M:%S%z`` into the
    ``"UTC"`` time zone and then cast to ``Datetime("ms", time_zone="UTC")``.
    The result overwrites the column of the same name; all other columns
    are kept.

    Args:
        df: Frame containing a string ``timestamp`` column in the format
            above.

    Returns:
        A new frame with the parsed ``timestamp`` column.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S%z"
    parsed = pl.col("timestamp").str.to_datetime(
        format=timestamp_format,
        time_zone="UTC",
    )
    utc_millis = parsed.cast(pl.Datetime("ms", time_zone="UTC"))
    return df.with_columns([utc_millis.alias("timestamp")])
|
|
|
|
|
|
def join_dataframes(df_prices: pl.DataFrame, df_weather: pl.DataFrame) -> pl.DataFrame:
    """Inner-join the two frames on ``timestamp`` and keep a fixed column set.

    Only rows whose ``timestamp`` value appears in both frames survive the
    join. The result is then narrowed to ``timestamp``, ``price`` and the
    columns listed in ``feature_columns`` below, in that order.

    Args:
        df_prices: Left-hand frame of the join; must contain ``timestamp``.
        df_weather: Right-hand frame of the join; must contain ``timestamp``.

    Returns:
        The joined frame restricted to the selected columns.
    """
    # NOTE(review): presumably these all come from df_weather -- confirm
    # against the upstream schema.
    feature_columns = [
        "temperature",
        "wind_speed",
        "solar",
        "sunshine",
        "cloud_cover",
        "precipitation",
    ]
    joined = df_prices.join(df_weather, on="timestamp", how="inner")
    return joined.select(
        [
            pl.col("timestamp"),
            pl.col("price"),
            pl.col(feature_columns),
        ]
    )
|
|
|
|
|
|
#
|
|
#
|
|
# def join_dataframes(df_prices: pl.DataFrame, df_weather: pl.DataFrame) -> pl.DataFrame:
|
|
# return (
|
|
# df_prices.sort("timestamp") # ← required: join_asof needs sorted input!
|
|
# .join_asof(
|
|
# df_weather.sort("timestamp"), # ← required: join_asof needs sorted input!
|
|
# on="timestamp",
|
|
# strategy="nearest", # ← nearest instead of backward!
|
|
# tolerance=timedelta(hours=1),
|
|
# )
|
|
# .select(
|
|
# [
|
|
# pl.col("timestamp"),
|
|
# pl.col("price"),
|
|
# pl.col(
|
|
# [
|
|
# "temperature",
|
|
# "wind_speed",
|
|
# "solar",
|
|
# "sunshine",
|
|
# "cloud_cover",
|
|
# "precipitation",
|
|
# ]
|
|
# ),
|
|
# ]
|
|
# )
|
|
# )
|