strompreis/transformators/transformator.py

76 lines
2 KiB
Python

from datetime import timedelta
import polars as pl
def transform_prices(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` with ``timestamp`` as a UTC-aware millisecond datetime.

    The ``timestamp`` column is cast to ``Datetime("ms")`` and then tagged
    with the UTC time zone through ``replace_time_zone``. Every other column
    is passed through unchanged.

    NOTE(review): the incoming ``timestamp`` dtype is not visible here;
    presumably epoch milliseconds or naive datetimes -- confirm with caller.

    Args:
        df: Price data containing a ``timestamp`` column.

    Returns:
        A new frame whose ``timestamp`` column has been replaced.
    """
    utc_timestamp = (
        pl.col("timestamp").cast(pl.Datetime("ms")).dt.replace_time_zone("UTC")
    )
    # The keyword form names the output column, overwriting "timestamp" in place.
    return df.with_columns(timestamp=utc_timestamp)
def transform_weather(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` with its string ``timestamp`` parsed to UTC datetimes.

    The ``timestamp`` strings are parsed with the format
    ``%Y-%m-%dT%H:%M:%S%z`` into the UTC time zone and then cast to
    millisecond precision. Every other column is passed through unchanged.

    Args:
        df: Weather data containing a string ``timestamp`` column.

    Returns:
        A new frame whose ``timestamp`` column has dtype
        ``Datetime("ms", time_zone="UTC")``.
    """
    parsed = pl.col("timestamp").str.to_datetime(
        format="%Y-%m-%dT%H:%M:%S%z",
        time_zone="UTC",
    )
    # Cast explicitly to millisecond precision in UTC.
    as_utc_ms = parsed.cast(pl.Datetime("ms", time_zone="UTC"))
    return df.with_columns(timestamp=as_utc_ms)
def join_dataframes(df_prices: pl.DataFrame, df_weather: pl.DataFrame) -> pl.DataFrame:
    """Inner-join prices and weather on ``timestamp`` and keep a fixed column set.

    Only rows whose ``timestamp`` values are exactly equal in both frames
    survive the join.

    Args:
        df_prices: Frame providing ``timestamp`` and ``price``.
        df_weather: Frame providing ``timestamp`` and the weather columns.

    Returns:
        A frame with ``timestamp``, ``price`` and the six weather columns,
        in that order.
    """
    output_columns = [
        "timestamp",
        "price",
        "temperature",
        "wind_speed",
        "solar",
        "sunshine",
        "cloud_cover",
        "precipitation",
    ]
    merged = df_prices.join(df_weather, on="timestamp", how="inner")
    return merged.select(output_columns)
# Disabled alternative: as-of join that pairs each price row with the nearest
# weather sample within one hour instead of requiring identical timestamps.
#
# def join_dataframes(df_prices: pl.DataFrame, df_weather: pl.DataFrame) -> pl.DataFrame:
#     return (
#         df_prices.sort("timestamp")  # required: join_asof needs sorted input
#         .join_asof(
#             df_weather.sort("timestamp"),  # required: join_asof needs sorted input
#             on="timestamp",
#             strategy="nearest",  # nearest instead of backward
#             tolerance=timedelta(hours=1),
#         )
#         .select(
#             [
#                 pl.col("timestamp"),
#                 pl.col("price"),
#                 pl.col(
#                     [
#                         "temperature",
#                         "wind_speed",
#                         "solar",
#                         "sunshine",
#                         "cloud_cover",
#                         "precipitation",
#                     ]
#                 ),
#             ]
#         )
#     )