refactor: modernize project and improve error handling and documentation
This commit is contained in:
parent
e27bf2a098
commit
d87d0cce11
17 changed files with 1793 additions and 188 deletions
|
|
@ -1,76 +1,40 @@
|
|||
"""
|
||||
Data transformation logic for price and weather data.
|
||||
"""
|
||||
|
||||
from datetime import timedelta
|
||||
import polars as pl
|
||||
|
||||
def transform_weather(df: pl.DataFrame) -> pl.DataFrame:
    """
    Clean and standardize raw weather data.

    An empty input yields a new, column-less ``pl.DataFrame``. Otherwise the
    frame is reduced to the known weather columns that are actually present,
    the string ``timestamp`` column is parsed into a millisecond-precision
    UTC datetime, and the rows are sorted by ``timestamp``.
    """
    if df.is_empty():
        return pl.DataFrame()

    wanted = (
        "timestamp", "temperature", "wind_speed", "solar",
        "sunshine", "cloud_cover", "precipitation",
    )
    # Only select columns the input really has, preserving the order above.
    present = [name for name in wanted if name in df.columns]

    parsed_timestamp = (
        pl.col("timestamp")
        .str.to_datetime(format="%Y-%m-%dT%H:%M:%S%z", time_zone="UTC")
        .cast(pl.Datetime("ms", time_zone="UTC"))
    )

    trimmed = df.select(present)
    return trimmed.with_columns(parsed_timestamp).sort("timestamp")
|
||||
|
||||
def transform_prices(df: pl.DataFrame) -> pl.DataFrame:
    """
    Convert the ``timestamp`` column of raw SMARD price data to UTC datetimes.

    An empty input yields a new, column-less ``pl.DataFrame``. Otherwise the
    ``timestamp`` column is cast to a millisecond-precision datetime and
    given the UTC time zone; every other column is passed through unchanged.
    """
    if df.is_empty():
        return pl.DataFrame()

    # Cast to a naive millisecond datetime first, then attach the UTC zone.
    naive_ms = pl.col("timestamp").cast(pl.Datetime("ms"))
    utc_timestamp = naive_ms.dt.replace_time_zone("UTC").alias("timestamp")
    return df.with_columns(utc_timestamp)
|
||||
|
||||
|
||||
def transform_weather(df: pl.DataFrame) -> pl.DataFrame:
    """
    Parse the string ``timestamp`` column of weather data into UTC datetimes.

    The column is parsed with the format ``%Y-%m-%dT%H:%M:%S%z``, stored as a
    millisecond-precision UTC datetime, and written back under the same name.
    All other columns are passed through unchanged.
    """
    # NOTE(review): another ``transform_weather`` is defined earlier in this
    # view; if both live in the same module, this later definition is the one
    # that takes effect -- confirm which version is intended.
    timestamp_utc = (
        pl.col("timestamp")
        .str.to_datetime(format="%Y-%m-%dT%H:%M:%S%z", time_zone="UTC")
        .cast(pl.Datetime("ms", time_zone="UTC"))
        .alias("timestamp")
    )
    return df.with_columns(timestamp_utc)
|
||||
|
||||
|
||||
def join_dataframes(df_prices: pl.DataFrame, df_weather: pl.DataFrame) -> pl.DataFrame:
    """
    Inner-join price and weather data on ``timestamp``.

    Only rows whose ``timestamp`` appears in both frames are kept. The result
    contains ``timestamp``, ``price`` and the six weather columns listed
    below, in that order.
    """
    weather_fields = [
        "temperature",
        "wind_speed",
        "solar",
        "sunshine",
        "cloud_cover",
        "precipitation",
    ]

    merged = df_prices.join(df_weather, on="timestamp", how="inner")
    return merged.select(
        [
            pl.col("timestamp"),
            pl.col("price"),
            pl.col(weather_fields),
        ]
    )
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
# def join_dataframes(df_prices: pl.DataFrame, df_weather: pl.DataFrame) -> pl.DataFrame:
|
||||
# return (
|
||||
# df_prices.sort("timestamp") # ← required!
|
||||
# .join_asof(
|
||||
# df_weather.sort("timestamp"), # ← required!
|
||||
# on="timestamp",
|
||||
# strategy="nearest", # ← nearest instead of backward!
|
||||
# tolerance=timedelta(hours=1),
|
||||
# )
|
||||
# .select(
|
||||
# [
|
||||
# pl.col("timestamp"),
|
||||
# pl.col("price"),
|
||||
# pl.col(
|
||||
# [
|
||||
# "temperature",
|
||||
# "wind_speed",
|
||||
# "solar",
|
||||
# "sunshine",
|
||||
# "cloud_cover",
|
||||
# "precipitation",
|
||||
# ]
|
||||
# ),
|
||||
# ]
|
||||
# )
|
||||
# )
|
||||
pl.col("timestamp")
|
||||
.cast(pl.Datetime("ms"))
|
||||
.dt.replace_time_zone("UTC")
|
||||
).sort("timestamp")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue