strompreis/transformators/transformator.py

40 lines
1.1 KiB
Python

"""
Data transformation logic for price and weather data.
"""
from datetime import timedelta
import polars as pl
def transform_weather(df: pl.DataFrame) -> pl.DataFrame:
    """
    Reduce raw weather data to the known columns and parse its timestamps.

    Only the columns from the fixed list of weather fields that are actually
    present in ``df`` are kept. The ``timestamp`` column is parsed from
    strings of the form ``%Y-%m-%dT%H:%M:%S%z`` into a millisecond-precision
    UTC datetime, and the rows are sorted by it.

    An empty input yields a fresh ``pl.DataFrame()`` rather than a frame that
    preserves the input schema.
    """
    if df.is_empty():
        return pl.DataFrame()

    weather_fields = [
        "timestamp",
        "temperature",
        "wind_speed",
        "solar",
        "sunshine",
        "cloud_cover",
        "precipitation",
    ]
    # Some fields may be absent from the input, so only select those present.
    present_fields = [name for name in weather_fields if name in df.columns]

    # Parse the textual timestamp as UTC, then pin the precision to ms.
    parsed_timestamp = (
        pl.col("timestamp")
        .str.to_datetime(format="%Y-%m-%dT%H:%M:%S%z", time_zone="UTC")
        .cast(pl.Datetime("ms", time_zone="UTC"))
    )

    trimmed = df.select(present_fields)
    return trimmed.with_columns(parsed_timestamp).sort("timestamp")
def transform_prices(df: pl.DataFrame) -> pl.DataFrame:
    """
    Convert the raw SMARD ``timestamp`` column to a UTC datetime.

    The column is cast to a millisecond-precision datetime, labelled as UTC
    and used to sort the rows. All other columns are passed through as they
    are.

    An empty input yields a fresh ``pl.DataFrame()`` rather than a frame that
    preserves the input schema.
    """
    if df.is_empty():
        return pl.DataFrame()

    # NOTE(review): presumably the raw values are epoch milliseconds; confirm
    # against the code that loads the SMARD data.
    utc_timestamp = (
        pl.col("timestamp")
        .cast(pl.Datetime("ms"))
        .dt.replace_time_zone("UTC")
    )

    converted = df.with_columns(utc_timestamp)
    return converted.sort("timestamp")