40 lines
1.1 KiB
Python
40 lines
1.1 KiB
Python
"""
|
|
Data transformation logic for price and weather data.
|
|
"""
|
|
|
|
from datetime import timedelta
|
|
import polars as pl
|
|
|
|
def transform_weather(df: pl.DataFrame) -> pl.DataFrame:
    """
    Trim raw weather data to the known columns and normalise its timestamps.

    An empty input short-circuits to a brand-new ``pl.DataFrame()`` (no
    columns). Otherwise, only those of the expected weather columns that are
    actually present in ``df`` are kept, the ``timestamp`` strings are parsed
    with the format ``%Y-%m-%dT%H:%M:%S%z`` into UTC datetimes, cast to
    millisecond precision, and the rows are sorted by ``timestamp``.

    The ``timestamp`` column is referenced unconditionally, so a non-empty
    frame is expected to contain it.
    """
    if df.is_empty():
        return pl.DataFrame()

    wanted = (
        "timestamp", "temperature", "wind_speed", "solar",
        "sunshine", "cloud_cover", "precipitation",
    )
    # Keep the canonical ordering above, but only for columns the frame has.
    present = [name for name in wanted if name in df.columns]

    utc_timestamp = (
        pl.col("timestamp")
        .str.to_datetime(format="%Y-%m-%dT%H:%M:%S%z", time_zone="UTC")
        .cast(pl.Datetime("ms", time_zone="UTC"))
    )

    trimmed = df.select(present)
    return trimmed.with_columns(utc_timestamp).sort("timestamp")
|
|
|
|
def transform_prices(df: pl.DataFrame) -> pl.DataFrame:
    """
    Convert the ``timestamp`` column of raw SMARD price data to UTC datetimes.

    An empty input short-circuits to a brand-new ``pl.DataFrame()`` (no
    columns). Otherwise the ``timestamp`` column is cast to a millisecond
    ``pl.Datetime``, its time zone is set to UTC, and the rows are sorted by
    ``timestamp``. All other columns are passed through unchanged.

    NOTE(review): presumably the raw values are epoch milliseconds - confirm
    against the SMARD loader.
    """
    if df.is_empty():
        return pl.DataFrame()

    timestamp_utc = (
        pl.col("timestamp")
        .cast(pl.Datetime("ms"))
        .dt.replace_time_zone("UTC")
    )

    converted = df.with_columns(timestamp_utc)
    return converted.sort("timestamp")
|