from datetime import timedelta  # referenced only by the disabled as-of variant below

import polars as pl


def transform_prices(df: pl.DataFrame) -> pl.DataFrame:
    """Return *df* with ``timestamp`` as a UTC millisecond datetime.

    The ``timestamp`` column is cast to ``Datetime("ms")`` and its time zone
    is then replaced with ``"UTC"``. All other columns are left untouched.
    """
    naive_ms = pl.col("timestamp").cast(pl.Datetime("ms"))
    utc_ms = naive_ms.dt.replace_time_zone("UTC")
    return df.with_columns([utc_ms.alias("timestamp")])


def transform_weather(df: pl.DataFrame) -> pl.DataFrame:
    """Return *df* with its string ``timestamp`` parsed into a UTC datetime.

    Values are parsed with the format ``%Y-%m-%dT%H:%M:%S%z`` into the UTC
    time zone and then cast to ``Datetime("ms", time_zone="UTC")``. All other
    columns are left untouched.
    """
    parsed = pl.col("timestamp").str.to_datetime(
        format="%Y-%m-%dT%H:%M:%S%z",
        time_zone="UTC",
    )
    target_dtype = pl.Datetime("ms", time_zone="UTC")
    return df.with_columns([parsed.cast(target_dtype).alias("timestamp")])


def join_dataframes(
    df_prices: pl.DataFrame,
    df_weather: pl.DataFrame,
) -> pl.DataFrame:
    """Inner-join prices and weather on ``timestamp`` and select the columns.

    Only rows whose ``timestamp`` appears in both frames are kept. The result
    contains ``timestamp``, ``price`` and the weather columns listed below,
    in that order.
    """
    weather_columns = [
        "temperature",
        "wind_speed",
        "solar",
        "sunshine",
        "cloud_cover",
        "precipitation",
    ]
    joined = df_prices.join(df_weather, on="timestamp", how="inner")
    return joined.select(
        [
            pl.col("timestamp"),
            pl.col("price"),
            pl.col(weather_columns),
        ]
    )


# Disabled alternative: as-of join that matches each price row to the nearest
# weather row within one hour instead of requiring identical timestamps.
#
# def join_dataframes(df_prices: pl.DataFrame, df_weather: pl.DataFrame) -> pl.DataFrame:
#     return (
#         df_prices.sort("timestamp")  # ← required!
#         .join_asof(
#             df_weather.sort("timestamp"),  # ← required!
#             on="timestamp",
#             strategy="nearest",  # ← nearest instead of backward!
#             tolerance=timedelta(hours=1),
#         )
#         .select(
#             [
#                 pl.col("timestamp"),
#                 pl.col("price"),
#                 pl.col(
#                     [
#                         "temperature",
#                         "wind_speed",
#                         "solar",
#                         "sunshine",
#                         "cloud_cover",
#                         "precipitation",
#                     ]
#                 ),
#             ]
#         )
#     )