refactor: separate system into single Dockerfiles

This commit is contained in:
Patryk Hegenberg 2026-02-18 12:47:22 +01:00
parent ad87f702f1
commit ed803a2ca5
26 changed files with 238 additions and 85 deletions

View file

View file

@ -1,15 +1,8 @@
import sys
from pathlib import Path
import duckdb import duckdb
import polars as pl import polars as pl
import streamlit as st import streamlit as st
project_root = str(Path(__file__).parent.parent) from common.utils.config_loader import settings
if project_root not in sys.path:
sys.path.append(project_root)
from utils.config_loader import settings
st.set_page_config(page_title="Strompreis & Netz Dashboard", layout="wide") st.set_page_config(page_title="Strompreis & Netz Dashboard", layout="wide")

View file

@ -0,0 +1,15 @@
[project]
name = "dashboard"
version = "0.1.0"
description = "Streamlit dashboard for electricity price data"
dependencies = [
"streamlit>=1.54.0",
"common",
]
[tool.uv.sources]
common = { workspace = true }
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

View file

@ -2,17 +2,9 @@
REST API for accessing processed electricity price and network data. REST API for accessing processed electricity price and network data.
""" """
import sys
from pathlib import Path
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from common.utils import database as db
# Add project root to sys.path from common.utils.config_loader import settings
project_root = str(Path(__file__).parent.parent)
if project_root not in sys.path:
sys.path.append(project_root)
from utils import database as db
from utils.config_loader import settings
app = FastAPI( app = FastAPI(
title="Strompreis API", title="Strompreis API",

View file

@ -7,10 +7,10 @@ import sys
import logging import logging
import click import click
import polars as pl import polars as pl
from collectors import smard, weather from common.collectors import smard, weather
from transformators import transformator from common.transformators import transformator
from utils import database as db from common.utils import database as db
from utils.config_loader import settings from common.utils.config_loader import settings
# Structured logging configuration # Structured logging configuration
logging.basicConfig( logging.basicConfig(

View file

@ -0,0 +1,20 @@
[project]
name = "pipeline_api"
version = "0.1.0"
description = "ETL Pipeline and FastAPI for electricity price data"
dependencies = [
"click>=8.3.1",
"fastapi>=0.128.7",
"pyarrow>=23.0.0",
"requests>=2.32.5",
"tenacity>=9.1.4",
"uvicorn>=0.40.0",
"common",
]
[tool.uv.sources]
common = { workspace = true }
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

View file

@ -1,36 +1,35 @@
services: services:
pipeline: pipeline:
build: . build:
context: .
dockerfile: docker/pipeline-api.Dockerfile
user: "1000:1000" user: "1000:1000"
volumes: volumes:
- ./output:/app/output:z - ./output:/app/output:z
- ./config:/app/config:z
command: ["/bin/bash", "scripts/scheduler.sh"] command: ["/bin/bash", "scripts/scheduler.sh"]
environment: environment:
- INTERVAL=3600 - INTERVAL=3600
restart: unless-stopped restart: unless-stopped
api: api:
build: . build:
context: .
dockerfile: docker/pipeline-api.Dockerfile
user: "1000:1000" user: "1000:1000"
ports: ports:
- "8000:8000" - "8000:8000"
volumes: volumes:
- ./output:/app/output:z - ./output:/app/output:z
- ./config:/app/config:z command: ["/uvbin/uv", "run", "--frozen", "--no-sync", "--package", "pipeline_api", "python", "-m", "uvicorn", "pipeline_api.api.main:app", "--host", "0.0.0.0", "--port", "8000"]
command: ["python", "-m", "uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"]
restart: unless-stopped restart: unless-stopped
dashboard: dashboard:
build: . build:
context: .
dockerfile: docker/dashboard.Dockerfile
user: "1000:1000" user: "1000:1000"
ports: ports:
- "8501:8501" - "8501:8501"
volumes: volumes:
- ./output:/app/output:z - ./output:/app/output:z
- ./config:/app/config:z
environment:
- STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
- STREAMLIT_USAGE_STATS_ENABLED=false
command: ["streamlit", "run", "dashboard/app.py", "--server.port", "8501", "--server.address", "0.0.0.0"]
restart: unless-stopped restart: unless-stopped

View file

@ -0,0 +1,42 @@
# Stage 1: Builder
FROM python:3.11-slim-bookworm AS builder
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvbin/uv
WORKDIR /app
ENV UV_CACHE_DIR=/app/.cache/uv
COPY pyproject.toml uv.lock ./
COPY packages/common ./packages/common
COPY apps/dashboard ./apps/dashboard
# Create a VALID dummy for the other workspace member for validation
RUN mkdir -p apps/pipeline-api/pipeline_api && \
echo '[project]\nname = "pipeline_api"\nversion = "0.1.0"\n[build-system]\nrequires = ["hatchling"]\nbuild-backend = "hatchling.build"' > apps/pipeline-api/pyproject.toml
# Install dependencies into the virtualenv
RUN /uvbin/uv sync --frozen --no-dev --package dashboard
# Stage 2: Final
FROM python:3.11-slim-bookworm
WORKDIR /app
# Copy project configuration for uv run
COPY pyproject.toml uv.lock ./
# Copy only necessary parts from builder
COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /app/packages/common /app/packages/common
COPY --from=builder /app/apps/dashboard /app/apps/dashboard
COPY --from=builder /uvbin/uv /usr/local/bin/uv
# Create output directory with proper permissions
RUN mkdir -p output && chmod -R 777 output
EXPOSE 8501
ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
ENV STREAMLIT_USAGE_STATS_ENABLED=false
ENV PATH="/app/.venv/bin:$PATH"
# --no-sync is required as we don't have the uv cache in the final image
CMD ["uv", "run", "--frozen", "--no-sync", "--package", "dashboard", "streamlit", "run", "apps/dashboard/dashboard/app.py", "--server.port", "8501", "--server.address", "0.0.0.0"]

View file

@ -0,0 +1,41 @@
# Stage 1: Builder
FROM python:3.11-slim-bookworm AS builder
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvbin/uv
WORKDIR /app
ENV UV_CACHE_DIR=/app/.cache/uv
COPY pyproject.toml uv.lock ./
COPY packages/common ./packages/common
COPY apps/pipeline-api ./apps/pipeline-api
# Create a VALID dummy for the other workspace member for validation
RUN mkdir -p apps/dashboard/dashboard && \
echo '[project]\nname = "dashboard"\nversion = "0.1.0"\n[build-system]\nrequires = ["hatchling"]\nbuild-backend = "hatchling.build"' > apps/dashboard/pyproject.toml
# Install dependencies into the virtualenv
RUN /uvbin/uv sync --frozen --no-dev --package pipeline_api
# Stage 2: Final
FROM python:3.11-slim-bookworm
WORKDIR /app
# Copy project configuration for uv run
COPY pyproject.toml uv.lock ./
# Copy only necessary parts from builder
COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /app/packages/common /app/packages/common
COPY --from=builder /app/apps/pipeline-api /app/apps/pipeline-api
COPY --from=builder /uvbin/uv /usr/local/bin/uv
COPY scripts ./scripts
# Create output directory with proper permissions
RUN mkdir -p output && chmod -R 777 output
ENV PYTHONPATH=/app/apps/pipeline-api
ENV INTERVAL=3600
ENV PATH="/app/.venv/bin:$PATH"
# --no-sync is required as we don't have the uv cache in the final image
CMD ["uv", "run", "--frozen", "--no-sync", "--package", "pipeline_api", "python", "-m", "pipeline_api.main", "run"]

View file

View file

@ -5,8 +5,8 @@ Collector for SMARD (Electricity Market Data) API.
import time import time
import logging import logging
import polars as pl import polars as pl
from utils import request_utils from ..utils import request_utils
from utils.config_loader import settings from ..utils.config_loader import settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View file

@ -6,9 +6,8 @@ import logging
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
import polars as pl import polars as pl
from ..utils import request_utils
from utils import request_utils from ..utils.config_loader import settings
from utils.config_loader import settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View file

@ -39,8 +39,15 @@ class Settings(BaseSettings):
database: DatabaseConfig = DatabaseConfig() database: DatabaseConfig = DatabaseConfig()
def load_config(config_path: str = "config/config.yaml") -> Settings: def load_config(config_path: str | None = None) -> Settings:
path = Path(config_path) if config_path:
path = Path(config_path)
else:
# Try local first (dev) then package relative
local_path = Path("config/config.yaml")
pkg_path = Path(__file__).parent.parent / "config" / "config.yaml"
path = local_path if local_path.exists() else pkg_path
if not path.exists(): if not path.exists():
return Settings() return Settings()

View file

@ -5,7 +5,7 @@ DuckDB database interface for Bronze (Raw) and Gold (Combined) layers.
import duckdb import duckdb
import polars as pl import polars as pl
from contextlib import contextmanager from contextlib import contextmanager
from utils.config_loader import settings from .config_loader import settings
@contextmanager @contextmanager

View file

@ -0,0 +1,15 @@
[project]
name = "common"
version = "0.1.0"
description = "Shared logic and utilities for the Strompreis Pipeline"
dependencies = [
"duckdb>=1.4.4",
"polars>=1.38.1",
"pydantic>=2.12.5",
"pydantic-settings>=2.12.0",
"pyyaml>=6.0.3",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

View file

@ -1,23 +1,12 @@
[project] [project]
name = "strompreis-pipline" name = "strompreis-pipline"
version = "0.1.0" version = "0.1.0"
description = "Add your description here" description = "Strompreis Pipeline Workspace"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [
"click>=8.3.1", [tool.uv.workspace]
"duckdb>=1.4.4", members = ["packages/*", "apps/*"]
"fastapi>=0.128.7",
"polars>=1.38.1",
"pyarrow>=23.0.0",
"pydantic>=2.12.5",
"pydantic-settings>=2.12.0",
"pyyaml>=6.0.3",
"requests>=2.32.5",
"streamlit>=1.54.0",
"tenacity>=9.1.4",
"uvicorn>=0.40.0",
]
[tool.ruff] [tool.ruff]
line-length = 88 line-length = 88

View file

@ -2,7 +2,7 @@
echo "Starte Pipeline Scheduler (Intervall: $INTERVAL Sekunden)" echo "Starte Pipeline Scheduler (Intervall: $INTERVAL Sekunden)"
while true; do while true; do
echo "Führe Pipeline aus: $(date)" echo "Führe Pipeline aus: $(date)"
python main.py run /app/.venv/bin/python -m pipeline_api.main run
echo "Pipeline beendet. Warte $INTERVAL Sekunden..." echo "Pipeline beendet. Warte $INTERVAL Sekunden..."
sleep ${INTERVAL:-3600} sleep ${INTERVAL:-3600}
done done

97
uv.lock generated
View file

@ -6,6 +6,14 @@ resolution-markers = [
"python_full_version < '3.12'", "python_full_version < '3.12'",
] ]
[manifest]
members = [
"common",
"dashboard",
"pipeline-api",
"strompreis-pipline",
]
[[package]] [[package]]
name = "altair" name = "altair"
version = "6.0.0" version = "6.0.0"
@ -183,6 +191,42 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
] ]
[[package]]
name = "common"
version = "0.1.0"
source = { editable = "packages/common" }
dependencies = [
{ name = "duckdb" },
{ name = "polars" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "pyyaml" },
]
[package.metadata]
requires-dist = [
{ name = "duckdb", specifier = ">=1.4.4" },
{ name = "polars", specifier = ">=1.38.1" },
{ name = "pydantic", specifier = ">=2.12.5" },
{ name = "pydantic-settings", specifier = ">=2.12.0" },
{ name = "pyyaml", specifier = ">=6.0.3" },
]
[[package]]
name = "dashboard"
version = "0.1.0"
source = { editable = "apps/dashboard" }
dependencies = [
{ name = "common" },
{ name = "streamlit" },
]
[package.metadata]
requires-dist = [
{ name = "common", editable = "packages/common" },
{ name = "streamlit", specifier = ">=1.54.0" },
]
[[package]] [[package]]
name = "duckdb" name = "duckdb"
version = "1.4.4" version = "1.4.4"
@ -665,6 +709,31 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f2/26/c56ce33ca856e358d27fda9676c055395abddb82c35ac0f593877ed4562e/pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e", size = 7029880, upload-time = "2026-02-11T04:23:04.783Z" }, { url = "https://files.pythonhosted.org/packages/f2/26/c56ce33ca856e358d27fda9676c055395abddb82c35ac0f593877ed4562e/pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e", size = 7029880, upload-time = "2026-02-11T04:23:04.783Z" },
] ]
[[package]]
name = "pipeline-api"
version = "0.1.0"
source = { editable = "apps/pipeline-api" }
dependencies = [
{ name = "click" },
{ name = "common" },
{ name = "fastapi" },
{ name = "pyarrow" },
{ name = "requests" },
{ name = "tenacity" },
{ name = "uvicorn" },
]
[package.metadata]
requires-dist = [
{ name = "click", specifier = ">=8.3.1" },
{ name = "common", editable = "packages/common" },
{ name = "fastapi", specifier = ">=0.128.7" },
{ name = "pyarrow", specifier = ">=23.0.0" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "tenacity", specifier = ">=9.1.4" },
{ name = "uvicorn", specifier = ">=0.40.0" },
]
[[package]] [[package]]
name = "pluggy" name = "pluggy"
version = "1.6.0" version = "1.6.0"
@ -1266,20 +1335,6 @@ wheels = [
name = "strompreis-pipline" name = "strompreis-pipline"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [
{ name = "click" },
{ name = "duckdb" },
{ name = "fastapi" },
{ name = "polars" },
{ name = "pyarrow" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "streamlit" },
{ name = "tenacity" },
{ name = "uvicorn" },
]
[package.dev-dependencies] [package.dev-dependencies]
dev = [ dev = [
@ -1291,20 +1346,6 @@ dev = [
] ]
[package.metadata] [package.metadata]
requires-dist = [
{ name = "click", specifier = ">=8.3.1" },
{ name = "duckdb", specifier = ">=1.4.4" },
{ name = "fastapi", specifier = ">=0.128.7" },
{ name = "polars", specifier = ">=1.38.1" },
{ name = "pyarrow", specifier = ">=23.0.0" },
{ name = "pydantic", specifier = ">=2.12.5" },
{ name = "pydantic-settings", specifier = ">=2.12.0" },
{ name = "pyyaml", specifier = ">=6.0.3" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "streamlit", specifier = ">=1.54.0" },
{ name = "tenacity", specifier = ">=9.1.4" },
{ name = "uvicorn", specifier = ">=0.40.0" },
]
[package.metadata.requires-dev] [package.metadata.requires-dev]
dev = [ dev = [