commit for version used in evaluation of thesis

This commit is contained in:
Patryk Hegenberg 2026-03-29 10:03:18 +02:00
commit 72635dc7b9
27 changed files with 6084 additions and 0 deletions

1091
internal/transform/engine.go Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,106 @@
package transform
import (
"context"
"testing"
"time"
"codeberg.org/pata1704/guenther/internal/config"
"codeberg.org/pata1704/guenther/pkg/types"
"github.com/stretchr/testify/assert"
)
// TestTransformEngine_Fusion pushes one metric snapshot per window through a
// TransformEngine backed by an in-memory database and checks the FeatureVector
// emitted for each of the two windows: the averaged CPU value, the delta
// features, the ratio features, and the engineered-feature tail (slots 39-44)
// of the normalized vector.
func TestTransformEngine_Fusion(t *testing.T) {
	logChan := make(chan types.LogEvent, 100)
	metricChan := make(chan types.MetricSnapshot, 100)
	serviceStatusChan := make(chan types.ServiceStatus, 100)
	featureChan := make(chan types.FeatureVector, 100)
	healthChan := make(chan types.StageHealth, 10)
	cfg := &config.Config{}
	cfg.Transformation.WindowSize = 1 * time.Second
	cfg.Transformation.DbPath = ":memory:"
	engine, err := NewTransformEngine(cfg, logChan, metricChan, serviceStatusChan, featureChan, healthChan)
	// Abort immediately on a constructor error: a non-fatal assertion would let
	// the test go on to call Start/Wait on an engine that could not be built
	// and hide the real failure behind a secondary one.
	if err != nil {
		t.Fatalf("NewTransformEngine: %v", err)
	}
	baseTime := time.Date(2026, 1, 1, 12, 0, 0, 0, time.Local)
	// 1. Send data for first window
	metricChan <- types.MetricSnapshot{
		Timestamp:         baseTime,
		CPUPercent:        50.0,
		MemoryUsedMB:      1000,
		MemoryDirtyMB:     100,
		NetworkInMBps:     10.0,
		NetworkOutMBps:    20.0,
		TCPRetransPerS:    5,
		NetPacketsInPerS:  100,
		NetPacketsOutPerS: 200,
	}
	// 2. Start engine and wait for first window
	ctx, cancel := context.WithCancel(context.Background())
	engine.Start(ctx)
	// Stop the engine and wait for it to finish on every exit path, including
	// the t.Fatal calls below.
	defer func() {
		cancel()
		engine.Wait()
	}()
	select {
	case fv := <-featureChan:
		assert.Equal(t, 50.0, fv.AvgCPUPercent)
		// Deltas are absolute value on first window because tracker starts at 0
		assert.Equal(t, 10.0, fv.DeltaNetIn)
	case <-time.After(2 * time.Second):
		t.Fatal("Timeout waiting for first FeatureVector")
	}
	// 3. Send data for second window (triggers deltas)
	secondTime := baseTime.Add(cfg.Transformation.WindowSize)
	metricChan <- types.MetricSnapshot{
		Timestamp:         secondTime,
		CPUPercent:        60.0,
		MemoryUsedMB:      1000,
		MemoryDirtyMB:     200,
		NetworkInMBps:     15.0, // DeltaNetIn = 15.0 - 10.0 = 5.0
		NetworkOutMBps:    20.0,
		TCPRetransPerS:    10, // DeltaTCPRetrans = 10.0 - 5.0 = 5.0
		NetPacketsInPerS:  150,
		NetPacketsOutPerS: 200,
	}
	select {
	case fv := <-featureChan:
		// Check original logic
		assert.Equal(t, 60.0, fv.AvgCPUPercent)
		// Check new delta features
		assert.Equal(t, 5.0, fv.DeltaNetIn)
		assert.Equal(t, 5.0, fv.DeltaTCPRetrans)
		// Check ratio features
		// MemPressure = dirty / (used + 1) = 200/1001
		expectedPressure := 200.0 / 1001.0
		assert.InDelta(t, expectedPressure, fv.MemPressure, 1e-9)
		// NetAsymmetry = in / (out + 1e-3) = 15/20.001
		expectedAsym := 15.0 / 20.001
		assert.InDelta(t, expectedAsym, fv.NetAsymmetry, 1e-9)
		// Check NormalizedVector length (should be 45 base + params).
		// This must be fatal: the slots 39-44 are indexed directly below and a
		// shorter vector would otherwise panic with an index-out-of-range.
		nv := fv.NormalizedVector
		if len(nv) < 45 {
			t.Fatalf("NormalizedVector has %d entries, want at least 45", len(nv))
		}
		// Verify slots 39-44 (Engineered Features tail)
		assert.Equal(t, 5.0, nv[39]) // DeltaNetIn
		assert.Equal(t, 5.0, nv[40]) // DeltaTCPRetrans
		// TcpRollStd and NetRollStd will have values (even if just 2 pts)
		assert.Greater(t, nv[41], 0.0)                    // TcpRollStd (10 and 5)
		assert.Equal(t, 0.0, nv[42])                      // NetRollStd (20 and 20 -> std=0)
		assert.InDelta(t, expectedPressure, nv[43], 1e-9) // MemPressure
		assert.InDelta(t, expectedAsym, nv[44], 1e-9)     // NetAsymmetry
	case <-time.After(2 * time.Second):
		t.Fatal("Timeout waiting for second FeatureVector")
	}
}

View file

@ -0,0 +1,230 @@
// Package transform contains the DuckDB-backed Tumbling Window Engine.
package transform
import (
"fmt"
"strings"
"codeberg.org/pata1704/guenther/internal/config"
)
// scalerFeatureNames is the ordered list of aggregated features that have a
// row in scaler_params and that BuildFusionQuery normalizes with those
// parameters.
// NOTE(review): the original comment here was truncated; it appears to say
// that features missing from this list are left unscaled because they are
// derived from already-scaled inputs or are ratio/delta features — confirm.
var scalerFeatureNames = []string{
	// CPU (3)
	"avg_cpu", "max_cpu", "std_cpu",
	// System/Kernel (7)
	"avg_iowait", "std_iowait", "avg_softirq", "avg_ctx_switches", "avg_interrupts", "avg_softnet_dropped", "avg_softnet_squeeze",
	// Network (8)
	"avg_net_in", "std_net_in", "avg_net_out", "std_net_out", "sum_tcp_retrans", "sum_tcp_fast_retrans", "sum_tcp_timeouts", "avg_net_drops",
	// Disk (4)
	"avg_disk_read", "avg_disk_write", "avg_disk_io_ticks", "std_disk_io_ticks",
	// Log (2)
	"error_count", "severity_score",
}

// ScalerFeatureNames returns the ordered list of feature names stored in
// scaler_params.
//
// The result is a fresh copy: the package-level list drives SQL generation,
// so callers must not be able to reorder or overwrite it through the returned
// slice.
func ScalerFeatureNames() []string {
	return append([]string(nil), scalerFeatureNames...)
}
// BuildScalerParamsTable returns the DDL statement that creates the
// scaler_params table if it does not exist yet. The table holds one row per
// feature name with a non-null mean and std value.
func BuildScalerParamsTable() string {
	const ddl = `CREATE TABLE IF NOT EXISTS scaler_params (
feature_name VARCHAR PRIMARY KEY,
mean DOUBLE NOT NULL,
std DOUBLE NOT NULL
)`
	return ddl
}
// BuildFitScalerQuery returns the SQL statement that (re)fits the scaler and
// upserts one row per scaler feature (24 rows) into scaler_params.
//
// The statement takes two positional parameters: $1 and $2 bound the
// raw_metrics rows used for fitting as the half-open range
// timestamp >= $1 AND timestamp < $2.
//
// Despite the column names, the stored values are robust statistics:
//   - "mean" is the median (PERCENTILE_CONT(0.5)) of the underlying raw
//     column; max_cpu reuses the cpu_percent median as an approximation.
//   - "std" is the inter-quartile range (p75 - p25), floored at 1e-9, for the
//     cpu, iowait, net_in, net_out and disk_io_ticks based features; every
//     other feature gets a fixed std of 1.0.
//   - The std_* features and the two log features (error_count,
//     severity_score) are written with mean 0.0 and std 1.0.
//
// The m_std_* columns computed in the stats CTE are not referenced by the
// outer SELECT, which emits the literal 0.0 for those features instead.
func BuildFitScalerQuery() string {
return `
INSERT OR REPLACE INTO scaler_params (feature_name, mean, std)
WITH stats AS (
SELECT
-- CPU
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cpu_percent) AS m_avg_cpu,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cpu_percent) AS m_max_cpu, -- Approximation
0.0 AS m_std_cpu, -- Baseline std is often 0 or low
-- System
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cpu_iowait_percent) AS m_avg_iowait,
0.0 AS m_std_iowait,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cpu_softirq_percent) AS m_avg_softirq,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY context_switches_s) AS m_avg_ctx_switches,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY interrupts_s) AS m_avg_interrupts,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY softnet_dropped_s) AS m_avg_softnet_dropped,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY softnet_time_squeeze_s) AS m_avg_softnet_squeeze,
-- Network
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY net_in_mbps) AS m_avg_net_in,
0.0 AS m_std_net_in,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY net_out_mbps) AS m_avg_net_out,
0.0 AS m_std_net_out,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tcp_retrans_s) AS m_sum_tcp_retrans,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tcp_fast_retrans_s) AS m_sum_tcp_fast_retrans,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tcp_timeouts_s) AS m_sum_tcp_timeouts,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY network_drops_s) AS m_avg_net_drops,
-- Disk
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY disk_read_mbps) AS m_avg_disk_read,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY disk_write_mbps) AS m_avg_disk_write,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY disk_io_ticks_s) AS m_avg_disk_io_ticks,
0.0 AS m_std_disk_io_ticks,
-- IQRs for scaling
(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY cpu_percent) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cpu_percent)) AS s_avg_cpu,
(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY cpu_iowait_percent) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cpu_iowait_percent)) AS s_avg_iowait,
(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY net_in_mbps) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY net_in_mbps)) AS s_avg_net_in,
(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY net_out_mbps) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY net_out_mbps)) AS s_avg_net_out,
(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY disk_io_ticks_s) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY disk_io_ticks_s)) AS s_avg_disk_io_ticks
FROM raw_metrics
WHERE timestamp >= $1 AND timestamp < $2
)
SELECT feature_name, mean, std FROM (
SELECT 'avg_cpu' AS feature_name, s.m_avg_cpu AS mean, GREATEST(s.s_avg_cpu, 1e-9) AS std FROM stats s UNION ALL
SELECT 'max_cpu', s.m_max_cpu, GREATEST(s.s_avg_cpu, 1e-9) FROM stats s UNION ALL
SELECT 'std_cpu', 0.0, 1.0 FROM stats s UNION ALL
SELECT 'avg_iowait', s.m_avg_iowait, GREATEST(s.s_avg_iowait, 1e-9) FROM stats s UNION ALL
SELECT 'std_iowait', 0.0, 1.0 FROM stats s UNION ALL
SELECT 'avg_softirq', s.m_avg_softirq, 1.0 FROM stats s UNION ALL
SELECT 'avg_ctx_switches', s.m_avg_ctx_switches, 1.0 FROM stats s UNION ALL
SELECT 'avg_interrupts', s.m_avg_interrupts, 1.0 FROM stats s UNION ALL
SELECT 'avg_softnet_dropped', s.m_avg_softnet_dropped, 1.0 FROM stats s UNION ALL
SELECT 'avg_softnet_squeeze', s.m_avg_softnet_squeeze, 1.0 FROM stats s UNION ALL
SELECT 'avg_net_in', s.m_avg_net_in, GREATEST(s.s_avg_net_in, 1e-9) FROM stats s UNION ALL
SELECT 'std_net_in', 0.0, 1.0 FROM stats s UNION ALL
SELECT 'avg_net_out', s.m_avg_net_out, GREATEST(s.s_avg_net_out, 1e-9) FROM stats s UNION ALL
SELECT 'std_net_out', 0.0, 1.0 FROM stats s UNION ALL
SELECT 'sum_tcp_retrans', s.m_sum_tcp_retrans, 1.0 FROM stats s UNION ALL
SELECT 'sum_tcp_fast_retrans', s.m_sum_tcp_fast_retrans, 1.0 FROM stats s UNION ALL
SELECT 'sum_tcp_timeouts', s.m_sum_tcp_timeouts, 1.0 FROM stats s UNION ALL
SELECT 'avg_net_drops', s.m_avg_net_drops, 1.0 FROM stats s UNION ALL
SELECT 'avg_disk_read', s.m_avg_disk_read, 1.0 FROM stats s UNION ALL
SELECT 'avg_disk_write', s.m_avg_disk_write, 1.0 FROM stats s UNION ALL
SELECT 'avg_disk_io_ticks', s.m_avg_disk_io_ticks, GREATEST(s.s_avg_disk_io_ticks, 1e-9) FROM stats s UNION ALL
SELECT 'std_disk_io_ticks', 0.0, 1.0 FROM stats s UNION ALL
SELECT 'error_count', 0.0, 1.0 UNION ALL
SELECT 'severity_score', 0.0, 1.0
) t`
}
// BuildFusionQuery assembles the SQL statement that fuses metrics, logs and
// the optional log-parameter and service-status aggregates into a single row
// for the most recent window (ORDER BY m.ws DESC LIMIT 1).
//
// Parameters:
//   - maskingPatterns: patterns whose numeric parameters (as selected by
//     collectNumericCols) are averaged per window from log_params; when none
//     qualify, the param_agg CTE, its columns and its join are omitted.
//   - systemctlServices: service names; each contributes a svc_<name> column
//     where '.' and '-' in the name are replaced by '_'. When empty, the
//     svc_agg CTE, its columns and its join are omitted.
//   - windowInterval: text placed verbatim inside INTERVAL '...' for every
//     time_bucket call.
//
// For every entry of scalerFeatureNames the query also emits a scaled column
// sc_<name> = (COALESCE(value, 0.0) - mean) / std, where mean and std come
// from scaler_params and default to 0 and 1 when no row exists.
func BuildFusionQuery(maskingPatterns []config.MaskingPattern, systemctlServices []string, windowInterval string) string {
	// Optional per-window averages of numeric log parameters.
	var (
		paramCTE, paramJoin string
		paramSelect         strings.Builder
	)
	if numericCols := collectNumericCols(maskingPatterns); len(numericCols) > 0 {
		avgExprs := make([]string, 0, len(numericCols))
		for _, c := range numericCols {
			avgExprs = append(avgExprs, fmt.Sprintf("AVG(%[1]s) AS avg_%[1]s", c))
			fmt.Fprintf(&paramSelect, ", COALESCE(p.avg_%[1]s, 0.0) AS avg_%[1]s", c)
		}
		paramCTE = fmt.Sprintf(`, param_agg AS (SELECT time_bucket(INTERVAL '%s', event_time) AS ws, %s FROM log_params GROUP BY 1)`, windowInterval, strings.Join(avgExprs, ", "))
		paramJoin = "LEFT JOIN param_agg p ON m.ws = p.ws"
	}

	// Optional per-window service state columns.
	// NOTE(review): the CASE expression below never references the service
	// name, so as written every state_<svc> column aggregates over all
	// service_status rows of the window — confirm against the table schema.
	var (
		svcCTE, svcJoin string
		svcSelect       strings.Builder
	)
	if len(systemctlServices) > 0 {
		toIdent := strings.NewReplacer(".", "_", "-", "_")
		stateExprs := make([]string, 0, len(systemctlServices))
		for _, unit := range systemctlServices {
			id := toIdent.Replace(unit)
			stateExprs = append(stateExprs, `MODE(CASE WHEN active_state = 'active' THEN 1 WHEN active_state = 'failed' THEN -1 ELSE 0 END) AS state_`+id)
			svcSelect.WriteString(", COALESCE(s.state_" + id + ", 0) AS svc_" + id)
		}
		svcCTE = fmt.Sprintf(`, svc_agg AS (SELECT time_bucket(INTERVAL '%s', timestamp) AS ws, %s FROM service_status GROUP BY 1)`, windowInterval, strings.Join(stateExprs, ", "))
		svcJoin = "LEFT JOIN svc_agg s ON m.ws = s.ws"
	}

	// Scaler lookup columns (m_<name>/s_<name>) and the scaled output columns
	// (sc_<name>), both in scalerFeatureNames order.
	scalerCols := make([]string, 0, len(scalerFeatureNames))
	scaledCols := make([]string, 0, len(scalerFeatureNames))
	for _, feat := range scalerFeatureNames {
		scalerCols = append(scalerCols, fmt.Sprintf("COALESCE(MAX(CASE WHEN feature_name='%[1]s' THEN mean END),0) AS m_%[1]s, COALESCE(MAX(CASE WHEN feature_name='%[1]s' THEN std END),1) AS s_%[1]s", feat))

		// The two log features live in log_agg (alias l); everything else
		// comes from metric_agg (alias m).
		alias := "m"
		switch feat {
		case "severity_score", "error_count":
			alias = "l"
		}
		scaledCols = append(scaledCols, fmt.Sprintf("(COALESCE(%[1]s.%[2]s, 0.0) - sc.m_%[2]s) / sc.s_%[2]s AS sc_%[2]s", alias, feat))
	}

	return fmt.Sprintf(`
WITH metric_agg AS (
SELECT
time_bucket(INTERVAL '%[1]s', timestamp) AS ws,
AVG(cpu_percent) AS avg_cpu, MAX(cpu_percent) AS max_cpu, STDDEV_SAMP(cpu_percent) AS std_cpu,
AVG(cpu_iowait_percent) AS avg_iowait, STDDEV_SAMP(cpu_iowait_percent) AS std_iowait,
AVG(cpu_softirq_percent) AS avg_softirq, AVG(context_switches_s) AS avg_ctx_switches,
AVG(interrupts_s) AS avg_interrupts, AVG(softnet_dropped_s) AS avg_softnet_dropped,
AVG(softnet_time_squeeze_s) AS avg_softnet_squeeze,
AVG(memory_used_mb) AS avg_mem_used, AVG(memory_cached_mb) AS avg_mem_cached, MAX(memory_dirty_mb) AS max_mem_dirty,
AVG(net_in_mbps) AS avg_net_in, STDDEV_SAMP(net_in_mbps) AS std_net_in,
AVG(net_out_mbps) AS avg_net_out, STDDEV_SAMP(net_out_mbps) AS std_net_out,
SUM(tcp_retrans_s) AS sum_tcp_retrans, SUM(tcp_fast_retrans_s) AS sum_tcp_fast_retrans,
SUM(tcp_timeouts_s) AS sum_tcp_timeouts, AVG(network_drops_s) AS avg_net_drops,
AVG(disk_read_mbps) AS avg_disk_read, AVG(disk_write_mbps) AS avg_disk_write,
AVG(disk_io_ticks_s) AS avg_disk_io_ticks, STDDEV_SAMP(disk_io_ticks_s) AS std_disk_io_ticks,
SUM(disk_read_time_s) AS sum_disk_read_time, SUM(disk_write_time_s) AS sum_disk_write_time,
SUM(disk_reads_s) AS sum_disk_reads, SUM(disk_writes_s) AS sum_disk_writes,
SUM(net_packets_in_s) AS sum_packets_in, SUM(net_packets_out_s) AS sum_packets_out
FROM raw_metrics GROUP BY 1
),
log_agg AS (
SELECT
time_bucket(INTERVAL '%[1]s', timestamp) AS ws,
COUNT(*) AS log_event_count, COUNT(DISTINCT template_id) AS unique_templates,
SUM(CASE WHEN severity = 'ERROR' THEN 1 ELSE 0 END) AS error_count,
SUM(CASE
WHEN severity = 'ERROR' THEN 10
WHEN severity = 'WARN' THEN 3
ELSE 1
END) AS severity_score
FROM log_events GROUP BY 1
)%[2]s%[3]s,
scaler AS (
SELECT %[4]s FROM scaler_params
)
SELECT m.ws,
m.*, l.log_event_count, l.unique_templates, l.error_count, l.severity_score%[5]s%[6]s,
%[7]s
FROM metric_agg m
LEFT JOIN log_agg l ON m.ws = l.ws
%[8]s %[9]s
CROSS JOIN scaler sc
ORDER BY m.ws DESC LIMIT 1`,
		windowInterval, paramCTE, svcCTE, strings.Join(scalerCols, ", "), paramSelect.String(), svcSelect.String(), strings.Join(scaledCols, ", "), paramJoin, svcJoin)
}
// BuildLogParamsSchema returns the CREATE TABLE IF NOT EXISTS statement for
// log_params. The table always has an event_time column; every pattern with a
// non-empty Name adds a param_<Name> column whose SQL type is chosen by
// sqlType from the pattern's Type. Patterns without a name are skipped.
func BuildLogParamsSchema(patterns []config.MaskingPattern) string {
	defs := make([]string, 1, len(patterns)+1)
	defs[0] = "event_time TIMESTAMP WITH TIME ZONE"
	for _, pattern := range patterns {
		if pattern.Name != "" {
			defs = append(defs, "param_"+pattern.Name+" "+sqlType(pattern.Type))
		}
	}
	return "CREATE TABLE IF NOT EXISTS log_params (\n\t" + strings.Join(defs, ",\n\t") + "\n)"
}
// sqlType maps a masking-pattern type name to the SQL column type used in
// log_params: "float" becomes DOUBLE, "int" becomes BIGINT, and every other
// value (including the empty string) becomes VARCHAR.
func sqlType(t string) string {
	if t == "float" {
		return "DOUBLE"
	}
	if t == "int" {
		return "BIGINT"
	}
	return "VARCHAR"
}
// collectNumericCols returns the log_params column names (param_<Name>) of
// all named patterns whose column is numeric, in input order.
//
// "Numeric" is decided by sqlType so this function cannot disagree with the
// schema: a pattern whose type maps to VARCHAR (for example an empty or
// unrecognised type) is excluded. Previously only the literal type "string"
// was excluded, so such patterns were created as VARCHAR columns and then
// passed to AVG() by the fusion query.
func collectNumericCols(patterns []config.MaskingPattern) []string {
	var cols []string
	for _, mp := range patterns {
		if mp.Name == "" || sqlType(mp.Type) == "VARCHAR" {
			continue
		}
		cols = append(cols, "param_"+mp.Name)
	}
	return cols
}