commit for version used in evaluation of thesis

This commit is contained in:
Patryk Hegenberg 2026-03-29 10:03:18 +02:00
commit 72635dc7b9
27 changed files with 6084 additions and 0 deletions

302
pkg/types/types.go Normal file
View file

@ -0,0 +1,302 @@
// Package types defines the shared data structures that flow between pipeline
// stages. All types are value-safe to copy and JSON-serialisable.
package types
import "time"
// ── LogEvent ─────────────────────────────────────────────────────────────────
// LogEvent represents a single parsed log line after Drain3 template mining.
type LogEvent struct {
Timestamp time.Time `json:"timestamp"`
TemplateID int `json:"template_id"`
Params map[string]string `json:"params"`
Severity string `json:"severity"`
RawLine string `json:"raw_line"`
}
// ── ServiceStatus ─────────────────────────────────────────────────────────────
// ServiceStatus represents the state of a systemd service.
type ServiceStatus struct {
Timestamp time.Time `json:"timestamp"`
ServiceName string `json:"service_name"`
ActiveState string `json:"active_state"` // e.g. "active", "inactive", "failed"
SubState string `json:"sub_state"` // e.g. "running", "dead", "exited"
}
// ── MetricSnapshot ────────────────────────────────────────────────────────────
// MetricSnapshot is a 1 Hz sample of Linux system metrics collected from /proc.
type MetricSnapshot struct {
Timestamp time.Time `json:"timestamp"`
CPUPercent float64 `json:"cpu_percent"`
CPUIoWaitPercent float64 `json:"cpu_iowait_percent"`
CPUSoftIrqPercent float64 `json:"cpu_softirq_percent"`
ContextSwitchesPerS float64 `json:"context_switches_s"`
InterruptsPerS float64 `json:"interrupts_s"`
MemoryUsedMB float64 `json:"memory_used_mb"`
MemoryCachedMB float64 `json:"memory_cached_mb"`
MemoryDirtyMB float64 `json:"memory_dirty_mb"`
NetworkInMBps float64 `json:"net_in_mbps"`
NetworkOutMBps float64 `json:"net_out_mbps"`
NetErrorsPerS float64 `json:"network_errors_s"`
NetDropsPerS float64 `json:"network_drops_s"`
TCPRetransPerS float64 `json:"tcp_retrans_s"`
TCPTimeoutsPerS float64 `json:"tcp_timeouts_s"`
TCPLostRetransmitPerS float64 `json:"tcp_lost_retransmit_s"`
TCPFastRetransPerS float64 `json:"tcp_fast_retrans_s"`
DiskReadMBps float64 `json:"disk_read_mbps"`
DiskWriteMBps float64 `json:"disk_write_mbps"`
DiskReadTimeMsPerS float64 `json:"disk_read_time_s"`
DiskWriteTimeMsPerS float64 `json:"disk_write_time_s"`
DiskIOTicksPerS float64 `json:"disk_io_ticks_s"`
SoftnetDroppedPerS float64 `json:"softnet_dropped_s"`
SoftnetTimeSqueezePerS float64 `json:"softnet_time_squeeze_s"`
NetPacketsInPerS float64 `json:"net_packets_in_s"`
NetPacketsOutPerS float64 `json:"net_packets_out_s"`
DiskReadsCompletedPerS float64 `json:"disk_reads_s"`
DiskWritesCompletedPerS float64 `json:"disk_writes_s"`
}
// ── FeatureVector ─────────────────────────────────────────────────────────────
// FeatureVector is the output of the DuckDB Tumbling-Window fusion layer.
//
// # NormalizedVector layout
//
// Slot 0 4: CPU (DuckDB RobustScaled)
// 0=avg_cpu 1=max_cpu 2=avg_iowait 3=avg_softirq 4=avg_ctx_switches
// Slot 5 7: Memory (DuckDB RobustScaled)
// 5=avg_mem_used 6=avg_mem_cached 7=max_mem_dirty
// Slot 8: Disk (DuckDB RobustScaled)
// 8=avg_disk_io_ticks
// Slot 912: Network (DuckDB RobustScaled)
// 9=avg_net_in 10=avg_net_out 11=avg_net_drops 12=avg_softnet_squeeze
// Slot 1316: TCP (DuckDB RobustScaled)
// 13=max_tcp_retrans 14=sum_tcp_fast_retrans
// 15=sum_tcp_timeouts 16=sum_tcp_lost_retrans
// Slot 1720: Log (DuckDB RobustScaled)
// 17=log_event_count 18=error_count 19=unique_templates 20=error_rate
// Slot 21: CPUDelta Δavg_cpu vs previous window, %-points (unscaled)
// Slot 22: RatioTCPNet sum_tcp_retrans / (avg_net_out + 1e-3), CV=10 (NEW)
// Slot 23: DeltaCtx Δavg_ctx_switches vs previous window, CV=6.2 (NEW)
// Slot 24: NetDelta Δavg_net_out vs previous window, MBps (unscaled)
// Slot 25: CPURollStd rolling σ(avg_cpu, 12 windows) (unscaled)
// Slot 26: CPUEfficiency avg_cpu / (avg_net_out + 1) (unscaled)
// Slot 27: IOWaitProxy avg_disk_io_ticks / (avg_cpu + 1) (unscaled)
// Slot 28: LogDensity unique_templates / (log_count + 1) (unscaled)
// Slot 29: DeltaNetIn Δavg_net_in vs previous window, MBps (unscaled)
// Slot 30: DeltaTCPRetrans Δsum_tcp_retrans vs previous window (unscaled)
// Slot 31: TcpRollStd rolling σ(sum_tcp_retrans, 5 windows) (unscaled)
// Slot 32: NetRollStd rolling σ(avg_net_out, 5 windows) (unscaled)
// Slot 33: MemPressure avg_dirty_mb / (avg_mem_used + 1) (unscaled)
// Slot 34: NetAsymmetry avg_net_in / (avg_net_out + 1e-3) (unscaled)
// Slot 35+: Drain param averages (unscaled)
type FeatureVector struct {
Timestamp time.Time `json:"timestamp"`
WindowStart time.Time `json:"window_start"`
WindowEnd time.Time `json:"window_end"`
// CPU aggregations
AvgCPUPercent float64 `json:"avg_cpu"`
MaxCPUPercent float64 `json:"max_cpu"`
StdCPUPercent float64 `json:"std_cpu"`
AvgCPUIoWait float64 `json:"avg_iowait"`
StdCPUIoWait float64 `json:"std_iowait"`
AvgCPUSoftIrq float64 `json:"avg_softirq"`
AvgCtxSwitches float64 `json:"avg_ctx_switches"`
AvgInterrupts float64 `json:"avg_interrupts"`
// Memory aggregations
AvgMemUsedMB float64 `json:"avg_mem_used"`
AvgMemCachedMB float64 `json:"avg_mem_cached"`
MaxMemDirtyMB float64 `json:"max_mem_dirty"`
// Disk aggregations
AvgDiskIOTicks float64 `json:"avg_disk_io_ticks"`
StdDiskIOTicks float64 `json:"std_disk_io_ticks"`
AvgDiskReadMBps float64 `json:"avg_disk_read"`
AvgDiskWriteMBps float64 `json:"avg_disk_write"`
// Network aggregations
AvgNetInMBps float64 `json:"avg_net_in"`
StdNetInMBps float64 `json:"std_net_in"`
AvgNetOutMBps float64 `json:"avg_net_out"`
StdNetOutMBps float64 `json:"std_net_out"`
AvgNetDrops float64 `json:"avg_net_drops"`
AvgSoftnetDropped float64 `json:"avg_softnet_dropped"`
AvgSoftnetSqueeze float64 `json:"avg_softnet_squeeze"`
// TCP aggregations
SumTCPRetrans float64 `json:"sum_tcp_retrans"`
SumTCPFastRetrans float64 `json:"sum_tcp_fast_retrans"`
SumTCPTimeouts float64 `json:"sum_tcp_timeouts"`
// Log aggregations
ErrorCount int `json:"error_count"`
SeverityScore float64 `json:"severity_score"`
// Engineered / Derived features
CPUDelta float64 `json:"cpu_delta"`
CPURollStd float64 `json:"cpu_roll_std"`
CPUEfficiency float64 `json:"cpu_efficiency"`
DeltaCtx float64 `json:"delta_ctx"`
NetDelta float64 `json:"net_delta"`
AvgNetThroughput float64 `json:"avg_net_throughput"`
CPUPerMB float64 `json:"cpu_per_mb"`
NetworkDiskRatio float64 `json:"network_disk_ratio"`
RetransPerPacket float64 `json:"retrans_per_packet"`
RetransPerMB float64 `json:"retrans_per_mb"`
AvgDiskLatencyMS float64 `json:"avg_disk_latency_ms"`
LogCountTotal int `json:"log_count_total"`
UniqueTemplates int `json:"unique_templates"`
LogDensity float64 `json:"log_density"`
IOWaitProxy float64 `json:"io_wait_proxy"`
DeltaNetIn float64 `json:"delta_net_in"`
DeltaTCPRetrans float64 `json:"delta_tcp_retrans"`
TcpRollStd float64 `json:"tcp_roll_std"`
NetRollStd float64 `json:"net_roll_std"`
MemPressure float64 `json:"mem_pressure"`
NetAsymmetry float64 `json:"net_asymmetry"`
// Drain parameter aggregations
ParamAvg map[string]float64 `json:"param_avg"`
// ServiceStatuses maps service names to their encoded state (active=1, inactive=0, failed=-1).
ServiceStatuses map[string]float64 `json:"service_statuses"`
// NormalizedVector is the flat float64 slice consumed by anomaly detectors.
NormalizedVector []float64 `json:"normalized_vector"`
}
// ToFloatSlice serialises fv to a deterministic []float64 for offline EDA.
// Returns raw (unscaled) values; use NormalizedVector for ML inference.
//
// [avg_cpu, max_cpu, std_cpu,
// avg_iowait, std_iowait, avg_softirq, avg_ctx_switches, avg_interrupts,
// avg_softnet_dropped, avg_softnet_squeeze,
// avg_net_in, std_net_in, avg_net_out, std_net_out,
// sum_tcp_retrans, sum_tcp_fast_retrans, sum_tcp_timeouts, avg_net_drops,
// avg_disk_read, avg_disk_write, avg_disk_io_ticks, std_disk_io_ticks,
// error_count, severity_score,
// cpu_delta, cpu_roll_std, cpu_efficiency, delta_ctx, net_delta,
// avg_net_throughput, cpu_per_mb, network_disk_ratio, retrans_per_packet,
// retrans_per_mb, avg_disk_latency_ms, log_count_total, unique_templates,
// log_density, io_wait_proxy, delta_net_in, delta_tcp_retrans,
// tcp_roll_std, net_roll_std, mem_pressure, net_asymmetry,
// param_*]
func (fv FeatureVector) ToFloatSlice(paramNames []string) []float64 {
out := make([]float64, 0, 45+len(paramNames))
out = append(out,
// Base Aggregates (24)
fv.AvgCPUPercent, fv.MaxCPUPercent, fv.StdCPUPercent,
fv.AvgCPUIoWait, fv.StdCPUIoWait, fv.AvgCPUSoftIrq, fv.AvgCtxSwitches, fv.AvgInterrupts,
fv.AvgSoftnetDropped, fv.AvgSoftnetSqueeze,
fv.AvgNetInMBps, fv.StdNetInMBps, fv.AvgNetOutMBps, fv.StdNetOutMBps,
fv.SumTCPRetrans, fv.SumTCPFastRetrans, fv.SumTCPTimeouts, fv.AvgNetDrops,
fv.AvgDiskReadMBps, fv.AvgDiskWriteMBps, fv.AvgDiskIOTicks, fv.StdDiskIOTicks,
float64(fv.ErrorCount), fv.SeverityScore,
// Engineered Features (21)
fv.CPUDelta, fv.CPURollStd, fv.CPUEfficiency, fv.DeltaCtx, fv.NetDelta,
fv.AvgNetThroughput, fv.CPUPerMB, fv.NetworkDiskRatio, fv.RetransPerPacket,
fv.RetransPerMB, fv.AvgDiskLatencyMS, float64(fv.LogCountTotal),
float64(fv.UniqueTemplates), fv.LogDensity, fv.IOWaitProxy,
fv.DeltaNetIn, fv.DeltaTCPRetrans, fv.TcpRollStd, fv.NetRollStd,
fv.MemPressure, fv.NetAsymmetry,
)
for _, name := range paramNames {
out = append(out, fv.ParamAvg[name])
}
return out
}
// ToNamedMap returns the feature vector as map[string]float64
func (fv FeatureVector) ToNamedMap(paramNames []string) map[string]float64 {
m := map[string]float64{
"avg_cpu": fv.AvgCPUPercent,
"max_cpu": fv.MaxCPUPercent,
"std_cpu": fv.StdCPUPercent,
"avg_iowait": fv.AvgCPUIoWait,
"std_iowait": fv.StdCPUIoWait,
"avg_softirq": fv.AvgCPUSoftIrq,
"avg_ctx_switches": fv.AvgCtxSwitches,
"avg_interrupts": fv.AvgInterrupts,
"avg_softnet_dropped": fv.AvgSoftnetDropped,
"avg_softnet_squeeze": fv.AvgSoftnetSqueeze,
"avg_net_in": fv.AvgNetInMBps,
"std_net_in": fv.StdNetInMBps,
"avg_net_out": fv.AvgNetOutMBps,
"std_net_out": fv.StdNetOutMBps,
"avg_net_drops": fv.AvgNetDrops,
"sum_tcp_retrans": fv.SumTCPRetrans,
"sum_tcp_fast_retrans": fv.SumTCPFastRetrans,
"sum_tcp_timeouts": fv.SumTCPTimeouts,
"avg_disk_read": fv.AvgDiskReadMBps,
"avg_disk_write": fv.AvgDiskWriteMBps,
"avg_disk_io_ticks": fv.AvgDiskIOTicks,
"std_disk_io_ticks": fv.StdDiskIOTicks,
"error_count": float64(fv.ErrorCount),
"severity_score": fv.SeverityScore,
"cpu_delta": fv.CPUDelta,
"cpu_roll_std": fv.CPURollStd,
"cpu_efficiency": fv.CPUEfficiency,
"delta_ctx": fv.DeltaCtx,
"net_delta": fv.NetDelta,
"avg_net_throughput": fv.AvgNetThroughput,
"cpu_per_mb": fv.CPUPerMB,
"network_disk_ratio": fv.NetworkDiskRatio,
"retrans_per_packet": fv.RetransPerPacket,
"retrans_per_mb": fv.RetransPerMB,
"avg_disk_latency_ms": fv.AvgDiskLatencyMS,
"log_count_total": float64(fv.LogCountTotal),
"unique_templates": float64(fv.UniqueTemplates),
"log_density": fv.LogDensity,
"io_wait_proxy": fv.IOWaitProxy,
"delta_net_in": fv.DeltaNetIn,
"delta_tcp_retrans": fv.DeltaTCPRetrans,
"tcp_roll_std": fv.TcpRollStd,
"net_roll_std": fv.NetRollStd,
"mem_pressure": fv.MemPressure,
"net_asymmetry": fv.NetAsymmetry,
}
for _, name := range paramNames {
m["avg_param_"+name] = fv.ParamAvg[name]
}
return m
}
// ── AnomalyResult ─────────────────────────────────────────────────────────────
// AnomalyResult is the final output of the detection layer.
type AnomalyResult struct {
Timestamp time.Time `json:"timestamp"`
Score float64 `json:"score"`
IsAnomaly bool `json:"is_anomaly"`
Confidence float64 `json:"confidence"`
Method string `json:"method"`
Details string `json:"details,omitempty"`
}
// ── StageHealth ───────────────────────────────────────────────────────────────
// StageHealth stores per-stage monitoring counters.
type StageHealth struct {
StageName string `json:"stage_name"`
EventsProcessed uint64 `json:"events_processed"`
EventsDropped uint64 `json:"events_dropped"`
AvgLatencyMs float64 `json:"avg_latency_ms"`
Throughput float64 `json:"throughput_eps"`
LastUpdate time.Time `json:"last_update"`
}