// Package config provides the pipeline configuration loaded from YAML. package config import ( "fmt" "os" "regexp" "time" "gopkg.in/yaml.v3" ) // MaskingPattern is a single entry in drain.masking_patterns. type MaskingPattern struct { Name string `yaml:"name"` Pattern string `yaml:"pattern"` Replace string `yaml:"replace"` Type string `yaml:"type"` Re *regexp.Regexp } // MADConfig defines parameters for the MAD detector. type MADConfig struct { // Threshold is the modified Z-score cutoff for IsAnomaly. // Recommended: 3.0–4.0. Default: 3.5. Threshold float64 `yaml:"threshold"` // CalibrationSize is the number of NormalizedVectors to buffer before // automatic per-feature median/MAD calibration runs. // Default (if 0): 100. CalibrationSize int `yaml:"calibration_size"` } // COPODConfig defines the parameters for the Copula-Based Outlier detector. type COPODConfig struct { Threshold float64 `yaml:"threshold"` BufferSize int `yaml:"buffer_size"` } // RRCFConfig defines the parameters for the Robust Random Cut Forest detector. // Used for the standalone RRCF detector and the classic AVG/MAX/MEDIAN ensemble paths. type RRCFConfig struct { NumTrees int `yaml:"num_trees"` TreeSize int `yaml:"tree_size"` ThresholdPercentile float64 `yaml:"threshold_percentile"` } // RRCFVariantConfig holds parameters for a single named RRCF variant inside // the SEAD multi-horizon ensemble. type RRCFVariantConfig struct { // NumTrees controls score stability: more trees → smoother/conservative. NumTrees int `yaml:"num_trees"` // TreeSize sets the sliding-window capacity per tree. TreeSize int `yaml:"tree_size"` // ThresholdPercentile is the per-model decision threshold (standalone use). ThresholdPercentile float64 `yaml:"threshold_percentile"` } // RRCFVariantsConfig groups the three RRCF variants used by the SEAD ensemble. 
// Each variant captures anomalies at a different time-horizon:
//   - Fast: short memory, reactive to transient spikes
//   - Mid: medium memory, balanced sensitivity
//   - Slow: long memory, detects sustained / slow-drift events
//
// The variants are read from the YAML keys "fast", "mid" and "slow".
type RRCFVariantsConfig struct {
	Fast RRCFVariantConfig `yaml:"fast"`
	Mid  RRCFVariantConfig `yaml:"mid"`
	Slow RRCFVariantConfig `yaml:"slow"`
}

// SEADConfig holds tunable parameters for the SEAD ensemble.
// Only used when EnsembleConfig.Method == "sead".
//
// NOTE(review): the defaults quoted below are not assigned anywhere in the
// declarations here; presumably the consumer substitutes them for zero
// values — confirm.
type SEADConfig struct {
	// Eta is the MWU learning rate η ∈ (0, 1].
	// Higher values react faster to distribution shifts but are noisier.
	// Recommended: 0.05–0.20. Default (if 0): 0.10.
	Eta float64 `yaml:"eta"`

	// Lambda is the KL-divergence regularisation strength.
	// 0 = pure MWU (uniform prior). Recommended: 0.0–0.05. Default: 0.01.
	Lambda float64 `yaml:"lambda"`

	// QuantileWindow is the number of past scores retained per detector for
	// streaming quantile normalisation. Default (if 0): 300.
	QuantileWindow int `yaml:"quantile_window"`

	// MinDataPoints is the cold-start guard: no anomaly is flagged until at
	// least this many windows have been scored. Default (if 0): 20.
	MinDataPoints int `yaml:"min_data_points"`
}

// EnsembleConfig manages the routing for the multi-model detector.
type EnsembleConfig struct {
	// Enabled is read from the YAML key "enabled"; presumably it switches the
	// ensemble path on or off — confirm against the detector wiring.
	Enabled bool `yaml:"enabled"`

	// Method selects the score-aggregation strategy.
	// Allowed values: "avg" (default), "max", "median", "sead".
	// "sead": adaptive Multiplicative Weights Update ensemble (Shah et al., ICML 2025).
	Method string `yaml:"method"`

	// Contamination is the expected fraction of anomalous windows ∈ [0, 0.5).
	// Determines the decision threshold as quantile(1-contamination) of
	// the rolling combined score history.
	Contamination float64 `yaml:"contamination"`

	// SEAD tuning parameters (only applied when Method == "sead").
	SEAD SEADConfig `yaml:"sead"`
}

// AutoScalingConfig holds thresholds and durations for dynamic detector switching.
type AutoScalingConfig struct {
	// The thresholds and durations below are plain float64 values; per the
	// examples, durations are expressed in seconds.
	Enabled       bool    `yaml:"enabled"`
	HighThreshold float64 `yaml:"high_threshold"`     // e.g. 0.75 (Normal -> High)
	CritThreshold float64 `yaml:"critical_threshold"` // e.g. 0.90 (High -> Critical)
	HighDuration  float64 `yaml:"high_duration"`      // e.g. 30.0 (seconds)
	CritDuration  float64 `yaml:"critical_duration"`  // e.g. 15.0 (seconds)
	DownThreshold float64 `yaml:"down_threshold"`     // e.g. 0.50 (back to Normal)
	DownDuration  float64 `yaml:"down_duration"`      // e.g. 60.0 (seconds)
}

// DetectorConfig groups all anomaly detection configurations.
type DetectorConfig struct {
	// Method is read from the YAML key "method"; presumably it names the
	// active detector — the accepted values are not visible here, confirm
	// against the detector factory.
	Method   string         `yaml:"method"`
	Ensemble EnsembleConfig `yaml:"ensemble"`
	MAD      MADConfig      `yaml:"mad"`
	COPOD    COPODConfig    `yaml:"copod"`
	// RRCF is used by the standalone detector and the AVG/MAX/MEDIAN ensemble paths.
	RRCF RRCFConfig `yaml:"rrcf"`
	// RRCFVariants configures the three-horizon RRCF instances for the SEAD ensemble.
	// Defaults are applied automatically when fields are zero.
	RRCFVariants RRCFVariantsConfig `yaml:"rrcf_variants"`
	AutoScaling  AutoScalingConfig  `yaml:"auto_scaling"`
}

// Config is the top-level pipeline configuration.
// Each field corresponds to one top-level section of the YAML document.
type Config struct {
	// Ingestion is the "ingestion" section: a log path, a network interface
	// name, a disk device name and a list of systemctl service names.
	Ingestion struct {
		LogPath           string   `yaml:"log_path"`
		NetInterface      string   `yaml:"net_interface"`
		DiskDevice        string   `yaml:"disk_device"`
		SystemctlServices []string `yaml:"systemctl_services"`
	} `yaml:"ingestion"`

	// Transformation is the "transformation" section.
	Transformation struct {
		// WindowSize is a time.Duration; presumably written in YAML as a Go
		// duration string such as "30s" — confirm against yaml.v3 decoding.
		WindowSize time.Duration `yaml:"window_size"`
		DbPath     string        `yaml:"db_path"`
	} `yaml:"transformation"`

	// Drain is the "drain" section. Its MaskingPatterns are the entries
	// processed by Compile and NumericPatternNames.
	Drain struct {
		Depth           int              `yaml:"depth"`
		SimThreshold    float64          `yaml:"sim_threshold"`
		MaxChildren     int              `yaml:"max_children"`
		MaxClusters     int              `yaml:"max_clusters"`
		MaskingPatterns []MaskingPattern `yaml:"masking_patterns"`
	} `yaml:"drain"`

	// Detection is read from the YAML key "detector" (note that the key
	// differs from the Go field name).
	Detection DetectorConfig `yaml:"detector"`

	// Output is the "output" section: paths for the feature log and the
	// anomaly log.
	Output struct {
		FeatureLogPath string `yaml:"feature_log_path"`
		AnomalyLogPath string `yaml:"anomaly_log_path"`
	} `yaml:"output"`
}

// LoadConfig reads and decodes the YAML file at path.
func LoadConfig(path string) (*Config, error) { f, err := os.Open(path) if err != nil { return nil, fmt.Errorf("config: open %q: %w", path, err) } defer f.Close() var cfg Config dec := yaml.NewDecoder(f) dec.KnownFields(false) if err := dec.Decode(&cfg); err != nil { return nil, fmt.Errorf("config: decode %q: %w", path, err) } return &cfg, nil } // Compile compiles all MaskingPattern.Pattern strings into *regexp.Regexp. func (c *Config) Compile() error { for i := range c.Drain.MaskingPatterns { mp := &c.Drain.MaskingPatterns[i] re, err := regexp.Compile(mp.Pattern) if err != nil { return fmt.Errorf("config: compile pattern %q: %w", mp.Name, err) } mp.Re = re } return nil } // NumericPatternNames returns the ordered list of MaskingPattern names whose // Type is "float" or "int". func (c *Config) NumericPatternNames() []string { names := make([]string, 0, len(c.Drain.MaskingPatterns)) for _, mp := range c.Drain.MaskingPatterns { if mp.Name != "" && (mp.Type == "float" || mp.Type == "int") { names = append(names, mp.Name) } } return names }