// Package main is the entry point of the watch-tool system monitor. It loads
// the configuration, starts the configured monitors and exporters as
// goroutines, and waits for a termination signal before shutting them down.
package main

import (
	"context"
	"log/slog"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	"codeberg.org/pata1704/drain3"

	"watch-tool/helpers"
	"watch-tool/models"
	"watch-tool/patterns"
)

// currentHostname is the host name reported by os.Hostname, or "unknown" when
// it could not be determined. It is assigned once in init.
var currentHostname string

// init resolves currentHostname, falling back to "unknown" on error.
func init() {
	var err error
	currentHostname, err = os.Hostname()
	if err != nil {
		currentHostname = "unknown"
		slog.Warn("Could not determine hostname, using fallback", "fallback", currentHostname)
	}
}

// main runs the monitor and exits with the status code it reports.
//
// All work happens in runMonitor so that its deferred cleanup (closing the
// storage, cancelling the context) runs before the process exits; os.Exit
// itself never runs deferred functions.
func main() {
	os.Exit(runMonitor())
}

// runMonitor wires up and runs the whole application and returns the process
// exit code: 0 after a graceful shutdown, 1 when startup fails.
//
// Startup failures return instead of calling os.Exit, so the deferred
// storage.Close() is executed even when a later step (for example the
// Elasticsearch exporter setup) fails. The only remaining direct os.Exit is
// the forced quit after the shutdown timeout.
func runMonitor() int {
	const (
		// logChanBuffer is the capacity of the channel shared by all monitors.
		logChanBuffer = 1000
		// shutdownTimeout bounds how long we wait for goroutines after cancel.
		shutdownTimeout = 10 * time.Second
	)

	cfg, err := LoadConfig()
	if err != nil {
		slog.Error("Startup failed: configuration error", "error", err)
		return 1
	}
	slog.Info("System Monitor started", "hostname", currentHostname)

	if err := patterns.GetInstance().Load(cfg.PatternsFile); err != nil {
		slog.Error("Startup failed: could not load patterns", "file", cfg.PatternsFile, "error", err)
		return 1
	}
	slog.Info("Regex patterns loaded successfully", "file", cfg.PatternsFile)

	// d3Cfg stays nil when Drain3 is disabled; the monitors receive it as is.
	var d3Cfg *drain3.Config
	if cfg.Drain3.Enabled {
		d3Cfg = &drain3.Config{
			Depth:       cfg.Drain3.Depth,
			SimTh:       cfg.Drain3.SimThreshold,
			MaxChildren: cfg.Drain3.MaxChildren,
		}
		slog.Info("Drain3 anomaly detection enabled", "state_dir", cfg.Drain3.StateDir)
	} else {
		slog.Info("Drain3 anomaly detection disabled")
	}

	// Local storage is mandatory: every component below is handed the storage.
	if !cfg.LocalStorage.Enable {
		slog.Error("Local storage is disabled, but it's required for the new architecture")
		return 1
	}
	rotationConfig := StorageRotationConfig{
		MaxSizeBytes:         cfg.LocalStorage.RotationConfig.MaxSizeBytes,
		MaxAgeHours:          cfg.LocalStorage.RotationConfig.GetMaxAge(),
		MaxFiles:             cfg.LocalStorage.RotationConfig.MaxFiles,
		CheckIntervalMinutes: cfg.LocalStorage.RotationConfig.GetCheckInterval(),
		ArchiveDir:           cfg.LocalStorage.RotationConfig.ArchiveDir,
	}
	sqliteStorage, err := NewSQLiteStorageWithRotation(cfg.LocalStorage.DBPath, rotationConfig)
	if err != nil {
		slog.Error("failed to initialize SQLite storage", "error", err)
		return 1
	}
	var storage StorageInterface = sqliteStorage
	defer storage.Close()
	slog.Info("SQLite storage initialized", "path", cfg.LocalStorage.DBPath)

	// exportManager stays nil when exporting is disabled.
	var exportManager *ExportManager
	if cfg.Export.Enabled {
		exportConfig := ExportManagerConfig{
			BatchSize:           cfg.Export.BatchSize,
			ExportInterval:      cfg.Export.ExportInterval,
			RetryAttempts:       cfg.Export.RetryAttempts,
			RetryBackoff:        cfg.Export.RetryBackoff,
			HealthCheckInterval: cfg.Export.HealthCheckInterval,
		}
		exportManager = NewExportManager(storage, exportConfig)

		if cfg.Elasticsearch.Enabled {
			esExporter, err := NewElasticsearchExporter(cfg.Elasticsearch)
			if err != nil {
				slog.Error("failed to create Elasticsearch exporter", "error", err)
				return 1
			}
			if err := esExporter.HealthCheck(context.Background()); err != nil {
				slog.Error("Elasticsearch health check failed", "error", err)
				return 1
			}
			exportManager.RegisterExporter("elasticsearch", esExporter)
			slog.Info("Elasticsearch exporter registered")
		}
	}

	logChan := make(chan models.LogMessage, logChanBuffer)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// wg tracks every goroutine started below so shutdown can wait for them.
	var wg sync.WaitGroup

	wg.Add(1)
	helpers.SafeGo(ctx, "LogProcessor", func() {
		defer wg.Done()
		processor := NewLogProcessor(storage)
		processor.Start(ctx, logChan)
	})

	if exportManager != nil {
		wg.Add(1)
		helpers.SafeGo(ctx, "ExportManager", func() {
			defer wg.Done()
			exportManager.Start(ctx)
		})
	}

	for _, service := range cfg.Services {
		if !service.Enabled {
			slog.Debug("Service deactivated, skipping...", "service", service.Name)
			continue
		}
		wg.Add(1)
		srv := service // per-iteration copy for the closure
		helpers.SafeGo(ctx, "ServiceMonitor-"+srv.Name, func() {
			defer wg.Done()
			monitor := NewServiceMonitor(srv, currentHostname, d3Cfg, cfg.Drain3.StateDir)
			if err := monitor.Start(ctx, logChan); err != nil {
				slog.Error("Error watching service", "service", srv.Name, "error", err)
			}
		})
		slog.Info("Started watching Service-Log", "service", service.Name)
	}

	for _, tool := range cfg.Tools {
		if !tool.Enabled {
			slog.Debug("Tool is deactivated, skipping...", "tool", tool.Name)
			continue
		}
		wg.Add(1)
		t := tool // per-iteration copy for the closure
		helpers.SafeGo(ctx, "FileMonitor-"+t.Name, func() {
			defer wg.Done()
			monitor := NewFileMonitor(t, currentHostname, d3Cfg, cfg.Drain3.StateDir)
			if err := monitor.Start(ctx, logChan); err != nil {
				slog.Error("Error watching tool", "tool", t.Name, "error", err)
			}
		})
		slog.Info("Started watching logs", "tool", tool.Name, "file", tool.LogFile)
	}

	if cfg.SystemMetrics.Enabled {
		wg.Add(1)
		helpers.SafeGo(ctx, "SystemMetrics", func() {
			defer wg.Done()
			collector := NewSystemMetricsCollector(cfg.SystemMetrics, cfg.PollIntervalSeconds, currentHostname)
			collector.Start(ctx, storage, logChan)
		})
		slog.Info("Started collecting System-Metrics")
	}

	if cfg.WebService.Enabled {
		wg.Add(1)
		helpers.SafeGo(ctx, "WebService", func() {
			defer wg.Done()
			webService := NewWebService(cfg, storage)
			if err := webService.Start(ctx); err != nil {
				slog.Error("Web service error", "error", err)
			}
		})
		slog.Info("Web service started", "host", cfg.WebService.Host, "port", cfg.WebService.Port)
	}

	// Block until the process is asked to stop.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
	s := <-sigCh
	slog.Info("Shutdown signal received, stopping threads...", "signal", s)
	cancel()

	// Turn wg.Wait into a channel so it can be raced against the timeout.
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()

	select {
	case <-done:
		// Every goroutine tracked by wg has returned at this point.
		close(logChan)
		slog.Info("All threads closed gracefully")
	case <-time.After(shutdownTimeout):
		slog.Error("Shutdown timeout reached, force quitting")
		// Deliberately exit without running the deferred cleanup: goroutines
		// that failed to stop may still be using the storage, and closing it
		// underneath them could delay or block the forced exit.
		os.Exit(1)
	}

	slog.Info("Program stopped")
	return 0
}