217 lines
5.6 KiB
Go
217 lines
5.6 KiB
Go
package main
|
|
|
|
import (
|
|
"codeberg.org/pata1704/drain3"
|
|
"context"
|
|
"log/slog"
|
|
"os"
|
|
"os/signal"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
"watch-tool/helpers"
|
|
"watch-tool/models"
|
|
"watch-tool/patterns"
|
|
)
|
|
|
|
// currentHostname holds the name of the machine this process runs on. It is
// populated once by init and read when the monitors and collectors are built.
var currentHostname string

// init resolves the local hostname at program start. When the lookup fails,
// the literal "unknown" is stored instead and a warning is logged, so
// currentHostname is never left empty because of a lookup error.
func init() {
	name, lookupErr := os.Hostname()
	if lookupErr == nil {
		currentHostname = name
		return
	}

	currentHostname = "unknown"
	slog.Warn("Could not determine hostname, using fallback", "fallback", currentHostname)
}
|
|
|
|
func main() {
|
|
cfg, err := LoadConfig()
|
|
if err != nil {
|
|
slog.Error("Startup failed: configuration error", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
slog.Info("System Monitor started", "hostname", currentHostname)
|
|
|
|
if err := patterns.GetInstance().Load(cfg.PatternsFile); err != nil {
|
|
slog.Error("Startup failed: could not load patterns", "file", cfg.PatternsFile, "error", err)
|
|
os.Exit(1)
|
|
}
|
|
slog.Info("Regex patterns loaded successfully", "file", cfg.PatternsFile)
|
|
|
|
var d3Cfg *drain3.Config
|
|
if cfg.Drain3.Enabled {
|
|
d3Cfg = &drain3.Config{
|
|
Depth: cfg.Drain3.Depth,
|
|
SimTh: cfg.Drain3.SimThreshold,
|
|
MaxChildren: cfg.Drain3.MaxChildren,
|
|
}
|
|
slog.Info("Drain3 anomaly detection enabled", "state_dir", cfg.Drain3.StateDir)
|
|
} else {
|
|
slog.Info("Drain3 anomaly detection disabled")
|
|
}
|
|
|
|
var storage StorageInterface
|
|
if cfg.LocalStorage.Enable {
|
|
rotationConfig := StorageRotationConfig{
|
|
MaxSizeBytes: cfg.LocalStorage.RotationConfig.MaxSizeBytes,
|
|
MaxAgeHours: cfg.LocalStorage.RotationConfig.GetMaxAge(),
|
|
MaxFiles: cfg.LocalStorage.RotationConfig.MaxFiles,
|
|
CheckIntervalMinutes: cfg.LocalStorage.RotationConfig.GetCheckInterval(),
|
|
ArchiveDir: cfg.LocalStorage.RotationConfig.ArchiveDir,
|
|
}
|
|
|
|
sqliteStorage, err := NewSQLiteStorageWithRotation(cfg.LocalStorage.DBPath, rotationConfig)
|
|
if err != nil {
|
|
slog.Error("failed to initialize SQLite storage", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
storage = sqliteStorage
|
|
defer storage.Close()
|
|
slog.Info("SQLite storage initialized", "path", cfg.LocalStorage.DBPath)
|
|
} else {
|
|
slog.Error("Local storage is disabled, but it's required for the new architecture")
|
|
os.Exit(1)
|
|
}
|
|
|
|
var exportManager *ExportManager
|
|
if cfg.Export.Enabled {
|
|
exportConfig := ExportManagerConfig{
|
|
BatchSize: cfg.Export.BatchSize,
|
|
ExportInterval: cfg.Export.ExportInterval,
|
|
RetryAttempts: cfg.Export.RetryAttempts,
|
|
RetryBackoff: cfg.Export.RetryBackoff,
|
|
HealthCheckInterval: cfg.Export.HealthCheckInterval,
|
|
}
|
|
|
|
exportManager = NewExportManager(storage, exportConfig)
|
|
|
|
if cfg.Elasticsearch.Enabled {
|
|
esExporter, err := NewElasticsearchExporter(cfg.Elasticsearch)
|
|
if err != nil {
|
|
slog.Error("failed to create Elasticsearch exporter", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
if err := esExporter.HealthCheck(context.Background()); err != nil {
|
|
slog.Error("Elasticsearch health check failed", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
exportManager.RegisterExporter("elasticsearch", esExporter)
|
|
slog.Info("Elasticsearch exporter registered")
|
|
}
|
|
}
|
|
|
|
logChan := make(chan models.LogMessage, 1000)
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(1)
|
|
helpers.SafeGo(ctx, "LogProcessor", func() {
|
|
defer wg.Done()
|
|
processor := NewLogProcessor(storage)
|
|
processor.Start(ctx, logChan)
|
|
})
|
|
|
|
if exportManager != nil {
|
|
wg.Add(1)
|
|
helpers.SafeGo(ctx, "ExportManager", func() {
|
|
defer wg.Done()
|
|
exportManager.Start(ctx)
|
|
})
|
|
}
|
|
|
|
for _, service := range cfg.Services {
|
|
if !service.Enabled {
|
|
slog.Debug("Service deactivated, skipping...", "service", service.Name)
|
|
continue
|
|
}
|
|
|
|
wg.Add(1)
|
|
srv := service
|
|
|
|
helpers.SafeGo(ctx, "ServiceMonitor-"+srv.Name, func() {
|
|
defer wg.Done()
|
|
monitor := NewServiceMonitor(srv, currentHostname, d3Cfg, cfg.Drain3.StateDir)
|
|
|
|
if err := monitor.Start(ctx, logChan); err != nil {
|
|
slog.Error("Error watching service", "service", srv.Name, "error", err)
|
|
}
|
|
})
|
|
|
|
slog.Info("Started watching Service-Log", "service", service.Name)
|
|
}
|
|
|
|
for _, tool := range cfg.Tools {
|
|
if !tool.Enabled {
|
|
slog.Debug("Tool is deactivated, skipping...", "tool", tool.Name)
|
|
continue
|
|
}
|
|
|
|
wg.Add(1)
|
|
t := tool
|
|
|
|
helpers.SafeGo(ctx, "FileMonitor-"+t.Name, func() {
|
|
defer wg.Done()
|
|
|
|
monitor := NewFileMonitor(t, currentHostname, d3Cfg, cfg.Drain3.StateDir)
|
|
|
|
if err := monitor.Start(ctx, logChan); err != nil {
|
|
slog.Error("Error watching tool", "tool", t.Name, "error", err)
|
|
}
|
|
})
|
|
|
|
slog.Info("Started watching logs", "tool", tool.Name, "file", tool.LogFile)
|
|
}
|
|
|
|
if cfg.SystemMetrics.Enabled {
|
|
wg.Add(1)
|
|
helpers.SafeGo(ctx, "SystemMetrics", func() {
|
|
defer wg.Done()
|
|
collector := NewSystemMetricsCollector(cfg.SystemMetrics, cfg.PollIntervalSeconds, currentHostname)
|
|
collector.Start(ctx, storage, logChan)
|
|
})
|
|
slog.Info("Started collecting System-Metrics")
|
|
}
|
|
|
|
if cfg.WebService.Enabled {
|
|
wg.Add(1)
|
|
helpers.SafeGo(ctx, "WebService", func() {
|
|
defer wg.Done()
|
|
webService := NewWebService(cfg, storage)
|
|
if err := webService.Start(ctx); err != nil {
|
|
slog.Error("Web service error", "error", err)
|
|
}
|
|
})
|
|
slog.Info("Web service started", "host", cfg.WebService.Host, "port", cfg.WebService.Port)
|
|
}
|
|
|
|
sigCh := make(chan os.Signal, 1)
|
|
signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
|
|
|
|
s := <-sigCh
|
|
slog.Info("Shutdown signal received, stopping threads...", "signal", s)
|
|
|
|
cancel()
|
|
|
|
done := make(chan struct{})
|
|
go func() {
|
|
wg.Wait()
|
|
close(done)
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
close(logChan)
|
|
slog.Info("All threads closed gracefully")
|
|
case <-time.After(10 * time.Second):
|
|
slog.Error("Shutdown timeout reached, force quitting")
|
|
os.Exit(1)
|
|
}
|
|
|
|
slog.Info("Program stopped")
|
|
}
|