feat(system-metrics,service-metrics,elastic): update elastic-client to version 8, improve parser logic to add more information, improve system monitor to add more information

This commit is contained in:
Patryk Hegenberg 2025-09-15 08:25:20 +02:00
parent 6c098ed61c
commit 159df116c8
10 changed files with 587 additions and 190 deletions

View file

@ -4,28 +4,41 @@ import (
"context"
"fmt"
"log/slog"
"net"
"os"
"slices"
"sort"
"strconv"
"strings"
"syscall"
"time"
"github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v8"
"github.com/shirou/gopsutil/cpu"
"github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/host"
"github.com/shirou/gopsutil/load"
"github.com/shirou/gopsutil/mem"
"github.com/shirou/gopsutil/net"
psnet "github.com/shirou/gopsutil/net"
"github.com/shirou/gopsutil/process"
"golang.org/x/sys/unix"
)
// SystemMetricsCollector gathers host-level metrics and keeps the previous
// sample's counters so that rate-based values can be derived between calls.
type SystemMetricsCollector struct {
	// config selects which metric groups are collected and carries their
	// settings.
	config SystemMetrics
	// pollInterval is the configured polling interval.
	// NOTE(review): unit is not visible here (presumably seconds) — confirm.
	pollInterval int
	// lastNetworkStats holds the per-interface counters from the previous
	// bandwidth sample, keyed by interface name.
	lastNetworkStats map[string]NetworkStat
	// lastDiskStats holds per-device disk I/O statistics from a previous
	// sample, keyed by device name.
	lastDiskStats map[string]DiskIOStat
	// lastMeasureTime is the reference instant used to compute the elapsed
	// time for rate calculations.
	lastMeasureTime time.Time
}
// NewSystemMetricsCollector returns a collector for the given configuration
// and poll interval. The previous-sample maps are allocated empty and the
// measurement reference time is set to the moment of construction.
func NewSystemMetricsCollector(config SystemMetrics, pollInterval int) *SystemMetricsCollector {
	return &SystemMetricsCollector{
		config:           config,
		pollInterval:     pollInterval,
		lastNetworkStats: make(map[string]NetworkStat),
		lastDiskStats:    make(map[string]DiskIOStat),
		lastMeasureTime:  time.Now(),
	}
}
@ -88,10 +101,306 @@ func (smc *SystemMetricsCollector) collectMetrics() (SystemResources, error) {
slog.Warn("failed to collect process metrics", "error", err)
}
}
if smc.config.CollectDiskIO {
if err = smc.collectDiskIOMetrics(&result); err != nil {
slog.Warn("failed to collect disk IO metrics", "error", err)
}
}
if smc.config.CollectNetworkConnections {
if err = smc.collectNetworkConnections(&result); err != nil {
slog.Warn("failed to collect network connections", "error", err)
}
}
if smc.config.CollectLoadAverage {
if err = smc.collectLoadAverage(&result); err != nil {
slog.Warn("failed to collect load average", "error", err)
}
}
if smc.config.CollectTCPStats {
if err = smc.collectTCPStats(&result); err != nil {
slog.Warn("failed to collect TCP stats", "error", err)
}
}
if smc.config.CollectNetworkLatency {
if err = smc.collectNetworkLatency(&result); err != nil {
slog.Warn("failed to collect network latency", "error", err)
}
}
if smc.config.CollectBandwidthUsage {
if err = smc.collectBandwidthUsage(&result); err != nil {
slog.Warn("failed to collect bandwidth usage", "error", err)
}
}
if smc.config.CollectFileHandles {
if err = smc.collectSystemLimits(&result); err != nil {
slog.Warn("failed to collect system limits", "error", err)
}
}
return result, nil
}
// collectDiskIOMetrics stores per-device disk I/O statistics in
// result.DiskIOStats.
//
// Byte, operation and time counters are copied as reported by
// disk.IOCounters (cumulative values). The average latencies are the
// cumulative read/write time divided by the cumulative operation count.
//
// IOUtilization is the read+write time accrued since the previous call
// (remembered in smc.lastDiskStats) relative to the time elapsed since
// smc.lastMeasureTime, expressed as a percentage and capped at 100. It is
// left at zero for a device seen for the first time or whose counters went
// backwards (e.g. after a device reset).
//
// It returns the error from disk.IOCounters, if any.
func (smc *SystemMetricsCollector) collectDiskIOMetrics(result *SystemResources) error {
	counters, err := disk.IOCounters()
	if err != nil {
		return err
	}

	// NOTE(review): lastMeasureTime is only advanced by
	// collectBandwidthUsage; when that collector is disabled the window keeps
	// growing and utilization is understated.
	elapsed := time.Since(smc.lastMeasureTime).Seconds()

	// Guard against collectors built without the constructor.
	if smc.lastDiskStats == nil {
		smc.lastDiskStats = make(map[string]DiskIOStat, len(counters))
	}

	result.DiskIOStats = make(map[string]DiskIOStat, len(counters))
	for device, stats := range counters {
		ioStat := DiskIOStat{
			ReadBytes:  stats.ReadBytes,
			WriteBytes: stats.WriteBytes,
			ReadOps:    stats.ReadCount,
			WriteOps:   stats.WriteCount,
			ReadTime:   stats.ReadTime,
			WriteTime:  stats.WriteTime,
		}

		if stats.ReadCount > 0 {
			ioStat.AvgReadLatency = float64(stats.ReadTime) / float64(stats.ReadCount)
		}
		if stats.WriteCount > 0 {
			ioStat.AvgWriteLatency = float64(stats.WriteTime) / float64(stats.WriteCount)
		}

		// Utilization must be based on the time spent on I/O during this
		// interval only; dividing the cumulative counters by the interval
		// would pin the value at the 100% cap.
		busyNow := stats.ReadTime + stats.WriteTime
		if prev, seen := smc.lastDiskStats[device]; seen && elapsed > 0 {
			busyBefore := prev.ReadTime + prev.WriteTime
			if busyNow >= busyBefore {
				// Counters are in milliseconds, elapsed is in seconds.
				ioStat.IOUtilization = float64(busyNow-busyBefore) / (elapsed * 1000) * 100
				if ioStat.IOUtilization > 100 {
					ioStat.IOUtilization = 100
				}
			}
		}

		smc.lastDiskStats[device] = ioStat
		result.DiskIOStats[device] = ioStat
	}

	return nil
}
// collectNetworkConnections summarizes the host's current connections into
// result.NetworkConnections: a total, a count per reported status, dedicated
// counters for the ESTABLISHED, LISTEN and TIME_WAIT states, and the number
// of connections whose local or remote port is one of the configured
// transfer ports.
//
// It returns the error from psnet.Connections, if any.
func (smc *SystemMetricsCollector) collectNetworkConnections(result *SystemResources) error {
	conns, err := psnet.Connections("all")
	if err != nil {
		return err
	}

	transferPorts := smc.config.TransferPorts
	summary := ConnectionStats{
		ConnectionsByState: make(map[string]int32),
	}

	for i := range conns {
		state := conns[i].Status
		summary.TotalConnections++
		summary.ConnectionsByState[state]++

		switch state {
		case "ESTABLISHED":
			summary.EstablishedTCP++
		case "LISTEN":
			summary.ListeningTCP++
		case "TIME_WAIT":
			summary.TimeWaitTCP++
		}

		localPort := int(conns[i].Laddr.Port)
		remotePort := int(conns[i].Raddr.Port)
		onTransferPort := slices.Contains(transferPorts, localPort) ||
			slices.Contains(transferPorts, remotePort)
		if onTransferPort {
			summary.TransferConnections++
		}
	}

	result.NetworkConnections = summary
	return nil
}
// collectLoadAverage appends the 1-, 5- and 15-minute load averages, in that
// order, to result.LoadAverage.
//
// It returns the error from load.Avg, if any.
func (smc *SystemMetricsCollector) collectLoadAverage(result *SystemResources) error {
	avg, err := load.Avg()
	if err != nil {
		return err
	}

	result.LoadAverage = append(result.LoadAverage, avg.Load1, avg.Load5, avg.Load15)
	return nil
}
// collectTCPStats reads /proc/net/netstat and stores a TCPStatistics value in
// result.TCPStats. A failure to read the file is ignored, and the function
// always returns nil.
//
// NOTE(review): the "TcpExt:" lines are located but not parsed, so the stored
// statistics are always the zero value.
func (smc *SystemMetricsCollector) collectTCPStats(result *SystemResources) error {
	var tcpStats TCPStatistics

	raw, readErr := os.ReadFile("/proc/net/netstat")
	if readErr == nil {
		for entry := range strings.SplitSeq(string(raw), "\n") {
			if !strings.HasPrefix(entry, "TcpExt:") {
				continue
			}
			// TODO: extract the TcpExt counters into tcpStats.
		}
	}

	result.TCPStats = tcpStats
	return nil
}
// collectNetworkLatency probes every host listed in
// smc.config.LatencyTestHosts with measureLatency and stores the outcome in
// result.NetworkLatency, keyed by host. It always returns nil.
func (smc *SystemMetricsCollector) collectNetworkLatency(result *SystemResources) error {
	targets := smc.config.LatencyTestHosts
	measured := make(map[string]LatencyInfo)
	result.NetworkLatency = measured

	for i := range targets {
		target := targets[i]
		measured[target] = smc.measureLatency(target)
	}

	return nil
}
// latencyDialAddress returns the "host:port" address to probe for host.
// A host that already carries a port is used unchanged; otherwise port 80 is
// appended, with IPv6 literals bracketed as net.Dial requires.
func latencyDialAddress(host string) string {
	if _, port, err := net.SplitHostPort(host); err == nil && port != "" {
		return host
	}
	// Accept both "::1" and "[::1]"; JoinHostPort adds the brackets itself.
	bare := strings.TrimSuffix(strings.TrimPrefix(host, "["), "]")
	return net.JoinHostPort(bare, "80")
}

// measureLatency estimates the network latency to host by timing a series of
// TCP connection attempts (port 80 unless host specifies a port).
//
// Each attempt has a three second timeout and consecutive attempts are spaced
// 100ms apart. The returned LatencyInfo contains the minimum, maximum and
// average connect time of the successful attempts, the share of failed
// attempts as PacketLoss (in percent), and Jitter as the spread between the
// slowest and the fastest attempt. If no attempt succeeds, only Host and a
// PacketLoss of 100 are set.
func (smc *SystemMetricsCollector) measureLatency(host string) LatencyInfo {
	const (
		probeCount   = 5
		dialTimeout  = 3 * time.Second
		probeSpacing = 100 * time.Millisecond
	)

	address := latencyDialAddress(host)
	latencies := make([]time.Duration, 0, probeCount)

	for attempt := range probeCount {
		// Pause between probes only; there is nothing to wait for after the
		// final one.
		if attempt > 0 {
			time.Sleep(probeSpacing)
		}

		start := time.Now()
		conn, err := net.DialTimeout("tcp", address, dialTimeout)
		if err != nil {
			continue
		}
		latencies = append(latencies, time.Since(start))
		// The connection is only a probe; a close error carries no
		// information for the measurement.
		_ = conn.Close()
	}

	if len(latencies) == 0 {
		return LatencyInfo{Host: host, PacketLoss: 100.0}
	}

	var total time.Duration
	for _, lat := range latencies {
		total += lat
	}
	fastest := slices.Min(latencies)
	slowest := slices.Max(latencies)

	return LatencyInfo{
		Host:       host,
		MinLatency: fastest,
		MaxLatency: slowest,
		AvgLatency: total / time.Duration(len(latencies)),
		PacketLoss: float64(probeCount-len(latencies)) / probeCount * 100.0,
		Jitter:     slowest - fastest,
	}
}
// collectBandwidthUsage derives per-interface throughput from the change in
// the interface byte counters since the previous call and stores it in
// result.BandwidthUtilization, keyed by interface name.
//
// When smc.config.NetworkInterfaces is non-empty, only the listed interfaces
// are reported. Throughput is expressed in MiB/s. UtilizationPercent relates
// the combined in+out bit rate to an assumed link capacity of 1000 Mbit/s.
// An interface seen for the first time, or whose counters went backwards
// (e.g. after an interface reset), is reported with zero throughput.
//
// The counters of every interface and the measurement time are remembered
// for the next call. It returns the error from psnet.IOCounters, if any.
func (smc *SystemMetricsCollector) collectBandwidthUsage(result *SystemResources) error {
	const (
		// NOTE(review): the real link speed is not queried; 1 Gbit/s is
		// assumed for every interface.
		linkCapacityMbps = 1000.0
		bytesPerMiB      = 1024 * 1024
	)

	netStats, err := psnet.IOCounters(true)
	if err != nil {
		return err
	}

	currentTime := time.Now()
	timeDiff := currentTime.Sub(smc.lastMeasureTime).Seconds()

	// Guard against collectors built without the constructor.
	if smc.lastNetworkStats == nil {
		smc.lastNetworkStats = make(map[string]NetworkStat, len(netStats))
	}

	result.BandwidthUtilization = make(map[string]BandwidthInfo)
	for _, stat := range netStats {
		if len(smc.config.NetworkInterfaces) > 0 &&
			!slices.Contains(smc.config.NetworkInterfaces, stat.Name) {
			continue
		}

		bandwidth := BandwidthInfo{Interface: stat.Name}

		lastStat, exists := smc.lastNetworkStats[stat.Name]
		// The counters are unsigned: subtracting after a reset would wrap
		// around to a huge bogus value, so such samples are skipped.
		countersAdvanced := stat.BytesRecv >= lastStat.BytesRecv &&
			stat.BytesSent >= lastStat.BytesSent
		if exists && timeDiff > 0 && countersAdvanced {
			bytesDiffIn := float64(stat.BytesRecv - lastStat.BytesRecv)
			bytesDiffOut := float64(stat.BytesSent - lastStat.BytesSent)

			bandwidth.CurrentThroughputIn = (bytesDiffIn / timeDiff) / bytesPerMiB // MiB/s
			bandwidth.CurrentThroughputOut = (bytesDiffOut / timeDiff) / bytesPerMiB

			// NOTE(review): no history is kept, so the peak equals the
			// current sample.
			bandwidth.PeakThroughputIn = bandwidth.CurrentThroughputIn
			bandwidth.PeakThroughputOut = bandwidth.CurrentThroughputOut

			// Link capacity is in megabits per second, so compare against
			// the bit rate rather than the MiB/s figures above.
			totalMbps := (bytesDiffIn + bytesDiffOut) * 8 / timeDiff / 1e6
			bandwidth.UtilizationPercent = (totalMbps / linkCapacityMbps) * 100
		}

		result.BandwidthUtilization[stat.Name] = bandwidth
	}

	// Remember the counters of all interfaces, including filtered ones.
	for _, stat := range netStats {
		smc.lastNetworkStats[stat.Name] = NetworkStat{
			BytesSent:   stat.BytesSent,
			BytesRecv:   stat.BytesRecv,
			PacketsSent: stat.PacketsSent,
			PacketsRecv: stat.PacketsRecv,
		}
	}

	smc.lastMeasureTime = currentTime
	return nil
}
// collectSystemLimits fills result.SystemLimits with file-handle and process
// limits.
//
// MaxOpenFiles is parsed from /proc/sys/fs/file-max and CurrentOpenFiles from
// the first field of /proc/sys/fs/file-nr; FileDescriptorUsage is their ratio
// in percent when the maximum is known. MaxProcesses is the Max value of the
// RLIMIT_NPROC resource limit. Every source is best effort: a value that
// cannot be read or parsed is left at zero, and the function always returns
// nil.
func (smc *SystemMetricsCollector) collectSystemLimits(result *SystemResources) error {
	var info SystemLimitInfo

	maxRaw, maxErr := os.ReadFile("/proc/sys/fs/file-max")
	if maxErr == nil {
		parsed, parseErr := strconv.ParseUint(strings.TrimSpace(string(maxRaw)), 10, 64)
		if parseErr == nil {
			info.MaxOpenFiles = parsed
		}
	}

	if nrRaw, nrErr := os.ReadFile("/proc/sys/fs/file-nr"); nrErr == nil {
		if columns := strings.Fields(string(nrRaw)); len(columns) >= 1 {
			if open, parseErr := strconv.ParseUint(columns[0], 10, 64); parseErr == nil {
				info.CurrentOpenFiles = open
				if info.MaxOpenFiles > 0 {
					info.FileDescriptorUsage = float64(open) / float64(info.MaxOpenFiles) * 100
				}
			}
		}
	}

	var procLimit syscall.Rlimit
	if limitErr := syscall.Getrlimit(unix.RLIMIT_NPROC, &procLimit); limitErr == nil {
		info.MaxProcesses = procLimit.Max
	}

	result.SystemLimits = info
	return nil
}
// // Helper functions
// func NewSystemResources() SystemResources {
// return SystemResources{
// Timestamp: time.Now(),
// DiskUsage: make(map[string]DiskUsage),
// DiskIOStats: make(map[string]DiskIOStat),
// NetworkStats: make(map[string]NetworkStat),
// NetworkLatency: make(map[string]LatencyInfo),
// BandwidthUtilization: make(map[string]BandwidthInfo),
// }
// }
func (smc *SystemMetricsCollector) collectProcessMetrics(result *SystemResources) error {
processes, err := process.Processes()
if err != nil {
@ -206,7 +515,7 @@ func (smc *SystemMetricsCollector) collectDiskMetrics(result *SystemResources) e
}
func (smc *SystemMetricsCollector) collectNetworkMetrics(result *SystemResources) error {
netStats, err := net.IOCounters(true)
netStats, err := psnet.IOCounters(true)
if err != nil {
return err
}