531 lines
13 KiB
Go
531 lines
13 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"net"
|
|
"os"
|
|
"slices"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
"watch-tool/models"
|
|
|
|
"github.com/shirou/gopsutil/cpu"
|
|
"github.com/shirou/gopsutil/disk"
|
|
"github.com/shirou/gopsutil/host"
|
|
"github.com/shirou/gopsutil/load"
|
|
"github.com/shirou/gopsutil/mem"
|
|
psnet "github.com/shirou/gopsutil/net"
|
|
"github.com/shirou/gopsutil/process"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
type SystemMetricsCollector struct {
|
|
config SystemMetrics
|
|
pollInterval int
|
|
lastNetworkStats map[string]models.NetworkStat
|
|
lastDiskStats map[string]models.DiskIOStat
|
|
lastMeasureTime time.Time
|
|
hostname string
|
|
}
|
|
|
|
func NewSystemMetricsCollector(config SystemMetrics, pollInterval int, hostname string) *SystemMetricsCollector {
|
|
return &SystemMetricsCollector{
|
|
config: config,
|
|
pollInterval: pollInterval,
|
|
lastNetworkStats: make(map[string]models.NetworkStat),
|
|
lastDiskStats: make(map[string]models.DiskIOStat),
|
|
lastMeasureTime: time.Now(),
|
|
hostname: hostname,
|
|
}
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) Start(ctx context.Context, storage StorageInterface, logChan chan<- models.LogMessage) {
|
|
ticker := time.NewTicker(time.Duration(smc.pollInterval) * time.Second)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
slog.Info("System metrics collector stopped")
|
|
return
|
|
case <-ticker.C:
|
|
metrics, err := smc.collectMetrics()
|
|
if err != nil {
|
|
slog.Error("error collecting system metrics", "error", err)
|
|
continue
|
|
}
|
|
|
|
entry := models.NewLogMessage("system_metrics", smc.hostname)
|
|
entry.Service = "system-metrics"
|
|
entry.LogLevel = "Info"
|
|
entry.SystemMetrics = metrics
|
|
|
|
select {
|
|
case logChan <- entry:
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
slog.Warn("Log channel is full, system metrics dropped")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
func (smc *SystemMetricsCollector) collectMetrics() (models.SystemResources, error) {
|
|
result := models.NewSystemResources(smc.hostname)
|
|
|
|
var err error
|
|
|
|
if smc.config.CollectCPU {
|
|
if err = smc.collectCPUMetrics(&result); err != nil {
|
|
return result, fmt.Errorf("CPU metrics: %w", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectMemory {
|
|
if err = smc.collectMemoryMetrics(&result); err != nil {
|
|
return result, fmt.Errorf("memory metrics: %w", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectDisk {
|
|
if err = smc.collectDiskMetrics(&result); err != nil {
|
|
return result, fmt.Errorf("disk metrics: %w", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectNetwork {
|
|
if err = smc.collectNetworkMetrics(&result); err != nil {
|
|
return result, fmt.Errorf("network metrics: %w", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectProcesses {
|
|
if err := smc.collectProcessMetrics(&result); err != nil {
|
|
slog.Warn("failed to collect process metrics", "error", err)
|
|
}
|
|
}
|
|
if smc.config.CollectDiskIO {
|
|
if err = smc.collectDiskIOMetrics(&result); err != nil {
|
|
slog.Warn("failed to collect disk IO metrics", "error", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectNetworkConnections {
|
|
if err = smc.collectNetworkConnections(&result); err != nil {
|
|
slog.Warn("failed to collect network connections", "error", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectLoadAverage {
|
|
if err = smc.collectLoadAverage(&result); err != nil {
|
|
slog.Warn("failed to collect load average", "error", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectTCPStats {
|
|
if err = smc.collectTCPStats(&result); err != nil {
|
|
slog.Warn("failed to collect TCP stats", "error", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectNetworkLatency {
|
|
if err = smc.collectNetworkLatency(&result); err != nil {
|
|
slog.Warn("failed to collect network latency", "error", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectBandwidthUsage {
|
|
if err = smc.collectBandwidthUsage(&result); err != nil {
|
|
slog.Warn("failed to collect bandwidth usage", "error", err)
|
|
}
|
|
}
|
|
|
|
if smc.config.CollectFileHandles {
|
|
if err = smc.collectSystemLimits(&result); err != nil {
|
|
slog.Warn("failed to collect system limits", "error", err)
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectDiskIOMetrics(result *models.SystemResources) error {
|
|
diskIOStats, err := disk.IOCounters()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
currentTime := time.Now()
|
|
timeDiff := currentTime.Sub(smc.lastMeasureTime).Seconds()
|
|
|
|
result.DiskIOStats = make(map[string]models.DiskIOStat)
|
|
|
|
for device, stats := range diskIOStats {
|
|
ioStat := models.DiskIOStat{
|
|
ReadBytes: stats.ReadBytes,
|
|
WriteBytes: stats.WriteBytes,
|
|
ReadOps: stats.ReadCount,
|
|
WriteOps: stats.WriteCount,
|
|
ReadTime: stats.ReadTime,
|
|
WriteTime: stats.WriteTime,
|
|
}
|
|
|
|
if stats.ReadCount > 0 {
|
|
ioStat.AvgReadLatency = float64(stats.ReadTime) / float64(stats.ReadCount)
|
|
}
|
|
if stats.WriteCount > 0 {
|
|
ioStat.AvgWriteLatency = float64(stats.WriteTime) / float64(stats.WriteCount)
|
|
}
|
|
|
|
if timeDiff > 0 {
|
|
totalTime := float64(stats.ReadTime + stats.WriteTime)
|
|
ioStat.IOUtilization = (totalTime / (timeDiff * 1000)) * 100
|
|
if ioStat.IOUtilization > 100 {
|
|
ioStat.IOUtilization = 100
|
|
}
|
|
}
|
|
|
|
result.DiskIOStats[device] = ioStat
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectNetworkConnections(result *models.SystemResources) error {
|
|
connections, err := psnet.Connections("all")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
stats := models.ConnectionStats{
|
|
ConnectionsByState: make(map[string]int32),
|
|
}
|
|
|
|
for _, conn := range connections {
|
|
stats.TotalConnections++
|
|
|
|
stats.ConnectionsByState[conn.Status]++
|
|
|
|
switch conn.Status {
|
|
case "ESTABLISHED":
|
|
stats.EstablishedTCP++
|
|
case "LISTEN":
|
|
stats.ListeningTCP++
|
|
case "TIME_WAIT":
|
|
stats.TimeWaitTCP++
|
|
}
|
|
|
|
if slices.Contains(smc.config.TransferPorts, int(conn.Laddr.Port)) ||
|
|
slices.Contains(smc.config.TransferPorts, int(conn.Raddr.Port)) {
|
|
stats.TransferConnections++
|
|
}
|
|
}
|
|
|
|
result.NetworkConnections = stats
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectLoadAverage(result *models.SystemResources) error {
|
|
loadAvg, err := load.Avg()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
result.LoadAverage = append(result.LoadAverage, loadAvg.Load1)
|
|
result.LoadAverage = append(result.LoadAverage, loadAvg.Load5)
|
|
result.LoadAverage = append(result.LoadAverage, loadAvg.Load15)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectTCPStats(result *models.SystemResources) error {
|
|
tcpStats := models.TCPStatistics{}
|
|
|
|
if data, err := os.ReadFile("/proc/net/netstat"); err == nil {
|
|
content := string(data)
|
|
lines := strings.SplitSeq(content, "\n")
|
|
for line := range lines {
|
|
if strings.HasPrefix(line, "TcpExt:") {
|
|
}
|
|
}
|
|
}
|
|
|
|
result.TCPStats = tcpStats
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectNetworkLatency(result *models.SystemResources) error {
|
|
result.NetworkLatency = make(map[string]models.LatencyInfo)
|
|
|
|
for _, host := range smc.config.LatencyTestHosts {
|
|
latency := smc.measureLatency(host)
|
|
result.NetworkLatency[host] = latency
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) measureLatency(host string) models.LatencyInfo {
|
|
var latencies []time.Duration
|
|
var successful int
|
|
|
|
for range 5 {
|
|
start := time.Now()
|
|
conn, err := net.DialTimeout("tcp", host+":80", 3*time.Second)
|
|
if err == nil {
|
|
latency := time.Since(start)
|
|
latencies = append(latencies, latency)
|
|
conn.Close()
|
|
successful++
|
|
}
|
|
time.Sleep(100 * time.Millisecond)
|
|
}
|
|
|
|
if len(latencies) == 0 {
|
|
return models.LatencyInfo{Host: host, PacketLoss: 100.0}
|
|
}
|
|
|
|
var total time.Duration
|
|
min := latencies[0]
|
|
max := latencies[0]
|
|
|
|
for _, lat := range latencies {
|
|
total += lat
|
|
if lat < min {
|
|
min = lat
|
|
}
|
|
if lat > max {
|
|
max = lat
|
|
}
|
|
}
|
|
|
|
avg := total / time.Duration(len(latencies))
|
|
packetLoss := float64(5-successful) / 5.0 * 100.0
|
|
jitter := max - min
|
|
|
|
return models.LatencyInfo{
|
|
Host: host,
|
|
MinLatency: min,
|
|
MaxLatency: max,
|
|
AvgLatency: avg,
|
|
PacketLoss: packetLoss,
|
|
Jitter: jitter,
|
|
}
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectBandwidthUsage(result *models.SystemResources) error {
|
|
netStats, err := psnet.IOCounters(true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
result.BandwidthUtilization = make(map[string]models.BandwidthInfo)
|
|
currentTime := time.Now()
|
|
timeDiff := currentTime.Sub(smc.lastMeasureTime).Seconds()
|
|
|
|
for _, stat := range netStats {
|
|
if len(smc.config.NetworkInterfaces) > 0 &&
|
|
!slices.Contains(smc.config.NetworkInterfaces, stat.Name) {
|
|
continue
|
|
}
|
|
|
|
bandwidth := models.BandwidthInfo{Interface: stat.Name}
|
|
|
|
if lastStat, exists := smc.lastNetworkStats[stat.Name]; exists && timeDiff > 0 {
|
|
bytesDiffIn := float64(stat.BytesRecv - lastStat.BytesRecv)
|
|
bytesDiffOut := float64(stat.BytesSent - lastStat.BytesSent)
|
|
|
|
bandwidth.CurrentThroughputIn = (bytesDiffIn / timeDiff) / (1024 * 1024) // MB/s
|
|
bandwidth.CurrentThroughputOut = (bytesDiffOut / timeDiff) / (1024 * 1024)
|
|
|
|
bandwidth.PeakThroughputIn = bandwidth.CurrentThroughputIn
|
|
bandwidth.PeakThroughputOut = bandwidth.CurrentThroughputOut
|
|
|
|
linkCapacityMbps := 1000.0
|
|
totalThroughput := bandwidth.CurrentThroughputIn + bandwidth.CurrentThroughputOut
|
|
bandwidth.UtilizationPercent = (totalThroughput / linkCapacityMbps) * 100
|
|
}
|
|
|
|
result.BandwidthUtilization[stat.Name] = bandwidth
|
|
}
|
|
|
|
for _, stat := range netStats {
|
|
smc.lastNetworkStats[stat.Name] = models.NetworkStat{
|
|
BytesSent: stat.BytesSent,
|
|
BytesRecv: stat.BytesRecv,
|
|
PacketsSent: stat.PacketsSent,
|
|
PacketsRecv: stat.PacketsRecv,
|
|
}
|
|
}
|
|
|
|
smc.lastMeasureTime = currentTime
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectSystemLimits(result *models.SystemResources) error {
|
|
limits := models.SystemLimitInfo{}
|
|
|
|
if data, err := os.ReadFile("/proc/sys/fs/file-max"); err == nil {
|
|
if maxFiles, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64); err == nil {
|
|
limits.MaxOpenFiles = maxFiles
|
|
}
|
|
}
|
|
|
|
if data, err := os.ReadFile("/proc/sys/fs/file-nr"); err == nil {
|
|
fields := strings.Fields(string(data))
|
|
if len(fields) >= 1 {
|
|
if currentFiles, err := strconv.ParseUint(fields[0], 10, 64); err == nil {
|
|
limits.CurrentOpenFiles = currentFiles
|
|
if limits.MaxOpenFiles > 0 {
|
|
limits.FileDescriptorUsage = float64(currentFiles) / float64(limits.MaxOpenFiles) * 100
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
var rlimit syscall.Rlimit
|
|
if err := syscall.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err == nil {
|
|
limits.MaxProcesses = rlimit.Max
|
|
}
|
|
|
|
result.SystemLimits = limits
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectProcessMetrics(result *models.SystemResources) error {
|
|
processes, err := process.Processes()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var processInfos []models.ProcessInfo
|
|
var totalOpenFiles int32
|
|
|
|
for _, p := range processes {
|
|
name, err := p.Name()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
cpuPercent, err := p.CPUPercent()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
memInfo, err := p.MemoryInfo()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
status, err := p.Status()
|
|
if err != nil {
|
|
status = ""
|
|
}
|
|
|
|
createTime, err := p.CreateTime()
|
|
if err != nil {
|
|
createTime = 0
|
|
}
|
|
|
|
if openFiles, err := p.NumFDs(); err == nil {
|
|
totalOpenFiles += openFiles
|
|
}
|
|
|
|
processInfos = append(processInfos, models.ProcessInfo{
|
|
PID: p.Pid,
|
|
Name: name,
|
|
CPUPercent: cpuPercent,
|
|
MemoryMB: float32(memInfo.RSS) / 1024 / 1024,
|
|
Status: status,
|
|
CreateTime: createTime,
|
|
})
|
|
}
|
|
|
|
sort.Slice(processInfos, func(i, j int) bool {
|
|
return processInfos[i].CPUPercent > processInfos[j].CPUPercent
|
|
})
|
|
|
|
limit := smc.config.TopProcessesLimit
|
|
if len(processInfos) > limit {
|
|
processInfos = processInfos[:limit]
|
|
}
|
|
|
|
result.TopProcesses = processInfos
|
|
result.OpenFileDescriptors = totalOpenFiles
|
|
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectCPUMetrics(result *models.SystemResources) error {
|
|
cpuPercents, err := cpu.Percent(time.Second, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(cpuPercents) > 0 {
|
|
result.CPUPercent = cpuPercents[0]
|
|
}
|
|
|
|
if hostStat, err := host.Info(); err == nil {
|
|
result.Uptime = hostStat.Uptime
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectMemoryMetrics(result *models.SystemResources) error {
|
|
vmStat, err := mem.VirtualMemory()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
result.MemoryUsed = vmStat.Used
|
|
result.MemoryTotal = vmStat.Total
|
|
result.MemoryPercent = vmStat.UsedPercent
|
|
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectDiskMetrics(result *models.SystemResources) error {
|
|
for _, path := range smc.config.DiskPaths {
|
|
diskStat, err := disk.Usage(path)
|
|
if err != nil {
|
|
slog.Error("error reading disk stats", "path", path, "error", err)
|
|
continue
|
|
}
|
|
|
|
result.DiskUsage[path] = models.DiskUsage{
|
|
Used: diskStat.Used,
|
|
Total: diskStat.Total,
|
|
UsedPercent: diskStat.UsedPercent,
|
|
Free: diskStat.Free,
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (smc *SystemMetricsCollector) collectNetworkMetrics(result *models.SystemResources) error {
|
|
netStats, err := psnet.IOCounters(true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, stat := range netStats {
|
|
if len(smc.config.NetworkInterfaces) == 0 || slices.Contains(smc.config.NetworkInterfaces, stat.Name) {
|
|
result.NetworkStats[stat.Name] = models.NetworkStat{
|
|
BytesSent: stat.BytesSent,
|
|
BytesRecv: stat.BytesRecv,
|
|
PacketsSent: stat.PacketsSent,
|
|
PacketsRecv: stat.PacketsRecv,
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|