feat: implement new generic parser and improve production readiness

This commit is contained in:
Patryk Hegenberg 2026-01-18 12:37:57 +01:00
parent 8364218234
commit 0830b403e0
34 changed files with 1715 additions and 2114 deletions

View file

@ -5,8 +5,8 @@ import (
"regexp"
"strings"
"time"
"tixel_watch/helpers"
"tixel_watch/models"
"watch-tool/helpers"
"watch-tool/models"
)
var (

View file

@ -3,12 +3,13 @@ package parser
import (
"strings"
"time"
"tixel_watch/models"
"watch-tool/models"
)
type DefaultParser struct {
Service string
Tool string
Service string
Tool string
Hostname string
}
func (d *DefaultParser) Parse(line string) (models.LogMessage, error) {

View file

@ -1,27 +1,10 @@
package parser
func New(serviceName, logType string) (Parser, error) {
func New(serviceName, logType, hostname string) (Parser, error) {
switch logType {
case "custom":
switch serviceName {
case "tixstream":
return &TSParser{}, nil
case "transfer-job-manager":
return &TJMParser{}, nil
case "access-manager":
return &AMParser{}, nil
case "tixel-control-center":
return &TCCParser{}, nil
case "nginx":
return &NginxParser{}, nil
case "nginx-tjm":
return &NginxTJMLogParser{ToolName: serviceName}, nil
default:
return &DefaultParser{Service: serviceName}, nil
}
case "json":
return &JSONParser{}, nil
default:
return &DefaultParser{Service: serviceName}, nil
return NewGenericParser(serviceName, hostname), nil
}
}

296
parser/generic_parser.go Normal file
View file

@ -0,0 +1,296 @@
// package parser
// import (
// "fmt"
// "strconv"
// "strings"
// "time"
// "watch-tool/models"
// "watch-tool/patterns"
// )
// type GenericParser struct {
// ServiceName string
// Hostname string
// Extractors []patterns.CompiledExtractor
// CommonExt []patterns.CompiledExtractor
// }
// func NewGenericParser(serviceName, hostname string) *GenericParser {
// repo := patterns.GetInstance()
// return &GenericParser{
// ServiceName: serviceName,
// Hostname: hostname,
// Extractors: repo.GetExtractors(serviceName),
// CommonExt: repo.GetExtractors("common"),
// }
// }
// func (p *GenericParser) Parse(line string) (models.LogMessage, error) {
// entry := models.LogMessage{
// Service: p.ServiceName,
// Host: p.Hostname,
// Timestamp: time.Now(),
// Raw: line,
// Fields: make(map[string]any),
// }
// // 1. Run the common extractors (e.g. strip/parse the syslog header)
// // We use a temporary variable for the remaining string in case the header should be removed
// currentLine := line
// // Note: generic syslog logic could be added here.
// // For now we simply apply the patterns to the line.
// // 2. Apply the service extractors
// // We try ALL extractors to gather as much information as possible.
// // This mimics the logic of your old parsers (header first, then details).
// allExtractors := append(p.CommonExt, p.Extractors...)
// for _, ext := range allExtractors {
// matches := ext.Pattern.FindStringSubmatch(currentLine)
// if matches == nil {
// continue
// }
// subexpNames := ext.Pattern.SubexpNames()
// for i, matchValue := range matches {
// if i == 0 || matchValue == "" {
// continue
// }
// groupName := subexpNames[i]
// if groupName == "" {
// continue
// }
// targetType := ext.Fields[groupName]
// parsedValue, err := convertType(matchValue, targetType)
// if err == nil {
// switch groupName {
// case "timestamp":
// if t, ok := parsedValue.(time.Time); ok {
// entry.Timestamp = t
// }
// case "log_level":
// entry.LogLevel = fmt.Sprintf("%v", parsedValue)
// case "message":
// entry.LogMessage = fmt.Sprintf("%v", parsedValue)
// default:
// entry.Fields[groupName] = parsedValue
// }
// }
// }
// }
// if entry.LogMessage == "" {
// entry.LogMessage = strings.TrimSpace(line)
// }
// return entry, nil
// }
// func convertType(value, typeDef string) (any, error) {
// if strings.HasPrefix(typeDef, "int") {
// return strconv.Atoi(value)
// }
// if strings.HasPrefix(typeDef, "float") {
// return strconv.ParseFloat(value, 64)
// }
// if after, ok := strings.CutPrefix(typeDef, "time:"); ok {
// layout := after
// // Workaround for syslog (the year is often missing), simplified here:
// if layout == "Jan 02 15:04:05" {
// t, err := time.Parse(layout, value)
// if err == nil {
// return t.AddDate(time.Now().Year(), 0, 0), nil
// }
// return t, err
// }
// return time.Parse(layout, value)
// }
// // Default: String
// return value, nil
// }
package parser
import (
"fmt"
"log/slog"
"strconv"
"strings"
"time"
"watch-tool/models"
"watch-tool/patterns"
)
// GenericParser is a pattern-driven log parser: it applies a list of compiled
// extractors to each log line and maps their named capture groups onto a
// models.LogMessage.
type GenericParser struct {
// ServiceName is the default Service value of every entry produced by Parse;
// NewGenericParser also uses it as the key for looking up Extractors.
ServiceName string
// Hostname is the default Host value of every entry produced by Parse.
Hostname string
// Extractors are the service-specific extractors; Parse tries them after CommonExt.
Extractors []patterns.CompiledExtractor
// CommonExt are the extractors registered under the "common" key; Parse tries them first.
CommonExt []patterns.CompiledExtractor
}
// NewGenericParser builds a GenericParser for the given service and host.
//
// The extractor lists are fetched from the shared pattern repository: the
// service-specific ones under serviceName and the shared ones under "common".
// If the repository is nil, an error is logged and the parser is returned
// with both extractor lists left nil.
func NewGenericParser(serviceName, hostname string) *GenericParser {
	repo := patterns.GetInstance()

	parser := &GenericParser{
		ServiceName: serviceName,
		Hostname:    hostname,
	}
	if repo == nil {
		slog.Error("CRITICAL: Pattern Repository is nil. Parser will not work correctly.")
		return parser
	}

	parser.Extractors = repo.GetExtractors(serviceName)
	parser.CommonExt = repo.GetExtractors("common")
	return parser
}
// Parse converts a single raw log line into a models.LogMessage.
//
// The entry starts out with the parser's service name and hostname, the
// current time as timestamp, the untouched line in Raw and Type "log_entry".
// The common extractors and then the service extractors are all tried against
// the whitespace-trimmed line. For every matching pattern, each named capture
// group is trimmed, converted with safeConvert (using the type declared for
// that group in the extractor's Fields map) and stored via mapField, so a
// later extractor can overwrite a value set by an earlier one.
//
// If no extractor matches, Fields["_parse_status"] is set to "failed". In
// every case where no message was extracted, the trimmed line is used as the
// message. A blank line returns the default entry with an empty message.
//
// The returned error is always nil.
func (p *GenericParser) Parse(line string) (models.LogMessage, error) {
	entry := models.LogMessage{
		Service:   p.ServiceName,
		Host:      p.Hostname,
		Timestamp: time.Now(),
		Raw:       line,
		Fields:    make(map[string]any),
		Type:      "log_entry",
	}

	trimmedLine := strings.TrimSpace(line)
	if trimmedLine == "" {
		return entry, nil
	}

	// Walk the two extractor lists in place rather than joining them with
	// append(p.CommonExt, p.Extractors...): append may write into spare
	// capacity of CommonExt's backing array (unsafe if that slice is shared
	// or Parse runs concurrently) and otherwise allocates on every line.
	matchedAny := false
	for pass := 0; pass < 2; pass++ {
		extractors := p.CommonExt
		if pass == 1 {
			extractors = p.Extractors
		}

		for _, ext := range extractors {
			matches := ext.Pattern.FindStringSubmatch(trimmedLine)
			if matches == nil {
				continue
			}
			matchedAny = true

			subexpNames := ext.Pattern.SubexpNames()
			for i, matchValue := range matches {
				// Index 0 is the whole match; unnamed groups carry no field name.
				if i == 0 {
					continue
				}
				groupName := subexpNames[i]
				if groupName == "" {
					continue
				}

				cleanValue := strings.TrimSpace(matchValue)
				targetType := ext.Fields[groupName]
				parsedValue := p.safeConvert(cleanValue, targetType)
				p.mapField(&entry, groupName, parsedValue)
			}
		}
	}

	if !matchedAny {
		entry.Fields["_parse_status"] = "failed"
	}
	// Fall back to the full line whenever no extractor supplied a message.
	if entry.LogMessage == "" {
		entry.LogMessage = trimmedLine
	}
	return entry, nil
}
// safeConvert converts a captured string into the Go type named by typeDef
// and never fails: when a conversion is not possible the original string is
// returned instead.
//
// Recognised typeDef values:
//   - prefix "int":   int, via strconv.Atoi
//   - prefix "float": float64, via strconv.ParseFloat
//   - prefix "time:": time.Time; the remainder is the layout handed to parseTimeRobust
//   - "bool":         bool, via strconv.ParseBool
//   - anything else:  the string itself
//
// The placeholders "" and "-" mean "no value": numeric types yield a zero of
// the matching Go type (int 0 or float64 0), every other type returns the
// placeholder unchanged.
func (p *GenericParser) safeConvert(value, typeDef string) any {
	if value == "" || value == "-" {
		switch {
		case strings.HasPrefix(typeDef, "int"):
			return 0
		case strings.HasPrefix(typeDef, "float"):
			// Keep the dynamic type consistent with successfully parsed floats.
			return float64(0)
		default:
			return value
		}
	}

	switch {
	case strings.HasPrefix(typeDef, "int"):
		if i, err := strconv.Atoi(value); err == nil {
			return i
		}
	case strings.HasPrefix(typeDef, "float"):
		if f, err := strconv.ParseFloat(value, 64); err == nil {
			return f
		}
	case strings.HasPrefix(typeDef, "time:"):
		layout := strings.TrimPrefix(typeDef, "time:")
		if t, err := p.parseTimeRobust(value, layout); err == nil {
			return t
		}
	case typeDef == "bool":
		if b, err := strconv.ParseBool(value); err == nil {
			return b
		}
	}

	// Unknown type or failed conversion: hand back the raw text.
	return value
}
// parseTimeRobust parses value with the given time layout.
//
// Syslog-style layouts carry no year, so time.Parse yields year 0 for them.
// For those layouts ("Jan 02 15:04:05" as well as the standard library's
// time.Stamp family with a space-padded day) the current year is filled in.
// If the parsed month lies after the current month the entry is assumed to
// come from the previous year (e.g. a December line read in January).
//
// All other layouts are passed straight through to time.Parse. On a parse
// error the zero time and the error are returned.
func (p *GenericParser) parseTimeRobust(value, layout string) (time.Time, error) {
	t, err := time.Parse(layout, value)
	if err != nil {
		return time.Time{}, err
	}

	switch layout {
	case "Jan 02 15:04:05", time.Stamp, time.StampMilli, time.StampMicro, time.StampNano:
		now := time.Now()
		year := now.Year()
		if t.Month() > now.Month() {
			year--
		}
		// t is in year 0 here, so adding `year` years lands in the target year.
		return t.AddDate(year, 0, 0), nil
	}
	return t, nil
}
// mapField stores one extracted value on the log entry.
//
// Well-known keys are routed to the dedicated LogMessage fields:
//   - "timestamp"/"time":  Timestamp, only if the value is a time.Time
//     (a value that failed time conversion is ignored and the default stays)
//   - "log_level"/"level": LogLevel
//   - "message"/"msg":     LogMessage
//   - "host"/"hostname":   Host
//   - "service":           Service
//   - "pid":               PID, from an int or a numeric string
//
// For the string-valued core fields an empty value is ignored, so a capture
// group that matched nothing cannot wipe the defaults pre-filled by Parse or
// a value set by an earlier extractor. Every other key is stored in
// entry.Fields as-is.
func (p *GenericParser) mapField(entry *models.LogMessage, key string, value any) {
	switch key {
	case "timestamp", "time":
		if t, ok := value.(time.Time); ok {
			entry.Timestamp = t
		}
	case "log_level", "level":
		if s := fmt.Sprintf("%v", value); s != "" {
			entry.LogLevel = s
		}
	case "message", "msg":
		if s := fmt.Sprintf("%v", value); s != "" {
			entry.LogMessage = s
		}
	case "host", "hostname":
		if s := fmt.Sprintf("%v", value); s != "" {
			entry.Host = s
		}
	case "service":
		if s := fmt.Sprintf("%v", value); s != "" {
			entry.Service = s
		}
	case "pid":
		switch v := value.(type) {
		case int:
			entry.PID = v
		case string:
			// The extractor may not declare pid as int; accept numeric text too.
			if pid, err := strconv.Atoi(v); err == nil {
				entry.PID = pid
			}
		}
	// Mapping onto ServiceInformation fields (optional, if needed)
	// case "transfer_id": ...
	default:
		entry.Fields[key] = value
	}
}

View file

@ -3,7 +3,7 @@ package parser
import (
"encoding/json"
"log/slog"
"tixel_watch/models"
"watch-tool/models"
)
type JSONParser struct{}

View file

@ -5,7 +5,7 @@ import (
"regexp"
"strconv"
"strings"
"tixel_watch/models"
"watch-tool/models"
)
var (

View file

@ -4,12 +4,13 @@ import (
"log/slog"
"strconv"
"strings"
"tixel_watch/helpers"
"tixel_watch/models"
"watch-tool/helpers"
"watch-tool/models"
)
type NginxTJMLogParser struct {
ToolName string
Hostname string
}
func (p *NginxTJMLogParser) Parse(line string) (models.LogMessage, error) {
@ -18,11 +19,7 @@ func (p *NginxTJMLogParser) Parse(line string) (models.LogMessage, error) {
Tool: p.ToolName,
Raw: line,
}
hostname, err := helpers.GetHostname()
if err != nil {
return entry, err
}
entry.Host = hostname
entry.Host = p.Hostname
entry = p.parseNginxTJM(entry)
return entry, nil
}

View file

@ -1,11 +1,9 @@
package parser
import (
"tixel_watch/models"
"watch-tool/models"
)
type Parser interface {
//TODO: Change parsers to return an error as well
Parse(line string) (models.LogMessage, error)
// Parse(line string) models.LogMessage
}

View file

@ -1,29 +1,23 @@
package parser
import (
"log/slog"
"regexp"
"strings"
"tixel_watch/helpers"
"tixel_watch/models"
"watch-tool/models"
)
type RegexLogParser struct {
Pattern *regexp.Regexp
Fields map[string]string
Toolname string
Hostname string
}
func (p *RegexLogParser) Parse(line string) (models.LogMessage, error) {
entry := models.LogMessage{Type: "log_entry"}
entry.Tool = p.Toolname
entry.Raw = line
hostname, err := helpers.GetHostname()
if err != nil {
slog.Warn("cannot get hostname")
return entry, err
}
entry.Host = hostname
entry.Host = p.Hostname
fields := p.parseWithPattern(line)
if fields != nil {

View file

@ -5,8 +5,8 @@ import (
"regexp"
"strings"
"time"
"tixel_watch/helpers"
"tixel_watch/models"
"watch-tool/helpers"
"watch-tool/models"
)
var (

View file

@ -4,8 +4,8 @@ import (
"log/slog"
"regexp"
"strings"
"tixel_watch/helpers"
"tixel_watch/models"
"watch-tool/helpers"
"watch-tool/models"
)
var (

View file

@ -5,8 +5,8 @@ import (
"regexp"
"strconv"
"strings"
"tixel_watch/helpers"
"tixel_watch/models"
"watch-tool/helpers"
"watch-tool/models"
)
var (