feat: implement new generic parser and improve production readiness

This commit is contained in:
Patryk Hegenberg 2026-01-18 12:37:57 +01:00
parent 8364218234
commit 0830b403e0
34 changed files with 1715 additions and 2114 deletions

164
configs/patterns.yml Normal file
View file

@ -0,0 +1,164 @@
# Regex extractors for log parsing, grouped by service.
#
# Each extractor has:
#   name   - identifier of the extractor
#   regex  - pattern whose named capture groups (?P<group>...) yield the values
#   fields - maps a capture-group name to its type: "string", "int", "float",
#            or "time:<layout>"
#
# NOTE(review): the time layouts use Go reference-time notation
# (2006-01-02 15:04:05) and are assumed to be passed to Go's time.Parse;
# confirm against the parser implementation.
patterns:
  # ===========================================================================
  # Common / Shared Patterns
  # ===========================================================================
  common:
    extractors:
      - name: "syslog_header"
        # The timestamp group is named so that it feeds the `syslog_timestamp`
        # field below. `[ \d]\d` accepts both zero-padded ("Jan 02") and
        # space-padded ("Jan  2") days.
        regex: '^(?P<syslog_timestamp>\w{3} [ \d]\d \d{2}:\d{2}:\d{2}) (?P<hostname>[^\s]+) (?P<process_info>[^:]+):\s*(?P<message_rest>.*)$'
        fields:
          # `_2` parses a space-padded day and still accepts a two-digit day.
          syslog_timestamp: "time:Jan _2 15:04:05"
          hostname: "string"
          process_info: "string"
          message_rest: "string"
      - name: "timestamp_rfc3339"
        regex: '(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z?)'
        fields:
          # RFC 3339 layout: when parsing, Go accepts a fractional-seconds part
          # after the seconds even if the layout has none, so every fraction
          # width matched by the regex is covered.
          # NOTE(review): the regex also matches values without the trailing
          # "Z"; those still require a zone designator to parse - confirm
          # whether such input occurs.
          timestamp: "time:2006-01-02T15:04:05Z07:00"

  # ===========================================================================
  # TIXstream Service
  # Covers: tsServicePattern, tsTransferIDPattern, tsDetailPattern1-4
  # ===========================================================================
  tixstream:
    extractors:
      - name: "service_log_base"
        regex: '^(?P<log_level>\S+)\s+(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6})\s+(?P<message>.*)'
        fields:
          log_level: "string"
          timestamp: "time:2006-01-02 15:04:05.000000"
          message: "string"
      - name: "transfer_id_extraction"
        regex: '^(?P<transfer_id>\w{8}-\w{4}-\w{4}-\w{4}-\w{12})\s+(?P<message>.*)'
        fields:
          transfer_id: "string"
          message: "string"
      - name: "transfer_start_in"
        regex: 'in: Transfer start (?P<thread_info>\d+/\d+) buffers=(?P<buffers>\d+) files=(?P<file_count>\d+) size=(?P<size_mb>[0-9.]+) MByte chunksize=(?P<chunk_size>\d+) streams=(?P<streams>\d+) target-datarate=(?P<target_rate>[0-9.]+) MByte/s protocol=(?P<protocol>\w+) dest=(?P<destination>\S+) sender-id=(?P<sender_id>\S+)'
        fields:
          thread_info: "string"  # e.g. "1/4" - hard to type here, so kept as a string
          buffers: "int"
          file_count: "int"
          size_mb: "float"
          chunk_size: "int"
          streams: "int"
          target_rate: "float"
          protocol: "string"
          destination: "string"
          sender_id: "string"
          # The regex has no `direction` capture group; static fields can be
          # injected by the parser or treated as "implicit" here.
          direction: "string"
      - name: "transfer_start_remote_out"
        regex: 'out: Start remote transfer to (?P<target>[^\s]+) request executed, duration=(?P<duration>[0-9.]+) s'
        fields:
          target: "string"
          duration: "float"
      - name: "transfer_start_out"
        regex: 'out: Transfer start (?P<thread_info>\d+/\d+) buffers=(?P<buffers>\d+) files=(?P<file_count>\d+) size=(?P<size_mb>[0-9.]+) MByte chunksize=(?P<chunk_size>\d+) streams=(?P<streams>\d+) target-datarate=(?P<target_rate>[0-9.]+) MByte/s protocol=(?P<protocol>\w+) src=(?P<source>\S+) receiver=(?P<receiver>\S+)'
        fields:
          thread_info: "string"
          buffers: "int"
          file_count: "int"
          size_mb: "float"
          chunk_size: "int"
          streams: "int"
          target_rate: "float"
          protocol: "string"
          source: "string"
          receiver: "string"
      - name: "transfer_start_generic"
        regex: 'out: Start transfer (?P<thread_info>\d+/\d+), src=(?P<source>[^ ]*) dest=(?P<destination>[^ ]*) item\[0\]=(?P<item0>[^ ]*) count=(?P<count>\d+)'
        fields:
          thread_info: "string"
          source: "string"
          destination: "string"
          item0: "string"
          count: "int"

  # ===========================================================================
  # Transfer Job Manager (TJM)
  # Covers: tjmServicePattern, tjmTransferNamePattern, tjmTransferIDPattern1/2
  # ===========================================================================
  transfer-job-manager:
    extractors:
      - name: "service_log_base"
        regex: '^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3})\s+(?P<log_level>\S+)\s+(?P<pid>\d+).*?\[(?P<correlation_id>[^\]]*)\]\s+\[(?P<username>[^\]]*)\]\s+\[(?P<thread_id>[^\]]*)\]\s+(?P<java_class>.*?)\s+:\s+(?P<message>.*)'
        fields:
          timestamp: "time:2006-01-02 15:04:05.000"
          log_level: "string"
          pid: "int"
          correlation_id: "string"
          username: "string"
          thread_id: "string"
          java_class: "string"
          message: "string"
      - name: "transfer_name_info"
        regex: '^(?P<transfer_name_raw>\d{8}T\d{6}-[A-Za-z0-9]+-.+?-(?:in|out)) ?: (?P<message>.*)$'
        fields:
          transfer_name_raw: "string"
          message: "string"
      - name: "transfer_id_mid"
        regex: '(?P<transfer_id>\w{8}-\w{4}-\w{4}-\w{4}-\w{12}).*?(?P<message>.*)'
        fields:
          transfer_id: "string"
          message: "string"
      - name: "transfer_id_prefixed"
        regex: '(?P<prefix>.*)(?P<transfer_id>\w{8}-\w{4}-\w{4}-\w{4}-\w{12}).*?(?P<message>.*)'
        fields:
          prefix: "string"
          transfer_id: "string"
          message: "string"

  # ===========================================================================
  # Access Manager & TCC
  # Covers: amServicePattern, tccServicePattern
  # ===========================================================================
  access-manager:
    extractors:
      - name: "spring_boot_log"
        regex: '^(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z)\s+(?P<log_level>\w+)\s+(?P<pid>\d+)\s+---\s+\[\s*(?P<thread_id>[^\]]*)\]\s+(?P<logger>[\w\.]+)\s*:\s+(?P<message>.*)$'
        fields:
          # RFC 3339 layout so that any fraction width the regex matches
          # (including none) parses, not only exactly six digits.
          timestamp: "time:2006-01-02T15:04:05Z07:00"
          log_level: "string"
          pid: "int"
          thread_id: "string"
          logger: "string"
          message: "string"
  tixel-control-center:
    extractors:
      - name: "spring_boot_log"
        regex: '^(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z)\s+(?P<log_level>\w+)\s+(?P<pid>\d+)\s+---\s+\[\s*(?P<thread_id>[^\]]*)\]\s+(?P<logger>[\w\.]+)\s*:\s+(?P<message>.*)$'
        fields:
          # RFC 3339 layout so that any fraction width the regex matches
          # (including none) parses, not only exactly six digits.
          timestamp: "time:2006-01-02T15:04:05Z07:00"
          log_level: "string"
          pid: "int"
          thread_id: "string"
          logger: "string"
          message: "string"

  # ===========================================================================
  # Nginx
  # Covers: nginxAccessPattern
  # ===========================================================================
  nginx:
    extractors:
      - name: "access_log"
        regex: '^(?P<client_ip>\S+)\s+\S+\s+(?P<remote_user>\S+)\s+\[(?P<timestamp_nginx>[^\]]+)\]\s+"(?P<request>[^"]+)"\s+(?P<status_code>\d+)\s+(?P<bytes_sent>\d+|-)\s*(?:"(?P<referer>[^"]*)"\s+"(?P<user_agent>[^"]*)")?'
        fields:
          client_ip: "string"
          remote_user: "string"
          timestamp_nginx: "string"
          request: "string"
          status_code: "int"
          # NOTE(review): the regex also admits "-" for this group, which is
          # not an integer - confirm how the parser handles that value.
          bytes_sent: "int"
          referer: "string"
          user_agent: "string"