Description of your problem
I am currently trying to rewrite a grok-based extraction configuration I have in Logstash into Graylog.
The original configuration was:
# Logstash filter stage for nginx ModSecurity alert lines.
# The first grok splits the raw "message" into top-level fields; every later
# grok reads a field produced by an earlier one (mostly "alert_message"), so
# the order of the grok blocks below matters.
filter {
# Extract event time, log severity level, source of attack (client), and the alert message.
# Also captures server, method, request and http_version from the trailing
# 'server: ..., request: "..."' part of the line.
grok {
match => { "message" => "^(?<event_time>%{YEAR}/%{MONTHNUM2}/%{MONTHDAY}\s%{TIME})\s\[%{LOGLEVEL:log_level}\]\s%{NUMBER}\#%{NUMBER}\:\s\*%{NUMBER}\sModSecurity\:\s(?<alert_message>.*?),\sclient\:\s%{IPORHOST:src_ip},\sserver\:\s(?<server>[^,]+),\srequest\:\s\"%{WORD:method}\s%{URIPATHPARAM:request}\s(?<http_version>[a-zA-Z0-9/.]+)\"" }
}
# Extract Rules File from Alert Message
# Step 1: "rulesfile" holds the whole bracketed token, i.e. [file "<path>"].
# NOTE(review): the '.' before "conf" is unescaped, so it matches any character.
grok {
match => { "alert_message" => "(?<rulesfile>\[file \"(/.+.conf)\"\])" }
}
# Step 2: "rules_file" holds only the path taken from "rulesfile".
grok {
match => { "rulesfile" => "(?<rules_file>/.+.conf)" }
}
# Extract Attack Type from Rules File
# Matches the first run of uppercase letters followed by '-' and an uppercase
# letter, then everything up to the next '.'; for a file named
# REQUEST-930-APPLICATION-ATTACK-LFI.conf this yields APPLICATION-ATTACK-LFI.
grok {
match => { "rulesfile" => "(?<attack_type>[A-Z]+-[A-Z][^.]+)" }
}
# Extract Rule ID from Alert Message
# Step 1: "ruleid" holds the whole bracketed token, i.e. [id "<digits>"].
grok {
match => { "alert_message" => "(?<ruleid>\[id \"(\d+)\"\])" }
}
# Step 2: "rule_id" holds only the digits taken from "ruleid".
grok {
match => { "ruleid" => "(?<rule_id>\d+)" }
}
# Extract Attack Message (msg) from Alert Message
# Step 1: "msg" holds the whole bracketed token, i.e. [msg "<text>"].
grok {
match => { "alert_message" => "(?<msg>\[msg \S(.*?)\"\])" }
}
# Step 2: "alert_msg" is the quoted text; the named group wraps the quotes,
# so the surrounding double quotes are part of the captured value.
grok {
match => { "msg" => "(?<alert_msg>\"(.*?)\")" }
}
# Extract the User/Scanner Agent from Alert Message
# Step 1: "scanner" holds the text from "User-Agent' " through the closing
# quote of the backtick-quoted value.
grok {
match => { "alert_message" => "(?<scanner>User-Agent' \SValue: `(.*?)')" }
}
# Step 2: "user_agent" is taken from "scanner"; the named group starts at the
# ':' and ends at the closing quote, so both delimiters are captured too.
grok {
match => { "scanner" => "(?<user_agent>:(.*?)\')" }
}
# Extract the "Matched Data: ..." text that precedes the closing '"]' of the
# data token, i.e. everything up to '"] [severity'.
grok {
match => { "alert_message" => "(?<matched_data>(Matched Data:+.+))\"\]\s\[severity" }
}
# Alternative User-Agent form: "agent" holds "User-Agent: <value>'".
grok {
match => { "alert_message" => "(?<agent>User-Agent: (.*?)\')" }
}
# Writes "user_agent" from "agent".
# NOTE(review): "user_agent" is also written by the grok on "scanner" above;
# confirm what should happen when both forms occur in one alert.
grok {
match => { "agent" => "(?<user_agent>: (.*?)\')" }
}
# Extract the Target Host
# "dst_host" is the host or IP that follows 'hostname "'.
grok {
match => { "alert_message" => "(hostname \"%{IPORHOST:dst_host})" }
}
# Extract the Request URI
# "request_uri" is the URI path that follows 'uri "'.
grok {
match => { "alert_message" => "(uri \"%{URIPATH:request_uri})" }
}
# Extract the referer: "ref" holds "referer: <rest of alert_message>".
grok {
match => { "alert_message" => "(?<ref>referer: (.*))" }
}
# "referer" is everything from the first space in "ref" onward; the leading
# space is inside the named group and therefore part of the value.
grok {
match => { "ref" => "(?<referer> (.*))" }
}
}
At first, I set out to do this with grok pattern extractors; however, there seems to be no way to “chain” them. If I have an extractor that adds new fields to the message, can a second extractor work with the fields added by the first, or do I have to extract everything in the first extractor?
Finding no clear answer, I tried approaching the issue via pipelines instead, as those seem to offer more flexible configuration. There, too, however, I hit a brick wall with the following configuration:
rule "Extract modsecurity alert fields"
when
has_field("message")
then
// Parses an nginx ModSecurity error-log line with grok, then extracts the
// rules-file path from the alert text and stores it in the "rulesfile" field.
//
// Characters that are regex metacharacters but must match literally here
// ('[', ']' and '*') are escaped. Unescaped, "[...]" is read as a character
// class and '*' as a quantifier, so the pattern cannot match the log line.
// The backslash itself is doubled because it sits inside a string literal.
let message_field = to_string($message.message);
let parsed_fields = grok("^(?<event_time>%{YEAR}/%{MONTHNUM2}/%{MONTHDAY}%{SPACE}%{TIME})%{SPACE}\\[%{LOGLEVEL:log_level}\\]%{SPACE}%{NUMBER}#%{NUMBER}:%{SPACE}\\*%{NUMBER}%{SPACE}ModSecurity:%{SPACE}%{DATA:alert_message},%{SPACE}client:%{SPACE}%{IPORHOST:src_ip},%{SPACE}server:%{SPACE}%{DATA:server},%{SPACE}request:%{SPACE}\"%{WORD:method}%{SPACE}%{URIPATHPARAM:request}%{SPACE}%{DATA:http_version}\"", message_field);
debug("Parsed fields: "+to_string(parsed_fields));
// The grok result is a map of capture name -> value; index it by key to feed
// the alert text into the second pattern.
let rulesf = grok("\\[file \"%{DATA:rulesfile}\"\\]", to_string(parsed_fields["alert_message"]), true);
debug("Rulesf: "+to_string(rulesf));
// set_fields() copies every entry of the result map onto the message, so the
// named capture "rulesfile" becomes a string field. Passing the whole result
// object to set_field() would store an object instead of the captured text.
set_fields(rulesf);
end
The debug statements print empty objects, which suggests that either my grok patterns are wrong or I am misunderstanding something. Using the Simulator in Graylog, I got to the point where it showed that the pipeline would add fields of type “Object”, with no further detail. If I tried to cast those to a string, or to select a single field from them (e.g. set_field("Rulesfile", rulesf.rulesfile), to use the example above), no fields were added at all.
Is there any other, more appropriate, method of achieving what I need?
Example input message:
2021/09/14 15:13:45 [info] 28140#28140: *25948 ModSecurity: Warning. Matched "Operator `PmFromFile' with parameter `lfi-os-files.data' against variable `ARGS:path' (Value: `../../../../../../../../../../etc/passwd' ) [file "/usr/share/modsecurity-crs/rules/REQUEST-930-APPLICATION-ATTACK-LFI.conf"] [line "78"] [id "930120"] [rev ""] [msg "OS File Access Attempt"] [data "Matched Data: etc/passwd found within ARGS:path: ../../../../../../../../../../etc/passwd"] [severity "2"] [ver "OWASP_CRS/3.1.0"] [maturity "0"] [accuracy "0"] [tag "application-multi"] [tag "language-multi"] [tag "platform-multi"] [tag "attack-lfi"] [tag "OWASP_CRS/WEB_ATTACK/FILE_INJECTION"] [tag "WASCTC/WASC-33"] [tag "OWASP_TOP_10/A4"] [tag "PCI/6.5.4"] [hostname "123.23.54.97"] [uri "/index.php"] [unique_id "113168532531.263690"] [ref "o30,10v21,40t:utf8toUnicode,t:urlDecodeUni,t:normalizePathWin,t:lowercase"], client: 123.23.54.97, server: web.example.org, request: "HEAD /index.php?path=../../../../../../../../../../etc/passwd HTTP/1.1", host: "web.example.org"
Operating system information
Debian 10 Buster
Package versions
- Graylog: 4.0.9-1
- MongoDB: 4.4.7
- Elasticsearch: 7.13.3