JSON nested and list

Hello,

I am trying to format my JSON log, but so far without success.
My goal is to be able to parse nested fields such as:

httpRequest, ruleListGroup

My current setup is quite simple: I defined a JSON extractor for a raw TCP input.
And then I tried to create a pipeline with a rule.

Any help understanding what I am doing wrong is appreciated.

Hi, flep

Please check the syntax.
In the rule, use the “parse_json” function to parse the nested fields. Here’s an example rule:


// Runs on messages that have a "message" field: parses that field as JSON and
// copies two entries of the parsed result into message fields of their own.
rule "Parse Nested Fields"
when
    has_field("message")
then
    // Parse the text of the "message" field as JSON.
    let json_message = parse_json(to_string($message.message));
    // NOTE(review): assumes dot access on the parse_json() result returns the nested
    // value — confirm; select_jsonpath() is the function documented for reading it.
    let http_request = json_message.httpRequest;
    // NOTE(review): the sample log posted in this thread names this key
    // "ruleGroupList", not "ruleListGroup" — confirm the intended key.
    let rule_list_group = json_message.ruleListGroup;

    // Store the two extracted values as fields on the message.
    set_field("httpRequest", http_request);
    set_field("ruleListGroup", rule_list_group);
end

This pipeline rule attempts to parse the JSON in the message field and extract the httpRequest and ruleListGroup values into separate fields. Now you need to adjust the rule as needed based on your actual JSON structure.

Remember to check the Graylog server and processing pipeline simulator logs for any errors or warnings if things aren’t working as expected.

1 Like

Hello bahram,

Thank you for your reply.
I tried multiple times with different code, and the simulator showed me the same result each time.
The server.log shows no errors either (I’m using Docker), although it does log one if I make a mistake.

I appreciate your first help :slight_smile:

The sample I used to test the simulator:

{
    "timestamp": 1698624183281,
    "formatVersion": 1,
    "webaclId": "arn:aws:xxxxxxxxxx77a",
    "terminatingRuleId": "Default_Action",
    "terminatingRuleType": "REGULAR",
    "action": "ALLOW",
    "terminatingRuleMatchDetails": [],
    "httpSourceName": "ALB",
    "httpSourceId": "xxxxxxxxxx",
    "ruleGroupList": [
        {
            "ruleGroupId": "xxxxxxx",
            "terminatingRule": null,
            "nonTerminatingMatchingRules": [],
            "excludedRules": null,
            "customerConfig": null
        },
        {
            "ruleGroupId": "xxxxxxxx",
            "terminatingRule": null,
            "nonTerminatingMatchingRules": [],
            "excludedRules": null,
            "customerConfig": null
        }
    ],
    "rateBasedRuleList": [
        {
            "rateBasedRuleId": "xxxxxx",
            "rateBasedRuleName": "xxxxxx",
            "limitKey": "IP",
            "maxRateAllowed": 998,
            "limitValue": "xxxxxx"
        }
    ],
    "nonTerminatingMatchingRules": [],
    "requestHeadersInserted": null,
    "responseCodeSent": null,
    "httpRequest": {
        "clientIp": "xxxxx",
        "country": "US",
        "headers": [
            {
                "name": "content-length",
                "value": "1503"
            },
            {
                "name": "sec-ch-ua",
                "value": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\""
            },
            {
                "name": "sec-ch-ua-platform",
                "value": "\"Windows\""
            },
            {
                "name": "sec-ch-ua-mobile",
                "value": "?0"
            },
            {
                "name": "user-agent",
                "value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
            },
            {
                "name": "content-type",
                "value": "application/x-www-form-urlencoded"
            },
            {
                "name": "accept",
                "value": "*/*"
            },
            {
                "name": "sec-fetch-site",
                "value": "cross-site"
            },
            {
                "name": "sec-fetch-mode",
                "value": "cors"
            },
            {
                "name": "sec-fetch-dest",
                "value": "empty"
            },
            {
                "name": "accept-encoding",
                "value": "gzip, deflate, br"
            },
            {
                "name": "accept-language",
                "value": "en-US,en;q=0.9"
            }
        ],
        "uri": "/track",
        "args": "",
        "httpVersion": "HTTP/2.0",
        "httpMethod": "POST",
        "requestId": "1-653ef2b7-xxxxx"
    },
    "requestBodySize": 1503,
    "requestBodySizeInspectedByWAF": 1503,
    "ja3Fingerprint": "xxxxxx"
}
1 Like

Hello,

It has been some months since I posted the first message.
But I’m still facing an issue.

I observed that when the to_string(json) is applied it breaks the json.

This post explains exactly what I want to do: JsonList parser - #10 by gianluca-valentini
Unfortunately I don’t have the knowledge to reproduce it.

In addition

// Runs on messages that have a "rateBasedRuleList" field: takes the text of the
// "httpRequest" field, flattens it as JSON into top-level message fields, then
// removes "httpRequest" and overwrites "message".
rule "Parse Nested Fields"
when
    // NOTE(review): the condition tests "rateBasedRuleList" while the body reads
    // "httpRequest" — confirm this is intended.
    has_field("rateBasedRuleList")
then
    let sJson = to_string($message.httpRequest);
    // Strip a leading "[" and a trailing "]" from the string.
    // NOTE(review): sJson is declared a second time with `let` — confirm the rule
    // parser accepts the redefinition.
    let sJson = regex_replace(
        pattern: "^\\[|\\]$",
        value: sJson,
        replacement: ""
        );
    // NOTE(review): presumably "flatten" selects how JSON arrays are handled — verify
    // against the flatten_json documentation. The poster reports this call returns nothing.
    let rsJson = flatten_json(to_string(sJson), "flatten");
    // Write every entry of the flattened result as a field on the message.
    set_fields(to_map(rsJson));
    remove_field("httpRequest");
    set_field("message", "parsed user data");
end
 // The following call returns nothing:
 let rsJson = flatten_json(to_string(sJson), "flatten");

Try using select_jsonpath(). Seems easier than regex.
E.g. the JSONpath to access ruleGroupId in your example is $.ruleGroupList[1].ruleGroupId.

Here’s a good online checker for JSONPath: https://jsonpath.com/

Does that imply that I need to define all the fields and select them one by one?

I’m using a json extractor also, should I disable it and process the json by myself ?

This is what I’m trying to parse :slight_smile:

In general, we recommend using pipeline over extractors - it is more versatile.
If you only care about a small subset of data, it would be more efficient to just target those than to extract all of the fields.

Thank you for the advice.

I solved my issue using logstash and preprocessing the nested keys before with a ruby filter.