Extract nested json issue

1. Describe your incident:
I’m feeding DUO security logs into Graylog using Elastic Agent. I’m running an initial json extraction using the code I found from @tmacgbay. But when I try to run a second json extraction further down for the nested fields it’s not working.

Data 1:

{"access_device":{"epkey":"57KUNSYFRI2UYNRFW3","hostname":null,"ip":"10.10.10.10","location":{"city":"Atlanta","country":"United States","state":"Georgia"}},"alias":"","application":{"key":"57HD233375NY29","name":"Google Workspace - Admin and Staff"},"auth_device":{"ip":"11.11.11.11","key":"DPW35H33X23752397HN","location":{"city":"Atlanta","country":"United States","state":"Georgia"},"name":"000-000-0001"},"email":"user@company.com","event_type":"authentication","factor":"verified_duo_push","isotimestamp":"2024-02-01T16:08:57.496224+00:00","ood_software":null,"reason":"verification_code_correct","result":"success","timestamp":1706803737,"trusted_endpoint_status":"unknown","txid":"8ac223a3-035e-4033-a333-9337e33dc339","user":{"groups":["duo_it (from AD sync \"AD Sync Duo1\")","duo_pilot (from AD sync \"AD Sync Duo1\")"],"key":"DUOT63333HY9ZA233","name":"uname"}}

Pipeline Rule 1:

rule "type duo-json-parser"
// Expands the JSON payload carried in the message field into message fields.
// Only runs for messages whose filebeat_input_type equals "httpjson".
when
    has_field("filebeat_input_type") AND
    to_string($message.filebeat_input_type) == "httpjson"
then
    // Parse the raw payload and convert the resulting tree to a map in one step.
    let duo_fields = to_map(parse_json(to_string($message.message)));
    //debug(concat("The map: ", to_string(duo_fields)));

    // Write every entry of the map onto the message as a field.
    set_fields(duo_fields);
end

This parses out everything except the nested data.

In a pipeline further down I tried similar code to break out nested json in message fields access_device, application, and auth_device.

Fields:

access_device
    {"hostname":null,"epkey":"57KUNSYFRI2UYNRFW3","ip":"10.10.10.10","location":{"country":"United States","city":"Atlanta","state":"Georgia"}}
application
    {"name":"Google Workspace - Admin and Staff","key":"57HD233375NY29"}
auth_device
    {"ip":"11.11.11.11","name":"000-000-0001","location":{"country":"United States","city":"Atlanta","state":"Georgia"},"key":"DPW35H33X23752397HN"}

rule "type duo-json-parser-access_device" 
when 
   has_field("access_device")
then   
    // Second-stage attempt: treat the access_device field as JSON text,
    // parse it, and promote its keys to message fields.
    // NOTE(review): if the earlier set_fields() stored access_device as a map
    // object rather than a JSON string, to_string() here presumably does not
    // produce valid JSON, so parse_json() would yield nothing -- confirm.
    let the_json = parse_json(to_string($message.access_device));
    debug(concat("The json: ", to_string(the_json)));
    
    // Convert whatever was parsed into a map; logged to compare with the step above.
    let the_map = to_map(the_json);
    debug(concat("The map: ", to_string(the_map)));

   // Write each map entry onto the message as a field.
   set_fields(the_map);   
end

But it doesn’t parse the data.
I checked the debug logs and it’s showing:

2024-02-01T11:15:15.482-05:00 INFO [Function] PIPELINE DEBUG: The json:
2024-02-01T11:15:15.483-05:00 INFO [Function] PIPELINE DEBUG: The map:

I confirmed the rule is being hit by adding a set_field call below the set_fields call:
set_field("xduo_test", "002");

2. Describe your environment:
Graylog 5.2.3
CentOS

Any ideas on how to accomplish this? Or why it’s not working?

Interestingly, when I use "Add to query" from the logs on access_device, application, or auth_device, it adds this to the query: access_device:"[object Object]"

And when I look in the fields it doesn’t show access_device, etc…

So Graylog doesn’t handle nested JSON in message fields; the data needs to be flattened first.

Code from @jivepig found here appears to have fixed the issue:

rule "Random User Data Flatten Json Rule"
// From sample data : https://randomuser.me/api/
// Api input path: *
// Flattens the JSON in the message field and stores each resulting entry as a message field.
when
    true
then
    // Remove a "[" at the very start and a "]" at the very end of the payload.
    let unwrapped = regex_replace(
        pattern: "^\\[|\\]$",
        value: to_string($message.message),
        replacement: ""
    );
    // Flatten the JSON, passing "flatten" as the array handler.
    let flat = flatten_json(to_string(unwrapped), "flatten");
    set_fields(to_map(flat));
    //remove_field("result");
    //set_field("message", "parsed user data");
end
2 Likes

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.