Unable to use a pipeline to parse JSON logs from HashiCorp Vault

Before you post: Your responses to these questions will help the community help you. Please complete this template if you’re asking a support question.
Don’t forget to select tags to help index your topic!

1. Describe your incident:
I am piping some logs from HashiCorp Vault to Graylog, and for the life of me I am unable to use a pipeline to extract the keys and store them as fields with their respective values.

Vault returns a nested JSON string - see example:

Ingested

vault vault[#####]: {"auth":{"accessor":"123456","client_token":"123456","display_name":"test","policies":["test"],"policy_results":{"allowed":true,"granting_policies":[{"type":""},{"name":"test","namespace_id":"test","type":"acl"}]},"token_policies":["test","test"],"token_issue_time":"2025-01-27","token_ttl":18000,"token_type":"service"},"request":{"client_id":"+123456=","client_token":"123456","client_token_accessor":"123456","id":"123456","mount_accessor":"kv","mount_class":"sec","mount_point":"kv/","mount_running_version":"v0.20","mount_type":"kv","namespace":{"id":"test"},"operation":"read","path":"kv/data/test","remote_address":"1.1.1.1","remote_port":13979},"response":{"data":{"data":{"sec1":"123456","sec2":"123456","sec3":"123456","sec4":"123456"},"metadata":{"created_time":"123456","custom_metadata":null,"deletion_time":"123456","destroyed":false,"version":7}},"mount_accessor":"kv","mount_class":"s","mount_point":"kv/","mount_running_plugin_version":"v0.20.","mount_type":"kv"},"time":"2025-01-27","type":"response"}

User Friendly

vault vault[#####]: {
    "auth": {
        "accessor": "123456",
        "client_token": "123456",
        "display_name": "test",
        "policies": [
            "test"
        ],
        "policy_results": {
            "allowed": true,
            "granting_policies": [
                {
                    "type": ""
                },
                {
                    "name": "test",
                    "namespace_id": "test",
                    "type": "acl"
                }
            ]
        },
        "token_policies": [
            "test",
            "test"
        ],
        "token_issue_time": "2025-01-27",
        "token_ttl": 18000,
        "token_type": "service"
    },
    "request": {
        "client_id": "+123456=",
        "client_token": "123456",
        "client_token_accessor": "123456",
        "id": "123456",
        "mount_accessor": "kv",
        "mount_class": "sec",
        "mount_point": "kv/",
        "mount_running_version": "v0.20",
        "mount_type": "kv",
        "namespace": {
            "id": "test"
        },
        "operation": "read",
        "path": "kv/data/test",
        "remote_address": "1.1.1.1",
        "remote_port": 13979
    },
    "response": {
        "data": {
            "data": {
                "sec1": "123456",
                "sec2": "123456",
                "sec3": "123456",
                "sec4": "123456"
            },
            "metadata": {
                "created_time": "123456",
                "custom_metadata": null,
                "deletion_time": "123456",
                "destroyed": false,
                "version": 7
            }
        },
        "mount_accessor": "kv",
        "mount_class": "s",
        "mount_point": "kv/",
        "mount_running_plugin_version": "v0.20.",
        "mount_type": "kv"
    },
    "time": "2025-01-27",
    "type": "response"
}

This is the pipeline rule I am trying to use, but to no avail:

// Tries to parse the message text as JSON and copy selected keys of the
// Vault audit entry into individual message fields.
// Runs only for messages that have a "message" field containing `{"auth":`.
rule "extract_json_to_fields"
when
    has_field("message") && contains(to_string($message.message), "{\"auth\":")
then
    // NOTE(review): the sample log line in this post starts with
    // `vault vault[#####]: ` before the opening brace, so the string handed to
    // parse_json here is not pure JSON.
    let json_data = parse_json(to_string($message.message));

    // NOTE(review): the lines below read values with dot paths on the parsed
    // result (json_data.auth.accessor, ...). Presumably this does not resolve
    // JSON keys; the Graylog docs show select_jsonpath or
    // flatten_json + set_fields for this - TODO confirm.

    // Extract fields from the "auth" object
    set_field("auth_accessor", json_data.auth.accessor);
    set_field("auth_client_token", json_data.auth.client_token);
    set_field("auth_display_name", json_data.auth.display_name);
    set_field("auth_policies", json_data.auth.policies);
    set_field("auth_token_ttl", json_data.auth.token_ttl);

    // Extract nested policy results (auth.policy_results)
    set_field("policy_results_allowed", json_data.auth.policy_results.allowed);
    set_field("policy_results_granting_policies", json_data.auth.policy_results.granting_policies);

    // Extract fields from the "request" object
    set_field("request_client_id", json_data.request.client_id);
    set_field("request_client_token", json_data.request.client_token);
    set_field("request_client_token_accessor", json_data.request.client_token_accessor);
    set_field("request_path", json_data.request.path);
    set_field("request_remote_address", json_data.request.remote_address);
    set_field("request_remote_port", json_data.request.remote_port);

    // Extract the secret payload (response.data.data)
    // NOTE(review): the keys nas_p1 / nas_un1 / pass1 / user1 do not appear in
    // the sample payload shown in this post, which has sec1..sec4.
    set_field("response_data_nas_p1", json_data.response.data.data.nas_p1);
    set_field("response_data_nas_un1", json_data.response.data.data.nas_un1);
    set_field("response_data_pass1", json_data.response.data.data.pass1);
    set_field("response_data_user1", json_data.response.data.data.user1);

    // Extract the secret metadata (response.data.metadata)
    set_field("response_metadata_created_time", json_data.response.data.metadata.created_time);
    set_field("response_metadata_deletion_time", json_data.response.data.metadata.deletion_time);
    set_field("response_metadata_destroyed", json_data.response.data.metadata.destroyed);
    set_field("response_metadata_version", json_data.response.data.metadata.version);

    // Add more fields as needed for specific JSON keys
end

I do have to admit that I am still fairly new to Graylog.

2. Describe your environment:

  • OS Information: Debian

  • Package Version: 6.1

The first thing you will need to do is clean it up so that it's JUST JSON; it won't like the stuff before the first {. You can normally use a regex to get the clean JSON. Then, to test, use flatten_json, as it will let you make sure it likes the JSON with far fewer steps.

Thanks for replying and suggesting using regex. I did implement that.

Still a bit stuck:

// Second attempt: strip the syslog-style prefix from the message, flatten the
// remaining JSON, then set individual fields from it.
// Runs only for messages that have a "message" field containing `{"auth":`.
rule "extract_json_to_fields"
when
  has_field("message") && contains(to_string($message.message), "{\"auth\":")
then
  // Remove everything up to and including the first colon (plus any whitespace
  // after it) so that only the JSON payload is left to be parsed.
  // regex_replace expects (pattern, value, replacement); named arguments are
  // used so the message is the value being searched and "" is the replacement.
  let cleaned_message = regex_replace(pattern: "^.*?:\\s*", value: to_string($message.message), replacement: "");

  // Parse the cleaned string as JSON and flatten nested containers.
  let json = flatten_json(value: cleaned_message, array_handler: "flatten");

  // Attempt to set the keys as fields.
  // NOTE(review): these dot paths on the flattened result presumably do not
  // resolve to the JSON values, which would explain why the simulator shows
  // nothing. The documented route is set_fields(to_map(json)) - TODO confirm.
  set_field("auth.accessor", json.auth.accessor);
  set_field("auth.client_token", json.auth.client_token);
  set_field("auth.display_name", json.auth.display_name);
  // continuing to set fields

end
  • Could you provide any recommendations on how I can set the fields.
  • Also, I am not getting errors in the code editor.

Here is an example using flatten_json; you don't need to set the fields one by one.

// Flattens the JSON held in the message text and writes every resulting
// key/value pair to the message as a field. Applies to every message.
rule "JSON FLATTEN"
when
   true
then
   // Read the raw message text once.
   let raw_message = to_string($message.message);
   // Parse it as JSON, flattening nested objects and arrays; stringify is off
   // so values are not forced to strings.
   let flattened = flatten_json(value: raw_message, array_handler: "flatten", stringify: false);
   // Convert the flattened result to a map and set all of its entries as fields.
   let field_map = to_map(flattened);
   set_fields(field_map);
end
1 Like

Sorry, I have just gotten back to you on this. Been so busy.

Thank you so much it worked exactly as I needed!!!

I created a pipeline in which stage one removes the string before the JSON and stage two follows your example.

// Stage-one rule: removes the Vault log prefix from the message so that only
// the JSON payload remains in the "message" field.
rule "remove_vault_string"
when
  // Only act on messages that carry the Vault prefix.
  // NOTE(review): "[***]" looks like a redacted placeholder; contains/replace
  // match this text literally - confirm the real value used in production.
  has_field("message") && contains(to_string($message.message), "vault vault[***]:")
then
  let original_message = to_string($message.message);
  // Drop the leading prefix, leaving the JSON log to be parsed later.
  let json_only = replace(original_message, "vault vault[***]:", "");
  // Overwrite the message field with the stripped text.
  set_field("message", json_only);
end