Parsing nested JSON from the JSON path HTTP API input

Hi dear enthusiasts,

I am running a dockerised Graylog 4.2 with docker-compose.
I ingest JSON data from an API using the JSON path HTTP API input. This is what the JSON data from the API looks like:

{
  "processGroupStatus": {
    "id": "42c68e21-017d-1000-12f0-602b3f80ebdf",
    "name": "NiFi Flow",
    "statsLastRefreshed": "13:06:30 UTC",
    "aggregateSnapshot": {
      "id": "42c68e21-017d-1000-12f0-602b3f80ebdf",
      "name": "NiFi Flow",
      "connectionStatusSnapshots": [
        {
          "id": "4cc9e230-017d-1000-d35e-d45ef3bb7970",
          "connectionStatusSnapshot": {
            "id": "4cc9e230-017d-1000-d35e-d45ef3bb7970",
            "groupId": "42c68e21-017d-1000-12f0-602b3f80ebdf",
            "name": "failure",
            "sourceName": "QueryRecord",
            "destinationName": "QueryRecord",
            "flowFilesIn": 0,
            "bytesIn": 0,
            "input": "0 (0 bytes)",
            "flowFilesOut": 0,
            "bytesOut": 0,
            "output": "0 (0 bytes)",
            "flowFilesQueued": 0,
            "bytesQueued": 0,
            "queued": "0 (0 bytes)",
            "queuedSize": "0 bytes",
            "queuedCount": "0",
            "percentUseCount": 0,
            "percentUseBytes": 0
          },
          "canRead": true
        },
        {
          "id": "017d1005-d114-1cc9-a405-b613763081f4",
          "connectionStatusSnapshot": {
            "id": "017d1005-d114-1cc9-a405-b613763081f4",
            "groupId": "42c68e21-017d-1000-12f0-602b3f80ebdf",
            "name": "query",
            "sourceName": "QueryRecord",
            "destinationName": "Funnel",
            "flowFilesIn": 0,
            "bytesIn": 0,
            "input": "0 (0 bytes)",
            "flowFilesOut": 0,
            "bytesOut": 0,
            "output": "0 (0 bytes)",
            "flowFilesQueued": 0,
            "bytesQueued": 0,
            "queued": "0 (0 bytes)",
            "queuedSize": "0 bytes",
            "queuedCount": "0",
            "percentUseCount": 0,
            "percentUseBytes": 0
          },
          "canRead": true
        },
        {
          "id": "017d1007-d114-1cc9-3afb-ea68c1dd1d71",
          "connectionStatusSnapshot": {
            "id": "017d1007-d114-1cc9-3afb-ea68c1dd1d71",
            "groupId": "42c68e21-017d-1000-12f0-602b3f80ebdf",
            "name": "failure",
            "sourceName": "QueryRecord",
            "destinationName": "Funnel",
            "flowFilesIn": 0,
            "bytesIn": 0,
            "input": "0 (0 bytes)",
            "flowFilesOut": 0,
            "bytesOut": 0,
            "output": "0 (0 bytes)",
            "flowFilesQueued": 0,
            "bytesQueued": 0,
            "queued": "0 (0 bytes)",
            "queuedSize": "0 bytes",
            "queuedCount": "0",
            "percentUseCount": 0,
            "percentUseBytes": 0
          },
          "canRead": true
        },
        {
          "id": "4cc9c107-017d-1000-7e94-3d598af1d186",
          "connectionStatusSnapshot": {
            "id": "4cc9c107-017d-1000-7e94-3d598af1d186",
            "groupId": "42c68e21-017d-1000-12f0-602b3f80ebdf",
            "name": "success",
            "sourceName": "GenerateFlowFile",
            "destinationName": "QueryRecord",
            "flowFilesIn": 0,
            "bytesIn": 0,
            "input": "0 (0 bytes)",
            "flowFilesOut": 0,
            "bytesOut": 0,
            "output": "0 (0 bytes)",
            "flowFilesQueued": 0,
            "bytesQueued": 0,
            "queued": "0 (0 bytes)",
            "queuedSize": "0 bytes",
            "queuedCount": "0",
            "percentUseCount": 0,
            "percentUseBytes": 0
          },
          "canRead": true
        },
        {
          "id": "017d1009-d114-1cc9-a097-8bcb313cc29f",
          "connectionStatusSnapshot": {
            "id": "017d1009-d114-1cc9-a097-8bcb313cc29f",
            "groupId": "42c68e21-017d-1000-12f0-602b3f80ebdf",
            "name": "query",
            "sourceName": "QueryRecord",
            "destinationName": "Funnel",
            "flowFilesIn": 0,
            "bytesIn": 0,
            "input": "0 (0 bytes)",
            "flowFilesOut": 0,
            "bytesOut": 0,
            "output": "0 (0 bytes)",
            "flowFilesQueued": 0,
            "bytesQueued": 0,
            "queued": "0 (0 bytes)",
            "queuedSize": "0 bytes",
            "queuedCount": "0",
            "percentUseCount": 0,
            "percentUseBytes": 0
          },
          "canRead": true
        }
      ],
      "processorStatusSnapshots": [
        {
          "id": "4cc95d00-017d-1000-e5bc-3cf01768c2fd",
          "processorStatusSnapshot": {
            "id": "4cc95d00-017d-1000-e5bc-3cf01768c2fd",
            "groupId": "42c68e21-017d-1000-12f0-602b3f80ebdf",
            "name": "QueryRecord",
            "type": "QueryRecord",
            "runStatus": "Stopped",
            "executionNode": "ALL",
            "bytesRead": 0,
            "bytesWritten": 0,
            "read": "0 bytes",
            "written": "0 bytes",
            "flowFilesIn": 0,
            "bytesIn": 0,
            "input": "0 (0 bytes)",
            "flowFilesOut": 0,
            "bytesOut": 0,
            "output": "0 (0 bytes)",
            "taskCount": 0,
            "tasksDurationNanos": 0,
            "tasks": "0",
            "tasksDuration": "00:00:00.000",
            "activeThreadCount": 0,
            "terminatedThreadCount": 0
          },
          "canRead": true
        },
        {
          "id": "4cc9b2b1-017d-1000-a554-56b39a4e1bb6",
          "processorStatusSnapshot": {
            "id": "4cc9b2b1-017d-1000-a554-56b39a4e1bb6",
            "groupId": "42c68e21-017d-1000-12f0-602b3f80ebdf",
            "name": "GenerateFlowFile",
            "type": "GenerateFlowFile",
            "runStatus": "Stopped",
            "executionNode": "ALL",
            "bytesRead": 0,
            "bytesWritten": 0,
            "read": "0 bytes",
            "written": "0 bytes",
            "flowFilesIn": 0,
            "bytesIn": 0,
            "input": "0 (0 bytes)",
            "flowFilesOut": 0,
            "bytesOut": 0,
            "output": "0 (0 bytes)",
            "taskCount": 0,
            "tasksDurationNanos": 0,
            "tasks": "0",
            "tasksDuration": "00:00:00.000",
            "activeThreadCount": 0,
            "terminatedThreadCount": 0
          },
          "canRead": true
        },
        {
          "id": "017d1000-5d00-1cc9-d584-c33bd37f436b",
          "processorStatusSnapshot": {
            "id": "017d1000-5d00-1cc9-d584-c33bd37f436b",
            "groupId": "42c68e21-017d-1000-12f0-602b3f80ebdf",
            "name": "QueryRecord",
            "type": "QueryRecord",
            "runStatus": "Stopped",
            "executionNode": "ALL",
            "bytesRead": 0,
            "bytesWritten": 0,
            "read": "0 bytes",
            "written": "0 bytes",
            "flowFilesIn": 0,
            "bytesIn": 0,
            "input": "0 (0 bytes)",
            "flowFilesOut": 0,
            "bytesOut": 0,
            "output": "0 (0 bytes)",
            "taskCount": 0,
            "tasksDurationNanos": 0,
            "tasks": "0",
            "tasksDuration": "00:00:00.000",
            "activeThreadCount": 0,
            "terminatedThreadCount": 0
          },
          "canRead": true
        }
      ],
      "processGroupStatusSnapshots": [],
      "remoteProcessGroupStatusSnapshots": [],
      "inputPortStatusSnapshots": [],
      "outputPortStatusSnapshots": [],
      "flowFilesIn": 0,
      "bytesIn": 0,
      "input": "0 (0 bytes)",
      "flowFilesQueued": 0,
      "bytesQueued": 0,
      "queued": "0 (0 bytes)",
      "queuedCount": "0",
      "queuedSize": "0 bytes",
      "bytesRead": 0,
      "read": "0 bytes",
      "bytesWritten": 0,
      "written": "0 bytes",
      "flowFilesOut": 0,
      "bytesOut": 0,
      "output": "0 (0 bytes)",
      "flowFilesTransferred": 0,
      "bytesTransferred": 0,
      "transferred": "0 (0 bytes)",
      "bytesReceived": 0,
      "flowFilesReceived": 0,
      "received": "0 (0 bytes)",
      "bytesSent": 0,
      "flowFilesSent": 0,
      "sent": "0 (0 bytes)",
      "activeThreadCount": 0,
      "terminatedThreadCount": 0
    }
  },
  "canRead": true
}

However, on graylog, it ends up looking like this after using a json extractor:

As you can see, the non-nested fields get parsed, but for some reason the nested fields get their colons converted to equals signs.

I have already tried a number of different ways to address this using the regex replace extractor, but that approach does not scale: I had to create a separate extractor for each nested section, which is not ideal and not very smart of me.

I was wondering if anyone has any good methods using either extractors or pipelines to parse all the json message including the nested bits?

Thank you

Hello @jimbo, and welcome!

I’m looking into this, but I have a couple of questions.
When you stated this…

But then you stated this…

I’m assuming that on your JSON path HTTP API input you created a JSON extractor, and then had to create other extractors to parse your field called “processGroupStatus_aggregateSnapshot_connectionStatusSnapshots”.

How did you configure that JSON extractor? That’s one long field. To be honest, it looks like two fields put together.

Here is some information I dug up; perhaps it will help.

Hi there,

Thank you @gsmith .

That is correct. I did create a JSON extractor with the following config:

However, the resulting parsed data that I showed you, with equals signs in the nested parts, was solely the result of using the JSON extractor, not any other extractors.
The long nested field you are referring to is an array of JSON objects.

Thank you for your help. I will have a look at those links. They seem rather helpful. :smiley: