OK, I have worked out a solution, in case it helps anyone down the track.
I traced the queries made by the GUI (using mitmproxy) and, with some help from the link below, figured out how to deal with the id field (listed as optional but, as it turns out, required): Graylog 3.3.9 Search API
The code we were using for Graylog 3.1 is as follows. The endpoint used here has been dropped:
def get_unique_logs(field, query):
    """Fetch the terms of ``field`` for ``query`` from the Graylog 3.1 search API.

    Sends a GET request to ``/api/search/universal/keyword/terms`` covering
    the keyword range "last 30 days", filtered to the stream identified by
    the module-level ``spark_streamid``.

    Returns the ``"terms"`` entry of the JSON response (the unique values of
    the field together with how many times each was logged) when the server
    answers with HTTP 200, and ``None`` for any other status code.
    """
    import requests

    search_params = dict(
        query=query,
        keyword="last 30 days",
        filter=f"streams:{spark_streamid}",
        field=field,
        batch_size=10000,
    )
    terms_url = f"{graylog_url}/api/search/universal/keyword/terms"

    # The API token is sent as the basic-auth user name, with the literal
    # string "token" in the password slot.
    response = requests.get(
        terms_url,
        params=search_params,
        auth=(graylog_token, "token"),
        headers={"Accept": "application/json"},
    )

    if response.status_code != 200:
        return None
    return response.json()["terms"]
To work with Graylog 4 and its views/* endpoints, the function changed to:
def get_unique_logs_401(field, query, *, keyword="last 30 days", limit=15):
    """Run a pivot search on ``field`` through the Graylog 4 views API.

    POSTs a single-query search to ``/api/views/search/sync`` that groups the
    messages matching ``query`` by the values of ``field`` and requests a
    ``count()`` series for each group.

    Args:
        field: Name of the message field used for the ``values`` row group.
        query: Query string sent as the ``elasticsearch``-type query.
        keyword: Keyword time range for the search (keyword-only).
        limit: Value sent as the row group's ``limit`` (keyword-only).

    Returns:
        The complete decoded JSON body of the response when the server
        answers with HTTP 200, otherwise ``None``. The body is returned
        as-is; callers have to extract the values and counts themselves.
        NOTE(review): the pivot rows are presumably nested under the query
        and search-type ids ("?") used below -- confirm against a real
        response.
    """
    import requests

    headers = {"Content-Type": "application/json", "X-Requested-By": "jupyter"}
    # The API token is sent as the basic-auth user name, with the literal
    # string "token" in the password slot.
    auth = (graylog_token, "token")

    payload = {
        "queries": [
            {
                # The API docs list "id" as optional, but it turned out to
                # be required; a fixed placeholder is used.
                "id": "?",
                "timerange": {
                    "type": "keyword",
                    "keyword": keyword,
                },
                "query": {
                    "type": "elasticsearch",
                    "query_string": query,
                },
                "search_types": [
                    {
                        "id": "?",
                        "column_groups": [],
                        "filter": None,
                        "name": "chart",
                        "query": None,
                        "rollup": True,
                        "row_groups": [
                            {
                                "field": field,
                                "limit": limit,
                                "type": "values",
                            }
                        ],
                        "series": [
                            {
                                "field": None,
                                "id": "count()",
                                "type": "count",
                            }
                        ],
                        "sort": [],
                        # NOTE(review): the Graylog 3.1 helper filtered on
                        # spark_streamid; here the stream list is empty,
                        # which presumably means no stream restriction --
                        # confirm this is intended.
                        "streams": [],
                        "timerange": None,
                        "type": "pivot",
                    }
                ],
            }
        ]
    }

    http_request = f"{graylog_url}/api/views/search/sync"
    # json= lets requests serialise the payload itself.
    r = requests.post(http_request, json=payload, auth=auth, headers=headers)
    if r.status_code != 200:
        return None
    return r.json()