Hey @ralfbergs,
This comes down to how the message field is analysed and tokenised within OpenSearch. Usefully, OpenSearch has an API endpoint (_analyze) where messages can be tested, and below is an example of that. Essentially, the message is broken down into individual tokens. Take this particular part as an example: helo=<``ip-2.sib.doctolib.de``>. It is broken down into ["helo", "ip", "2", "sib.doctolib.de"]. Note that special characters (here =, <, >, the backticks and the hyphen) are dropped and cannot be used in a search.
Had you searched for johannis AND "sib.doctolib.de", results would have been returned, because sib.doctolib.de is indexed as a single token. I can see why this would not be obvious to anyone unfamiliar with what happens when data is written to OpenSearch.
Another approach would have been a regular-expression query such as message:/.*doctolib.*/, where the .* on either side lets it match any token that contains doctolib.
curl -X POST /_analyze?pretty -H 'Content-Type: application/json' -d '{"analyzer": "standard", "text": "Mar  4 09:52:50 cd617978469f postfix/smtpd[81057]: NOQUEUE: reject: RCPT from ``ip-2.sib.doctolib.de``[172.246.25.144]: 550 5.1.1 <johannis@example.com>: Recipient address rejected: User unknown in virtual mailbox table; from=<bounces-145167670-689042990@ip-2.sib.doctolib.de> to=<johannis@example.com> proto=ESMTP helo=<``ip-2.sib.doctolib.de``>"}'
{
"tokens" : [
{
"token" : "mar",
"start_offset" : 0,
"end_offset" : 3,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "4",
"start_offset" : 5,
"end_offset" : 6,
"type" : "<NUM>",
"position" : 1
},
{
"token" : "09",
"start_offset" : 7,
"end_offset" : 9,
"type" : "<NUM>",
"position" : 2
},
{
"token" : "52",
"start_offset" : 10,
"end_offset" : 12,
"type" : "<NUM>",
"position" : 3
},
{
"token" : "50",
"start_offset" : 13,
"end_offset" : 15,
"type" : "<NUM>",
"position" : 4
},
{
"token" : "cd617978469f",
"start_offset" : 16,
"end_offset" : 28,
"type" : "<ALPHANUM>",
"position" : 5
},
{
"token" : "postfix",
"start_offset" : 29,
"end_offset" : 36,
"type" : "<ALPHANUM>",
"position" : 6
},
{
"token" : "smtpd",
"start_offset" : 37,
"end_offset" : 42,
"type" : "<ALPHANUM>",
"position" : 7
},
{
"token" : "81057",
"start_offset" : 43,
"end_offset" : 48,
"type" : "<NUM>",
"position" : 8
},
{
"token" : "noqueue",
"start_offset" : 51,
"end_offset" : 58,
"type" : "<ALPHANUM>",
"position" : 9
},
{
"token" : "reject",
"start_offset" : 60,
"end_offset" : 66,
"type" : "<ALPHANUM>",
"position" : 10
},
{
"token" : "rcpt",
"start_offset" : 68,
"end_offset" : 72,
"type" : "<ALPHANUM>",
"position" : 11
},
{
"token" : "from",
"start_offset" : 73,
"end_offset" : 77,
"type" : "<ALPHANUM>",
"position" : 12
},
{
"token" : "ip",
"start_offset" : 80,
"end_offset" : 82,
"type" : "<ALPHANUM>",
"position" : 13
},
{
"token" : "2",
"start_offset" : 83,
"end_offset" : 84,
"type" : "<NUM>",
"position" : 14
},
{
"token" : "sib.doctolib.de",
"start_offset" : 85,
"end_offset" : 100,
"type" : "<ALPHANUM>",
"position" : 15
},
{
"token" : "172.246.25.144",
"start_offset" : 103,
"end_offset" : 117,
"type" : "<NUM>",
"position" : 16
},
{
"token" : "550",
"start_offset" : 120,
"end_offset" : 123,
"type" : "<NUM>",
"position" : 17
},
{
"token" : "5.1.1",
"start_offset" : 124,
"end_offset" : 129,
"type" : "<NUM>",
"position" : 18
},
{
"token" : "johannis",
"start_offset" : 131,
"end_offset" : 139,
"type" : "<ALPHANUM>",
"position" : 19
},
{
"token" : "example.com",
"start_offset" : 140,
"end_offset" : 151,
"type" : "<ALPHANUM>",
"position" : 20
},
{
"token" : "recipient",
"start_offset" : 154,
"end_offset" : 163,
"type" : "<ALPHANUM>",
"position" : 21
},
{
"token" : "address",
"start_offset" : 164,
"end_offset" : 171,
"type" : "<ALPHANUM>",
"position" : 22
},
{
"token" : "rejected",
"start_offset" : 172,
"end_offset" : 180,
"type" : "<ALPHANUM>",
"position" : 23
},
{
"token" : "user",
"start_offset" : 182,
"end_offset" : 186,
"type" : "<ALPHANUM>",
"position" : 24
},
{
"token" : "unknown",
"start_offset" : 187,
"end_offset" : 194,
"type" : "<ALPHANUM>",
"position" : 25
},
{
"token" : "in",
"start_offset" : 195,
"end_offset" : 197,
"type" : "<ALPHANUM>",
"position" : 26
},
{
"token" : "virtual",
"start_offset" : 198,
"end_offset" : 205,
"type" : "<ALPHANUM>",
"position" : 27
},
{
"token" : "mailbox",
"start_offset" : 206,
"end_offset" : 213,
"type" : "<ALPHANUM>",
"position" : 28
},
{
"token" : "table",
"start_offset" : 214,
"end_offset" : 219,
"type" : "<ALPHANUM>",
"position" : 29
},
{
"token" : "from",
"start_offset" : 221,
"end_offset" : 225,
"type" : "<ALPHANUM>",
"position" : 30
},
{
"token" : "bounces",
"start_offset" : 227,
"end_offset" : 234,
"type" : "<ALPHANUM>",
"position" : 31
},
{
"token" : "145167670",
"start_offset" : 235,
"end_offset" : 244,
"type" : "<NUM>",
"position" : 32
},
{
"token" : "689042990",
"start_offset" : 245,
"end_offset" : 254,
"type" : "<NUM>",
"position" : 33
},
{
"token" : "ip",
"start_offset" : 255,
"end_offset" : 257,
"type" : "<ALPHANUM>",
"position" : 34
},
{
"token" : "2",
"start_offset" : 258,
"end_offset" : 259,
"type" : "<NUM>",
"position" : 35
},
{
"token" : "sib.doctolib.de",
"start_offset" : 260,
"end_offset" : 275,
"type" : "<ALPHANUM>",
"position" : 36
},
{
"token" : "to",
"start_offset" : 277,
"end_offset" : 279,
"type" : "<ALPHANUM>",
"position" : 37
},
{
"token" : "johannis",
"start_offset" : 281,
"end_offset" : 289,
"type" : "<ALPHANUM>",
"position" : 38
},
{
"token" : "example.com",
"start_offset" : 290,
"end_offset" : 301,
"type" : "<ALPHANUM>",
"position" : 39
},
{
"token" : "proto",
"start_offset" : 303,
"end_offset" : 308,
"type" : "<ALPHANUM>",
"position" : 40
},
{
"token" : "esmtp",
"start_offset" : 309,
"end_offset" : 314,
"type" : "<ALPHANUM>",
"position" : 41
},
{
"token" : "helo",
"start_offset" : 315,
"end_offset" : 319,
"type" : "<ALPHANUM>",
"position" : 42
},
{
"token" : "ip",
"start_offset" : 323,
"end_offset" : 325,
"type" : "<ALPHANUM>",
"position" : 43
},
{
"token" : "2",
"start_offset" : 326,
"end_offset" : 327,
"type" : "<NUM>",
"position" : 44
},
{
"token" : "sib.doctolib.de",
"start_offset" : 328,
"end_offset" : 343,
"type" : "<ALPHANUM>",
"position" : 45
}
]
}