Hello everyone!
I would like to share a communication problem on nodes in graylog using docker swarm.
Contextualizing:
I’m currently testing creating the graylog environment on 5 servers to have the load distributed, and I’m using swarm to make the environment easier to scale.
Swarm Servers:
root@graylog-cluster-0001:~# docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
kmuryq6mo33m9nsknm8m2i0h1 * graylog-cluster-0001 Ready Active Leader 18.09.5
etpae23nhf68c11cga9rdbxnj graylog-cluster-0002 Ready Active 18.09.5
q57rve8vignpab338o9dxudpf graylog-cluster-0003 Ready Active 18.09.5
n6o7wv5ibt2qq53u5p9dnlxrk graylog-cluster-0004 Ready Active 18.09.5
ut927bkotqjntjey8uewlutfj graylog-cluster-0005 Ready Active 18.09.5
I am using the following code stack to raise the environment:
root@graylog-cluster-0001:~/docker-elk# cat docker-stack.yml
version: '3.3'
services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.1
ports:
- "9200:9200"
- "9300:9300"
configs:
- source: elastic_config
target: /usr/share/elasticsearch/config/elasticsearch.yml
environment:
ES_JAVA_OPTS: "-Xmx2g -Xms2g"
volumes:
- /nfs/elastic:/usr/share/elasticsearch/data
networks:
- elk
deploy:
mode: replicated
replicas: 1
mongo:
image: mongo:3
volumes:
- /nfs/mongo:/data/db
networks:
- elk
deploy:
mode: replicated
replicas: 1
graylogfwbv:
image: graylog/graylog:3.0.1
environment:
# CHANGE ME (must be at least 16 characters)!
- GRAYLOG_PASSWORD_SECRET=somepasswordpeppersdfdsfsfsadf
# Password: admin
#- GRAYLOG_ROOT_PASSWORD_SHA2=8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918
- GRAYLOG_ROOT_PASSWORD_SHA2=8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92
- GRAYLOG_IS_MASTER=false
depends_on:
- mongo
- elasticsearch
- graylogmaster
ports:
- 30010:30000/udp
networks:
- elk
deploy:
mode: replicated
replicas: 1
graylogfortigates:
image: graylog/graylog:3.0.1
environment:
# CHANGE ME (must be at least 16 characters)!
- GRAYLOG_PASSWORD_SECRET=somepasswordpeppersdfdsfsfsadf
# Password: admin
#- GRAYLOG_ROOT_PASSWORD_SHA2=8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918
- GRAYLOG_ROOT_PASSWORD_SHA2=8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92
- GRAYLOG_IS_MASTER=false
depends_on:
- mongo
- elasticsearch
- graylogmaster
ports:
- 30020:30000
networks:
- elk
deploy:
mode: replicated
replicas: 1
grayloggeral:
image: graylog/graylog:3.0.1
environment:
# CHANGE ME (must be at least 16 characters)!
- GRAYLOG_PASSWORD_SECRET=somepasswordpeppersdfdsfsfsadf
# Password: admin
#- GRAYLOG_ROOT_PASSWORD_SHA2=8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918
- GRAYLOG_ROOT_PASSWORD_SHA2=8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92
#- GRAYLOG_NODE_ID=de305d54-75b4-431b-adb2-eb6b9e5geral
- GRAYLOG_IS_MASTER=false
depends_on:
- mongo
- elasticsearch
- graylogmaster
ports:
- 1514:1514
- 5414:5414
- 5414:5414/udp
- 12201:12201/udp
- 30020:30020/udp
networks:
- elk
deploy:
mode: replicated
replicas: 1
graylogmaster:
image: graylog/graylog:3.0.1
environment:
# CHANGE ME (must be at least 16 characters)!
- GRAYLOG_PASSWORD_SECRET=somepasswordpeppersdfdsfsfsadf
# Password: admin 123456
#- GRAYLOG_ROOT_PASSWORD_SHA2=8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918
- GRAYLOG_ROOT_PASSWORD_SHA2=8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92
- GRAYLOG_HTTP_EXTERNAL_URI=http://200.xxx.xxx.xxx:9000/
- GRAYLOG_IS_MASTER=true
#- GRAYLOG_HEAP_SIZE="-Xms3g -Xmx3g"
#configs:
#- source: graylog_config
# target: /etc/graylog/server/node-id
depends_on:
- mongo
- elasticsearch
ports:
- 9000:9000
networks:
- elk
deploy:
mode: replicated
replicas: 1
kibana:
image: docker.elastic.co/kibana/kibana-oss:6.6.1
ports:
- "5601:5601"
configs:
- source: kibana_config
target: /usr/share/kibana/config/kibana.yml
networks:
- elk
deploy:
mode: replicated
replicas: 1
configs:
elastic_config:
file: ./elasticsearch/config/elasticsearch.yml
kibana_config:
file: ./kibana/config/kibana.yml
networks:
elk:
driver: overlay
To start the environment use the command
# docker stack deploy -c docker-stack.yml elk
As a test environment I’m using the docker visualizer to know where each container is located.
# docker service create --name=viz --publish=8080:8080/tcp --constraint=node.role==manager --mount=type=bind,src=/var/run/docker.sock,dst=/var/run/docker.sock dockersamples/visualizer
The main issue is the communication between the graylog nodes, in the image below everything is working correctly!
However when I remove all swarm services (simulating a physical server problem)
# docker service rm $(docker service ls -q)
I try to start the graylog stack again
# docker stack deploy -c docker-stack.yml elk
The communication between graylog nodes no longer occurs as expected:
Close the services again and start the communication again between the nodes again
Looking at the graylog master logs, it looks like they are not able to access the other nodes, log:
# docker logs elk_graylogmaster.1.2d0wduulcd33n8cs7w6jzpfde
2019-04-21 16:25:44,881 WARN : org.graylog2.shared.rest.resources.ProxiedResource - Unable to call http://172.18.0.3:9000/api/system on node <caa1f9ce-28c3-486a-949a-897423c79ddf>
java.net.ConnectException: Failed to connect to /172.18.0.3:9000
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:247) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connect(RealConnection.java:165) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findConnection(StreamAllocation.java:257) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findHealthyConnection(StreamAllocation.java:135) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.newStream(StreamAllocation.java:114) ~[graylog.jar:?]
at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.java:42) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.java:126) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at org.graylog2.rest.RemoteInterfaceProvider.lambda$get$0(RemoteInterfaceProvider.java:61) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.RealCall.getResponseWithInterceptorChain(RealCall.java:200) ~[graylog.jar:?]
at okhttp3.RealCall.execute(RealCall.java:77) ~[graylog.jar:?]
at retrofit2.OkHttpCall.execute(OkHttpCall.java:180) ~[graylog.jar:?]
at org.graylog2.shared.rest.resources.ProxiedResource.lambda$getForAllNodes$0(ProxiedResource.java:78) ~[graylog.jar:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_212]
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_212]
at java.net.Socket.connect(Socket.java:589) ~[?:1.8.0_212]
at okhttp3.internal.platform.Platform.connectSocket(Platform.java:129) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:245) ~[graylog.jar:?]
... 26 more
2019-04-21 16:25:45,846 WARN : org.graylog2.shared.rest.resources.ProxiedResource - Unable to call http://172.18.0.3:9000/api/system/metrics/multiple on node <caa1f9ce-28c3-486a-949a-897423c79ddf>
java.net.ConnectException: Failed to connect to /172.18.0.3:9000
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:247) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connect(RealConnection.java:165) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findConnection(StreamAllocation.java:257) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findHealthyConnection(StreamAllocation.java:135) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.newStream(StreamAllocation.java:114) ~[graylog.jar:?]
at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.java:42) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.java:126) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at org.graylog2.rest.RemoteInterfaceProvider.lambda$get$0(RemoteInterfaceProvider.java:61) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.RealCall.getResponseWithInterceptorChain(RealCall.java:200) ~[graylog.jar:?]
at okhttp3.RealCall.execute(RealCall.java:77) ~[graylog.jar:?]
at retrofit2.OkHttpCall.execute(OkHttpCall.java:180) ~[graylog.jar:?]
at org.graylog2.shared.rest.resources.ProxiedResource.lambda$getForAllNodes$0(ProxiedResource.java:78) ~[graylog.jar:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_212]
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_212]
at java.net.Socket.connect(Socket.java:589) ~[?:1.8.0_212]
at okhttp3.internal.platform.Platform.connectSocket(Platform.java:129) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:245) ~[graylog.jar:?]
... 26 more
2019-04-21 16:25:47,846 WARN : org.graylog2.shared.rest.resources.ProxiedResource - Unable to call http://172.18.0.3:9000/api/system/metrics/multiple on node <caa1f9ce-28c3-486a-949a-897423c79ddf>
java.net.ConnectException: Failed to connect to /172.18.0.3:9000
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:247) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connect(RealConnection.java:165) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findConnection(StreamAllocation.java:257) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findHealthyConnection(StreamAllocation.java:135) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.newStream(StreamAllocation.java:114) ~[graylog.jar:?]
at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.java:42) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.java:126) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at org.graylog2.rest.RemoteInterfaceProvider.lambda$get$0(RemoteInterfaceProvider.java:61) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.RealCall.getResponseWithInterceptorChain(RealCall.java:200) ~[graylog.jar:?]
at okhttp3.RealCall.execute(RealCall.java:77) ~[graylog.jar:?]
at retrofit2.OkHttpCall.execute(OkHttpCall.java:180) ~[graylog.jar:?]
at org.graylog2.shared.rest.resources.ProxiedResource.lambda$getForAllNodes$0(ProxiedResource.java:78) ~[graylog.jar:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_212]
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_212]
at java.net.Socket.connect(Socket.java:589) ~[?:1.8.0_212]
at okhttp3.internal.platform.Platform.connectSocket(Platform.java:129) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:245) ~[graylog.jar:?]
... 26 more
2019-04-21 16:25:49,845 WARN : org.graylog2.shared.rest.resources.ProxiedResource - Unable to call http://172.18.0.3:9000/api/system/metrics/multiple on node <caa1f9ce-28c3-486a-949a-897423c79ddf>
java.net.ConnectException: Failed to connect to /172.18.0.3:9000
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:247) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connect(RealConnection.java:165) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findConnection(StreamAllocation.java:257) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findHealthyConnection(StreamAllocation.java:135) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.newStream(StreamAllocation.java:114) ~[graylog.jar:?]
at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.java:42) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.java:126) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at org.graylog2.rest.RemoteInterfaceProvider.lambda$get$0(RemoteInterfaceProvider.java:61) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.RealCall.getResponseWithInterceptorChain(RealCall.java:200) ~[graylog.jar:?]
at okhttp3.RealCall.execute(RealCall.java:77) ~[graylog.jar:?]
at retrofit2.OkHttpCall.execute(OkHttpCall.java:180) ~[graylog.jar:?]
at org.graylog2.shared.rest.resources.ProxiedResource.lambda$getForAllNodes$0(ProxiedResource.java:78) ~[graylog.jar:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_212]
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_212]
at java.net.Socket.connect(Socket.java:589) ~[?:1.8.0_212]
at okhttp3.internal.platform.Platform.connectSocket(Platform.java:129) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:245) ~[graylog.jar:?]
... 26 more
2019-04-21 16:25:49,879 WARN : org.graylog2.shared.rest.resources.ProxiedResource - Unable to call http://172.18.0.3:9000/api/system on node <caa1f9ce-28c3-486a-949a-897423c79ddf>
java.net.ConnectException: Failed to connect to /172.18.0.3:9000
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:247) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connect(RealConnection.java:165) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findConnection(StreamAllocation.java:257) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.findHealthyConnection(StreamAllocation.java:135) ~[graylog.jar:?]
at okhttp3.internal.connection.StreamAllocation.newStream(StreamAllocation.java:114) ~[graylog.jar:?]
at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.java:42) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.java:93) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.java:126) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at org.graylog2.rest.RemoteInterfaceProvider.lambda$get$0(RemoteInterfaceProvider.java:61) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:147) ~[graylog.jar:?]
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:121) ~[graylog.jar:?]
at okhttp3.RealCall.getResponseWithInterceptorChain(RealCall.java:200) ~[graylog.jar:?]
at okhttp3.RealCall.execute(RealCall.java:77) ~[graylog.jar:?]
at retrofit2.OkHttpCall.execute(OkHttpCall.java:180) ~[graylog.jar:?]
at org.graylog2.shared.rest.resources.ProxiedResource.lambda$getForAllNodes$0(ProxiedResource.java:78) ~[graylog.jar:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_212]
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_212]
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_212]
at java.net.Socket.connect(Socket.java:589) ~[?:1.8.0_212]
at okhttp3.internal.platform.Platform.connectSocket(Platform.java:129) ~[graylog.jar:?]
at okhttp3.internal.connection.RealConnection.connectSocket(RealConnection.java:245) ~[graylog.jar:?]
<omitted the rest of the output>
I already checked:
Communication between the servers, all are within the same network, and communication between them is ok.
Graylog service, doing a 127.0.0.1 curl on each node the graylog responds correctly.
Detail
I noticed that restarting the docker service on all hosts sometimes the communication between the nodes goes back to working again
As I said I’m just testing the environment, I’m learning to use graylog and docker swarm, so I wonder if anyone has already gone through this.
I feel that the problem may be related to docker, since restarting the service sometimes working, but if someone with more experience could pass some techniques of how to do the troublehshotting.
Thank you very much