# Get all logs of a specific pod (for example, a worker)
$ kubectl -n alx-ns logs alluxio-cluster-worker-59476bf8c5-lg4sc
# Filter for WARN or ERROR messages and also show the line following each match
# (the two lines after the command are sample output)
$ kubectl -n alx-ns logs alluxio-cluster-fuse-acee53e8f0a9-3gjbrdekk0 | grep -A 1 'WARN\|ERROR'
2024-07-04 17:29:53,499 ERROR HdfsUfsStatusIterator - Failed to list the path hdfs://localhost:9000/
java.net.ConnectException: Call From myhost/192.168.1.10 to localhost:9000 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
# Check the logs of the previously failed container (-p selects the previous instance)
$ kubectl -n alx-ns logs -p alluxio-cluster-worker-59476bf8c5-lg4sc
# 1. Get the name of the node on which your application or FUSE pod is running
$ PODNS=alx-ns POD=alluxio-cluster-fuse-acee53e8f0a9-3gjbrdekk0
$ NODE_NAME=$(kubectl get pod -o jsonpath='{.spec.nodeName}' -n ${PODNS} ${POD})
# 2. Find the Alluxio CSI node plugin pod on that node
$ CSI_POD_NAME=$(kubectl -n alluxio-operator get pod -l app.kubernetes.io/component=csi-nodeplugin --field-selector spec.nodeName=${NODE_NAME} -o jsonpath='{..metadata.name}')
# 3. Get the logs from the csi-nodeserver container
$ kubectl -n alluxio-operator logs -c csi-nodeserver ${CSI_POD_NAME}
# List the pods labelled app.kubernetes.io/component=doctor-controller in the
# alluxio-operator namespace (the lines after the command are sample output)
$ kubectl -n alluxio-operator get pod -l app.kubernetes.io/component=doctor-controller
NAME READY STATUS RESTARTS AGE
alluxio-doctor-controller-cc49c56b6-wlw8k 1/1 Running 0 19s