A while back I set up a cluster to try out the Kuboard dashboard. Afterwards I powered off the worker machines and kept only one master, and then found that every resource in the kuboard namespace was stuck in Terminating: nothing ever got rescheduled onto the master, and nothing could be deleted either.
[root@node1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
node1 Ready master 18d v1.18.9
node2 NotReady <none> 18d v1.18.9
node3 NotReady <none> 18d v1.18.9
[root@node1 pratic]# kubectl get pods -A -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
default nginx-deployment-c6d76d89f-29z9n 1/1 Running 2 18d 10.100.135.38 node3 <none> <none>
default nginx-deployment-c6d76d89f-m98tq 1/1 Running 2 18d 10.100.104.31 node2 <none> <none>
default nginx-deployment-c6d76d89f-qd2dj 1/1 Running 2 18d 10.100.104.36 node2 <none> <none>
default nginx-deployment-c6d76d89f-s846s 1/1 Running 2 18d 10.100.104.38 node2 <none> <none>
default nginx-deployment-c6d76d89f-sgn2t 1/1 Running 2 18d 10.100.135.37 node3 <none> <none>
kube-system calico-kube-controllers-5b8b769fcd-v285z 1/1 Running 3 18d 10.100.166.162 node1 <none> <none>
kube-system calico-node-285tv 0/1 Running 4 18d 192.168.174.132 node1 <none> <none>
kube-system calico-node-hr94c 1/1 Running 7 18d 192.168.174.133 node2 <none> <none>
kube-system calico-node-sgfql 1/1 Running 7 18d 192.168.174.134 node3 <none> <none>
kube-system coredns-66db54ff7f-kldqw 1/1 Running 3 18d 10.100.166.161 node1 <none> <none>
kube-system coredns-66db54ff7f-strrs 1/1 Running 3 18d 10.100.166.160 node1 <none> <none>
kube-system eip-nfs-nfsserver-b8c6d97c-89l2x 1/1 Running 6 18d 10.100.135.31 node3 <none> <none>
kube-system etcd-node1 1/1 Running 3 18d 192.168.174.132 node1 <none> <none>
kube-system kube-apiserver-node1 1/1 Running 7 18d 192.168.174.132 node1 <none> <none>
kube-system kube-controller-manager-node1 1/1 Running 5 18d 192.168.174.132 node1 <none> <none>
kube-system kube-proxy-96x9p 1/1 Running 3 18d 192.168.174.132 node1 <none> <none>
kube-system kube-proxy-sk6r2 1/1 Running 2 18d 192.168.174.133 node2 <none> <none>
kube-system kube-proxy-v9frz 1/1 Running 2 18d 192.168.174.134 node3 <none> <none>
kube-system kube-scheduler-node1 1/1 Running 4 18d 192.168.174.132 node1 <none> <none>
kuboard alertmanager-main-0 2/2 Terminating 5 18d 10.100.135.39 node3 <none> <none>
kuboard alertmanager-main-1 2/2 Terminating 5 18d 10.100.104.37 node2 <none> <none>
kuboard alertmanager-main-2 2/2 Terminating 5 18d 10.100.135.32 node3 <none> <none>
kuboard blackbox-exporter-555d8688d9-rnhk8 3/3 Terminating 6 18d 10.100.104.39 node2 <none> <none>
kuboard dingtalk-949d7795f-lqmrk 1/1 Terminating 2 18d 10.100.135.36 node3 <none> <none>
kuboard dingtalk-949d7795f-m6f69 1/1 Terminating 2 18d 10.100.104.40 node2 <none> <none>
kuboard grafana-66c85d8454-q4v6l 1/1 Terminating 2 18d 10.100.104.35 node2 <none> <none>
kuboard kube-state-metrics-5b9f68d8d6-wxmll 3/3 Terminating 9 18d 10.100.135.34 node3 <none> <none>
kuboard kuboard-pv-browser-tfrv6 2/2 Terminating 30 18d 10.100.104.34 node2 <none> <none>
kuboard kuboard-pv-browser-zh699 2/2 Terminating 30 18d 10.100.135.40 node3 <none> <none>
kuboard node-exporter-5t5hc 2/2 Terminating 4 18d 192.168.174.134 node3 <none> <none>
kuboard node-exporter-th4lj 2/2 Terminating 4 18d 192.168.174.133 node2 <none> <none>
kuboard prometheus-adapter-54849b899c-2sgvn 1/1 Terminating 4 18d 10.100.104.29 node2 <none> <none>
kuboard prometheus-adapter-54849b899c-f6mz6 1/1 Terminating 5 18d 10.100.135.30 node3 <none> <none>
kuboard prometheus-k8s-0 2/2 Terminating 4 18d 10.100.104.33 node2 <none> <none>
kuboard prometheus-k8s-1 2/2 Terminating 4 18d 10.100.135.35 node3 <none> <none>
kuboard prometheus-operator-55b85b9cbf-qrz7r 2/2 Terminating 6 18d 10.100.104.32 node2 <none> <none>
kuboard system-monitor-config-7b67d679d6-l4fl5 1/1 Terminating 2 18d 10.100.135.33 node3 <none> <none>
nginx-ingress nginx-ingress-jb7cc 1/1 Running 5 18d 10.100.104.30 node2 <none> <none>
nginx-ingress nginx-ingress-t99n6 1/1 Running 4 18d 10.100.135.41 node3 <none> <none>
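In hindsight the cause is fairly clear: a pod is only fully removed after the kubelet on its node confirms the containers have stopped, and with node2/node3 powered off that confirmation never arrives, so everything scheduled there sits in Terminating. StatefulSet pods such as alertmanager-main-N and prometheus-k8s-N are also never recreated elsewhere until the old pod object is actually gone, and on a kubeadm cluster the master typically carries a NoSchedule taint anyway. A quick way to confirm a pod is merely waiting on that confirmation (a hedged check; the pod name is just one example from the listing above):
# show when the pod was marked for deletion and whether any finalizers are still attached
kubectl get pod alertmanager-main-0 -n kuboard -o jsonpath='{.metadata.deletionTimestamp}{"\n"}{.metadata.finalizers}{"\n"}'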
Deleting with the original kuboard manifest just hung:
[root@node1 pratic]# kubectl delete -f kuboard-agent.yaml
namespace "kuboard" deleted
serviceaccount "kuboard-admin" deleted
clusterrolebinding.rbac.authorization.k8s.io "kuboard-admin-crb" deleted
serviceaccount "kuboard-viewer" deleted
clusterrolebinding.rbac.authorization.k8s.io "kuboard-viewer-crb" deleted
deployment.apps "kuboard-agent" deleted
deployment.apps "kuboard-agent-2" deleted
^C
# the delete never finished, even after waiting a long time
[root@node1 pratic]# kubectl delete -f kuboard-agent.yaml
namespace "kuboard" deleted
Error from server (NotFound): error when deleting "kuboard-agent.yaml": serviceaccounts "kuboard-admin" not found
Error from server (NotFound): error when deleting "kuboard-agent.yaml": clusterrolebindings.rbac.authorization.k8s.io "kuboard-admin-crb" not found
Error from server (NotFound): error when deleting "kuboard-agent.yaml": serviceaccounts "kuboard-viewer" not found
Error from server (NotFound): error when deleting "kuboard-agent.yaml": clusterrolebindings.rbac.authorization.k8s.io "kuboard-viewer-crb" not found
Error from server (NotFound): error when deleting "kuboard-agent.yaml": deployments.apps "kuboard-agent" not found
Error from server (NotFound): error when deleting "kuboard-agent.yaml": deployments.apps "kuboard-agent-2" not found
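Before going nuclear, it can be worth enumerating what is actually still left in the stuck namespace. A common pattern for that (just a sketch; it prints whatever namespaced resources remain in kuboard):
# list every namespaced resource type and print any instances remaining in the namespace
kubectl api-resources --verbs=list --namespaced -o name \
  | xargs -n 1 kubectl get --show-kind --ignore-not-found -n kuboard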
CPU, disk, and other resource usage did not look high either:
[root@node1 pratic]# top
top - 00:33:46 up 14 min, 5 users, load average: 0.87, 1.08, 0.79
Tasks: 202 total, 1 running, 201 sleeping, 0 stopped, 0 zombie
%Cpu(s): 0.8 us, 0.8 sy, 0.0 ni, 98.4 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
KiB Mem : 16247820 total, 13258136 free, 1221864 used, 1767820 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 14618080 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
9646 root 20 0 2777948 57172 15800 S 6.2 0.4 0:08.47 containerd
9916 root 20 0 1186224 99088 29288 S 6.2 0.6 0:34.26 dockerd
23653 root 20 0 146512 23704 13976 S 6.2 0.1 0:02.98 coredns
1 root 20 0 191364 4404 2600 S 0.0 0.0 0:03.96 systemd
2 root 20 0 0 0 0 S 0.0 0.0 0:00.01 kthreadd
3 root 20 0 0 0 0 S 0.0 0.0 0:00.07 ksoftirqd/0
4 root 20 0 0 0 0 S 0.0 0.0 0:00.41 kworker/0:0
5 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/0:0H
6 root 20 0 0 0 0 S 0.0 0.0 0:00.12 kworker/u256:0
7 root rt 0 0 0 0 S 0.0 0.0 0:00.06 migration/0
8 root 20 0 0 0 0 S 0.0 0.0 0:00.00 rcu_bh
9 root 20 0 0 0 0 S 0.0 0.0 0:01.75 rcu_sched
10 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 lru-add-drain
11 root rt 0 0 0 0 S 0.0 0.0 0:00.00 watchdog/0
12 root rt 0 0 0 0 S 0.0 0.0 0:00.00 watchdog/1
13 root rt 0 0 0 0 S 0.0 0.0 0:00.06 migration/1
14 root 20 0 0 0 0 S 0.0 0.0 0:00.07 ksoftirqd/1
16 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/1:0H
I didn't have time at that point to describe the pods and dig into the cause; the trial was over anyway, so I decided to just force-delete everything.
# a quick test showed a force delete goes through
[root@node1 pratic]# kubectl delete pod/prometheus-k8s-0 --force --grace-period=0 -n kuboard
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "prometheus-k8s-0" force deleted
# batch force-delete the remaining pods in the kuboard namespace
for i in $(kubectl get pods -A | awk '/kuboard/ {print $2}'); do echo $i && kubectl delete pod/$i --force --grace-period=0 -n kuboard; done
alertmanager-main-0
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "alertmanager-main-0" force deleted
alertmanager-main-1
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "alertmanager-main-1" force deleted
alertmanager-main-2
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "alertmanager-main-2" force deleted
blackbox-exporter-555d8688d9-rnhk8
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "blackbox-exporter-555d8688d9-rnhk8" force deleted
dingtalk-949d7795f-lqmrk
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "dingtalk-949d7795f-lqmrk" force deleted
dingtalk-949d7795f-m6f69
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "dingtalk-949d7795f-m6f69" force deleted
grafana-66c85d8454-q4v6l
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "grafana-66c85d8454-q4v6l" force deleted
kube-state-metrics-5b9f68d8d6-wxmll
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "kube-state-metrics-5b9f68d8d6-wxmll" force deleted
kuboard-pv-browser-tfrv6
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "kuboard-pv-browser-tfrv6" force deleted
kuboard-pv-browser-zh699
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "kuboard-pv-browser-zh699" force deleted
node-exporter-5t5hc
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "node-exporter-5t5hc" force deleted
node-exporter-th4lj
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "node-exporter-th4lj" force deleted
prometheus-adapter-54849b899c-2sgvn
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "prometheus-adapter-54849b899c-2sgvn" force deleted
prometheus-adapter-54849b899c-f6mz6
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "prometheus-adapter-54849b899c-f6mz6" force deleted
system-monitor-config-7b67d679d6-l4fl5
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "system-monitor-config-7b67d679d6-l4fl5" force deleted
Delete the namespace:
[root@node1 pratic]# kubectl get namespace kuboard -o json > tmp.json
[root@node1 pratic]# cat tmp.json
{
"apiVersion": "v1",
"kind": "Namespace",
"metadata": {
"annotations": {
"kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"v1\",\"kind\":\"Namespace\",\"metadata\":{\"annotations\":{},\"name\":\"kuboard\"}}\n"
},
"creationTimestamp": "2022-04-13T09:55:22Z",
"deletionTimestamp": "2022-05-01T16:23:27Z",
"managedFields": [
{
"apiVersion": "v1",
"fieldsType": "FieldsV1",
"fieldsV1": {
"f:metadata": {
"f:annotations": {
".": {},
"f:kubectl.kubernetes.io/last-applied-configuration": {}
}
},
"f:status": {
"f:phase": {}
}
},
"manager": "kubectl",
"operation": "Update",
"time": "2022-04-13T09:55:22Z"
},
{
"apiVersion": "v1",
"fieldsType": "FieldsV1",
"fieldsV1": {
"f:status": {
"f:conditions": {
".": {},
"k:{\"type\":\"NamespaceContentRemaining\"}": {
".": {},
"f:lastTransitionTime": {},
"f:message": {},
"f:reason": {},
"f:status": {},
"f:type": {}
},
"k:{\"type\":\"NamespaceDeletionContentFailure\"}": {
".": {},
"f:lastTransitionTime": {},
"f:message": {},
"f:reason": {},
"f:status": {},
"f:type": {}
},
"k:{\"type\":\"NamespaceDeletionDiscoveryFailure\"}": {
".": {},
"f:lastTransitionTime": {},
"f:message": {},
"f:reason": {},
"f:status": {},
"f:type": {}
},
"k:{\"type\":\"NamespaceDeletionGroupVersionParsingFailure\"}": {
".": {},
"f:lastTransitionTime": {},
"f:message": {},
"f:reason": {},
"f:status": {},
"f:type": {}
},
"k:{\"type\":\"NamespaceFinalizersRemaining\"}": {
".": {},
"f:lastTransitionTime": {},
"f:message": {},
"f:reason": {},
"f:status": {},
"f:type": {}
}
}
}
},
"manager": "kube-controller-manager",
"operation": "Update",
"time": "2022-05-01T16:25:59Z"
}
],
"name": "kuboard",
"resourceVersion": "92938",
"selfLink": "/api/v1/namespaces/kuboard",
"uid": "b495dc61-60dc-4320-afb7-64f0480730f0"
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"conditions": [
{
"lastTransitionTime": "2022-05-01T16:23:32Z",
"message": "Discovery failed for some groups, 1 failing: unable to retrieve the complete list of server APIs: metrics.k8s.io/v1beta1: the server is currently unable to handle the request",
"reason": "DiscoveryFailed",
"status": "True",
"type": "NamespaceDeletionDiscoveryFailure"
},
{
"lastTransitionTime": "2022-05-01T16:23:34Z",
"message": "All legacy kube types successfully parsed",
"reason": "ParsedGroupVersions",
"status": "False",
"type": "NamespaceDeletionGroupVersionParsingFailure"
},
{
"lastTransitionTime": "2022-05-01T16:23:34Z",
"message": "All content successfully deleted, may be waiting on finalization",
"reason": "ContentDeleted",
"status": "False",
"type": "NamespaceDeletionContentFailure"
},
{
"lastTransitionTime": "2022-05-01T16:23:34Z",
"message": "Some resources are remaining: persistentvolumeclaims. has 2 resource instances, pods. has 18 resource instances",
"reason": "SomeResourcesRemain",
"status": "True",
"type": "NamespaceContentRemaining"
},
{
"lastTransitionTime": "2022-05-01T16:23:34Z",
"message": "Some content in the namespace has finalizers remaining: kubernetes.io/pvc-protection in 2 resource instances",
"reason": "SomeFinalizersRemain",
"status": "True",
"type": "NamespaceFinalizersRemaining"
}
],
"phase": "Terminating"
}
}
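The status conditions actually name the two things blocking deletion: discovery of metrics.k8s.io/v1beta1 is failing (its backing service, presumably the prometheus-adapter that was just force-deleted, is gone), and two PVCs are still held by the kubernetes.io/pvc-protection finalizer. Those can usually be cleared directly, which would let the namespace controller finish on its own; a hedged sketch (the PVC name is a placeholder to be filled in from kubectl get pvc -n kuboard):
# find APIService entries whose backend is unavailable, then remove the dead metrics one
kubectl get apiservices | grep False
kubectl delete apiservice v1beta1.metrics.k8s.io
# drop the pvc-protection finalizer from a stuck PVC (repeat for each one)
kubectl patch pvc <pvc-name> -n kuboard -p '{"metadata":{"finalizers":null}}'
Stripping the namespace's own finalizer, as done below, also works; it just skips the cleanup the controller would otherwise wait for.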
# Change this part of the file:
# "spec": {
# "finalizers": [
# "kubernetes"
# ]
# },
# to the following:
# "spec": {
# },
Start a kube-apiserver proxy:
[root@node1 pratic]# kubectl proxy --port=8081
Starting to serve on 127.0.0.1:8081
[root@node1 ~]# curl -k -H "Content-Type: application/json" -X PUT --data-binary @tmp.json http://127.0.0.1:8081/api/v1/namespaces/kuboard/finalize
{
"kind": "Namespace",
"apiVersion": "v1",
"metadata": {
"name": "kuboard",
"selfLink": "/api/v1/namespaces/kuboard/finalize",
"uid": "b495dc61-60dc-4320-afb7-64f0480730f0",
"resourceVersion": "92938",
"creationTimestamp": "2022-04-13T09:55:22Z",
"deletionTimestamp": "2022-05-01T16:23:27Z",
"annotations": {
"kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"v1\",\"kind\":\"Namespace\",\"metadata\":{\"annotations\":{},\"name\":\"kuboard\"}}\n"
},
"managedFields": [
{
"manager": "kubectl",
"operation": "Update",
"apiVersion": "v1",
"time": "2022-04-13T09:55:22Z",
"fieldsType": "FieldsV1",
"fieldsV1": {"f:metadata":{"f:annotations":{".":{},"f:kubectl.kubernetes.io/last-applied-configuration":{}}},"f:status":{"f:phase":{}}}
},
{
"manager": "kube-controller-manager",
"operation": "Update",
"apiVersion": "v1",
"time": "2022-05-01T16:25:59Z",
"fieldsType": "FieldsV1",
"fieldsV1": {"f:status":{"f:conditions":{".":{},"k:{\"type\":\"NamespaceContentRemaining\"}":{".":{},"f:lastTransitionTime":{},"f:message":{},"f:reason":{},"f:status":{},"f:type":{}},"k:{\"type\":\"NamespaceDeletionContentFailure\"}":{".":{},"f:lastTransitionTime":{},"f:message":{},"f:reason":{},"f:status":{},"f:type":{}},"k:{\"type\":\"NamespaceDeletionDiscoveryFailure\"}":{".":{},"f:lastTransitionTime":{},"f:message":{},"f:reason":{},"f:status":{},"f:type":{}},"k:{\"type\":\"NamespaceDeletionGroupVersionParsingFailure\"}":{".":{},"f:lastTransitionTime":{},"f:message":{},"f:reason":{},"f:status":{},"f:type":{}},"k:{\"type\":\"NamespaceFinalizersRemaining\"}":{".":{},"f:lastTransitionTime":{},"f:message":{},"f:reason":{},"f:status":{},"f:type":{}}}}}
}
]
},
"spec": {
},
"status": {
"phase": "Terminating",
"conditions": [
{
"type": "NamespaceDeletionDiscoveryFailure",
"status": "True",
"lastTransitionTime": "2022-05-01T16:23:32Z",
"reason": "DiscoveryFailed",
"message": "Discovery failed for some groups, 1 failing: unable to retrieve the complete list of server APIs: metrics.k8s.io/v1beta1: the server is currently unable to handle the request"
},
{
"type": "NamespaceDeletionGroupVersionParsingFailure",
"status": "False",
"lastTransitionTime": "2022-05-01T16:23:34Z",
"reason": "ParsedGroupVersions",
"message": "All legacy kube types successfully parsed"
},
{
"type": "NamespaceDeletionContentFailure",
"status": "False",
"lastTransitionTime": "2022-05-01T16:23:34Z",
"reason": "ContentDeleted",
"message": "All content successfully deleted, may be waiting on finalization"
},
{
"type": "NamespaceContentRemaining",
"status": "True",
"lastTransitionTime": "2022-05-01T16:23:34Z",
"reason": "SomeResourcesRemain",
"message": "Some resources are remaining: persistentvolumeclaims. has 2 resource instances, pods. has 18 resource instances"
},
{
"type": "NamespaceFinalizersRemaining",
"status": "True",
"lastTransitionTime": "2022-05-01T16:23:34Z",
"reason": "SomeFinalizersRemain",
"message": "Some content in the namespace has finalizers remaining: kubernetes.io/pvc-protection in 2 resource instances"
}
]
}
}
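The PUT goes to /api/v1/namespaces/kuboard/finalize because the finalize subresource is generally the only place the API server accepts changes to a namespace's spec.finalizers; editing the namespace object itself won't drop them. On reasonably recent kubectl versions the proxy + curl combination can likely be skipped, since kubectl can PUT to a raw path directly (a sketch, assuming jq is installed):
# strip spec.finalizers and PUT the result straight to the finalize subresource
kubectl get namespace kuboard -o json \
  | jq '.spec.finalizers = []' \
  | kubectl replace --raw "/api/v1/namespaces/kuboard/finalize" -f -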
The worker servers had already been released, but some resources scheduled on node2/node3 were still lingering:
[root@node1 ~]# kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
default nginx-deployment-c6d76d89f-29z9n 1/1 Terminating 2 18d
default nginx-deployment-c6d76d89f-m98tq 1/1 Terminating 2 18d
default nginx-deployment-c6d76d89f-qd2dj 1/1 Terminating 2 18d
default nginx-deployment-c6d76d89f-s846s 1/1 Terminating 2 18d
default nginx-deployment-c6d76d89f-sgn2t 1/1 Terminating 2 18d
kube-system calico-kube-controllers-5b8b769fcd-v285z 1/1 Running 3 18d
kube-system calico-node-285tv 0/1 Running 4 18d
kube-system calico-node-hr94c 1/1 Running 7 18d
kube-system calico-node-sgfql 1/1 Running 7 18d
kube-system coredns-66db54ff7f-kldqw 1/1 Running 3 18d
kube-system coredns-66db54ff7f-strrs 1/1 Running 3 18d
kube-system eip-nfs-nfsserver-b8c6d97c-89l2x 1/1 Terminating 6 18d
kube-system eip-nfs-nfsserver-b8c6d97c-lphrh 1/1 Running 0 50m
kube-system etcd-node1 1/1 Running 3 18d
kube-system kube-apiserver-node1 1/1 Running 7 18d
kube-system kube-controller-manager-node1 1/1 Running 5 18d
kube-system kube-proxy-96x9p 1/1 Running 3 18d
kube-system kube-proxy-sk6r2 1/1 Running 2 18d
kube-system kube-proxy-v9frz 1/1 Running 2 18d
kube-system kube-scheduler-node1 1/1 Running 4 18d
nginx-ingress nginx-ingress-grqrl 1/1 Running 0 20m
nginx-ingress nginx-ingress-jb7cc 1/1 Running 5 18d
nginx-ingress nginx-ingress-t99n6 1/1 Running 4 18d
Take node2/node3 offline and delete them:
[root@node1 ~]# kubectl delete node node2 node3
[root@node1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
node1 Ready master 18d v1.18.9
[root@node1 ~]# kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-5b8b769fcd-v285z 1/1 Running 3 18d
kube-system calico-node-285tv 1/1 Running 4 18d
kube-system coredns-66db54ff7f-kldqw 1/1 Running 3 18d
kube-system coredns-66db54ff7f-strrs 1/1 Running 3 18d
kube-system eip-nfs-nfsserver-b8c6d97c-lphrh 1/1 Running 0 56m
kube-system etcd-node1 1/1 Running 3 18d
kube-system kube-apiserver-node1 1/1 Running 7 18d
kube-system kube-controller-manager-node1 1/1 Running 5 18d
kube-system kube-proxy-96x9p 1/1 Running 3 18d
kube-system kube-scheduler-node1 1/1 Running 4 18d
nginx-ingress nginx-ingress-grqrl 1/1 Running 0 26m
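One note on the node cleanup: had the machines still been reachable, the gentler sequence would have been to drain each node first and only then delete it, so workloads are evicted cleanly instead of being orphaned (flag names as of v1.18; newer kubectl renames --delete-local-data to --delete-emptydir-data):
# cordon the node, evict its workloads, then remove the node object
kubectl drain node2 --ignore-daemonsets --delete-local-data
kubectl delete node node2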