共计 9422 个字符,预计需要花费 24 分钟才能阅读完成。
对于k8s的组件,一般你们都怎么维护呢?最近博主一直在看k8s组件kubelet相关的问题,大部分是内存持续飙高或是cpu高,但是为什么高呢?对博主来讲这其实是个黑盒,没办法只能卷,卷起来~
持续分析工具Pyroscope
目前博主用的k8s版本是1.18.9,试了下增加以下参数但无效,可能是这个版本没有profile功能
KUBELET_ARGS="--enable-profiling=true"
无奈之际正好想起以前用来分析django项目时的工具Pyroscope,这个工具支持分析多种语言,而且可持续分析并生成相应的火焰图。当时为了测试博主写的代码有多烂,也没有过多的使用,只是想找一款能替代阿里ARMS的开源工具,误打误撞搜到了它
Pyroscope 专注于构建专门用于存储性能剖析(profiling)数据的存储引擎,以尽可能高效地存储和查询这些数据。它采用 agent-server 模型,将剖析数据(profile)从应用程序发送到 Pyroscope 服务器,是一个C/S结构,可惜是侵入式的,需要一丢丢开发接入能力
Pyroscope快速体验
[root@localhost src]# git clone https://github.com/grafana/pyroscope.git
[root@localhost src]# cd pyroscope/examples/python/rideshare/flask/
[root@localhost flask]# ll
total 20
-rw-r--r-- 1 root root 586 May 27 19:18 docker-compose.yml
-rw-r--r-- 1 root root 161 May 27 19:18 Dockerfile
-rw-r--r-- 1 root root 152 May 27 19:18 Dockerfile.load-generator
drwxr-xr-x 6 root root 95 May 27 19:18 lib
-rwxr-xr-x 1 root root 699 May 27 19:18 load-generator.py
-rw-r--r-- 1 root root 254 May 27 19:18 README.md
[root@localhost flask]# docker compose up -d
[root@localhost flask]# docker compose ps -a
NAME IMAGE COMMAND SERVICE CREATED STATUS PORTS
flask-ap-south-1 flask-ap-south "python lib/server.py" ap-south 4 hours ago Up 4 hours 0.0.0.0:32768->5000/tcp, :::32768->5000/tcp
flask-eu-north-1 flask-eu-north "python lib/server.py" eu-north 4 hours ago Up 4 hours 0.0.0.0:32770->5000/tcp, :::32770->5000/tcp
flask-load-generator-1 flask-load-generator "python load-generat…" load-generator 4 hours ago Up 4 hours
flask-pyroscope-1 pyroscope/pyroscope "/usr/bin/pyroscope …" pyroscope 4 hours ago Up 4 hours 0.0.0.0:4040->4040/tcp, :::4040->4040/tcp
flask-us-east-1 flask-us-east "python lib/server.py" us-east 4 hours ago Up 4 hours 0.0.0.0:32769->5000/tcp, :::32769->5000/tcp
访问ip:4040即可
kubelet接入Pyroscope
调整kubelet源码
对于go应用,需要增加以下代码
package main
import "github.com/pyroscope-io/client/pyroscope"
func main() {
// These 2 lines are only required if you're using mutex or block profiling
// Read the explanation below for how to set these rates:
runtime.SetMutexProfileFraction(5)
runtime.SetBlockProfileRate(5)
pyroscope.Start(pyroscope.Config{
ApplicationName: "simple.golang.app",
// replace this with the address of pyroscope server
ServerAddress: "http://pyroscope-server:4040",
// you can disable logging by setting this to nil
Logger: pyroscope.StandardLogger,
// optionally, if authentication is enabled, specify the API key:
// AuthToken: os.Getenv("PYROSCOPE_AUTH_TOKEN"),
// you can provide static tags via a map:
Tags: map[string]string{"hostname": os.Getenv("HOSTNAME")},
ProfileTypes: []pyroscope.ProfileType{
// these profile types are enabled by default:
pyroscope.ProfileCPU,
pyroscope.ProfileAllocObjects,
pyroscope.ProfileAllocSpace,
pyroscope.ProfileInuseObjects,
pyroscope.ProfileInuseSpace,
// these profile types are optional:
pyroscope.ProfileGoroutines,
pyroscope.ProfileMutexCount,
pyroscope.ProfileMutexDuration,
pyroscope.ProfileBlockCount,
pyroscope.ProfileBlockDuration,
},
})
// your code goes here
}
所以我们得修改下kubelet源码,位置为:kubernetes/cmd/kubelet/kubelet.go,修改前别忘记切换到自己k8s版本对应的分支代码,博主这里是1.18.9,最后修改的kubelet.go为:
[root@localhost src]# git clone https://github.com/kubernetes/kubernetes.git
[root@localhost src]# cd kubernetes
[root@localhost kubernetes]# git checkout v1.18.9
[root@localhost kubernetes]# cd cmd/kubelet
[root@localhost kubelet]# pwd
/usr/local/src/kubernetes/cmd/kubelet
[root@localhost kubelet]# cat kubelet.go
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// The kubelet binary is responsible for maintaining a set of containers on a particular host VM.
// It syncs data from both configuration file(s) as well as from a quorum of etcd servers.
// It then queries Docker to see what is currently running. It synchronizes the configuration data,
// with the running set of containers by starting or stopping Docker containers.
package main
import (
	"math/rand"
	"os"
	"runtime"
	"time"

	"github.com/pyroscope-io/client/pyroscope"
	"k8s.io/component-base/logs"
	_ "k8s.io/component-base/metrics/prometheus/restclient"
	_ "k8s.io/component-base/metrics/prometheus/version" // for version metric registration
	"k8s.io/kubernetes/cmd/kubelet/app"
)
// main starts the Pyroscope continuous profiler and then runs the kubelet
// command. Profiling is best-effort: a profiler start-up failure is reported
// on stderr and the kubelet still runs.
func main() {
	// Mutex and block profiling are only collected when these rates are set;
	// they are needed because the ProfileMutex* / ProfileBlock* types are
	// requested below. NOTE(review): the value 5 mirrors the Pyroscope sample;
	// tune it if the sampling overhead matters on this node.
	runtime.SetMutexProfileFraction(5)
	runtime.SetBlockProfileRate(5)

	// Prefer $HOSTNAME to stay compatible with the previous tag value, but
	// fall back to the OS-reported hostname because the variable may be unset
	// in the process environment (e.g. when launched by an init system).
	hostname := os.Getenv("HOSTNAME")
	if hostname == "" {
		if name, err := os.Hostname(); err == nil {
			hostname = name
		}
	}

	_, err := pyroscope.Start(pyroscope.Config{
		ApplicationName: "simple.golang.app.kubelet",

		// replace this with the address of pyroscope server
		ServerAddress: "http://192.168.44.162:4040",

		// you can disable logging by setting this to nil
		Logger: pyroscope.StandardLogger,

		// optionally, if authentication is enabled, specify the API key:
		// AuthToken: os.Getenv("PYROSCOPE_AUTH_TOKEN"),

		// you can provide static tags via a map:
		Tags: map[string]string{"hostname": hostname},

		ProfileTypes: []pyroscope.ProfileType{
			// these profile types are enabled by default:
			pyroscope.ProfileCPU,
			pyroscope.ProfileAllocObjects,
			pyroscope.ProfileAllocSpace,
			pyroscope.ProfileInuseObjects,
			pyroscope.ProfileInuseSpace,

			// these profile types are optional:
			pyroscope.ProfileGoroutines,
			pyroscope.ProfileMutexCount,
			pyroscope.ProfileMutexDuration,
			pyroscope.ProfileBlockCount,
			pyroscope.ProfileBlockDuration,
		},
	})
	if err != nil {
		// Do not abort: the kubelet must keep working without profiling.
		os.Stderr.WriteString("pyroscope: failed to start profiler: " + err.Error() + "\n")
	}

	rand.Seed(time.Now().UnixNano())

	command := app.NewKubeletCommand()
	logs.InitLogs()
	defer logs.FlushLogs()

	if err := command.Execute(); err != nil {
		os.Exit(1)
	}
}
开始编译二进制文件kubelet
[root@localhost kubelet]# pwd
/usr/local/src/kubernetes/cmd/kubelet
[root@localhost kubelet]# go build
# 编译完后会在当前目录生成kubelet
[root@localhost kubelet]# ll
total 131624
drwxr-xr-x 3 root root 308 May 27 11:05 app
-rw-r--r-- 1 root root 1104 May 27 10:54 BUILD
-rwxr-xr-x 1 root root 134770488 May 27 19:49 kubelet
-rw-r--r-- 1 root root 2690 May 27 19:48 kubelet.go
-rw-r--r-- 1 root root 189 May 27 10:54 OWNERS
# 复制kubelet二进制文件到k8s集群中,略
替换kubelet
# 停止kubelet服务
[root@k8s-master ~]# systemctl stop kubelet
[root@k8s-master ~]# mv /usr/bin/kubelet{,.bak}
[root@k8s-master ~]# mv /usr/local/src/kubelet /usr/bin/kubelet
# 重启kubelet
[root@k8s-master ~]# systemctl restart kubelet
# 查看日志,可以看到此时每隔10秒上传一次数据到pyroscope
[root@k8s-master ~]# journalctl -u kubelet
#### 略
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] profiling session reset 2023-05-27 19:50:00 +0800 CST
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=sum&from=1685188200&name=simple.golang.app.kubel
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=46197c9dadb630635e8dc78d650c3e2f956150ef05aa223d0c8bede9ebda
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=average&from=1685188200&name=simple.golang.app.k
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=0d4b217192ea0e78c0377f4957b757d66a05222f411fad43610e9e79530e
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=&from=1685188200&name=simple.golang.app.kubelet%
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=fff22283e1ebe9279a8e06b04db5808f7a7b34e6f3497036a356e63cea84
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=&from=1685188200&name=simple.golang.app.kubelet%
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=518b04c75315bed5d4a13211a6a3b0df40eebdd8b23a4b20e51eb077569b
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=&from=1685188200&name=simple.golang.app.kubelet%
May 27 19:50:10 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=ec3e5fdf7371b4b3348645e6a588a07048cd472b115705d82fe11a0857a8
May 27 19:50:14 k8s-master kubelet[19198]: I0527 19:50:14.096461 19198 topology_manager.go:219] [topologymanager] RemoveContainer - Container ID: 243197dd048b51
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] profiling session reset 2023-05-27 19:50:10 +0800 CST
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=sum&from=1685188210&name=simple.golang.app.kubel
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=06cac7d52fc1f52fc568ca3f4223cab407b79cc03cc8a1cd79b09404ed13
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=average&from=1685188210&name=simple.golang.app.k
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=f299626a3a9ae9178ea3083d73e854ee05c26950a242c42d77ceeb77185c
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=&from=1685188210&name=simple.golang.app.kubelet%
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=50115ae9a619c7b905460d86af70f614c3a79140d6bb87a6e023a19397aa
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=&from=1685188210&name=simple.golang.app.kubelet%
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=40f684312ff3177d209b9ecccc42281ff69a36229c77fba7d4bb8c82edf1
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] uploading at http://192.168.44.162:4040/ingest?aggregationType=&from=1685188210&name=simple.golang.app.kubelet%
May 27 19:50:20 k8s-master kubelet[19198]: [DEBUG] content type: multipart/form-data; boundary=d4ed70f389d1590b9add04bf34fd0f9c148491ab8ecb1bc075730abf5ef2
查看kubelet火焰图
查看application,有以下几块内容
- 若 gc 相关函数占用异常,可重点排查对象数量
- 解决速度问题(CPU占用)时,关注对象数量(inuse_objects / alloc_objects)指标
- 解决内存占用问题时,关注分配空间(inuse_space / alloc_space)指标
看下cpu火焰图
目前从cpu火焰图可以看到 *scanobject 和 *prometheus 相关函数的cpu占用占大头
内存火焰图
代码的分析这里就不涉及了,博主则借助这个工具边学边看源码,工具还挺不错的,有兴趣的读者可自行学习,另外其他组件接入也类似。其实除了这个Pyroscope工具,博主还想看调用链来着,想接入skywalking去分析,后续有时间再折腾了~