Kubernetes HPA

Date: Jan. 3, 2021


HPA configuration

Certificates

cat << EOF > custom-metrics-apiserver-csr.json
{
  "CN": "custom-metrics-apiserver",
  "hosts": [
      "custom-metrics-apiserver.monitoring.svc"
  ],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "L": "Beijing",
      "ST": "Beijing"
    }
  ]
}
EOF
sudo wget -O /bin/cfssl https://pkg.cfssl.org/R1.2/cfssl_linux-amd64
sudo wget -O /bin/cfssljson https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64
sudo wget -O /bin/cfssl-certinfo  https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64
for cfssl in `ls /bin/cfssl*`;do sudo chmod +x $cfssl;done;
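The gencert command below reads a ca-config.json that is not shown here; if one does not already exist alongside the cluster CA, a typical profile looks like this (a sketch: the kubernetes profile name must match the -profile flag, and the 10-year expiry is an assumption to adjust to your policy):

cat << EOF > ca-config.json
{
  "signing": {
    "default": {
      "expiry": "87600h"
    },
    "profiles": {
      "kubernetes": {
        "usages": ["signing", "key encipherment", "server auth", "client auth"],
        "expiry": "87600h"
      }
    }
  }
}
EOF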
sudo cfssl gencert -ca=/etc/kubernetes/pki/ca.crt -ca-key=/etc/kubernetes/pki/ca.key -config=ca-config.json -profile=kubernetes custom-metrics-apiserver-csr.json | cfssljson -bare custom-metrics-apiserver
kubectl create secret generic cm-adapter-serving-certs --from-file=serving.crt=./custom-metrics-apiserver.pem --from-file=serving.key=./custom-metrics-apiserver-key.pem -n monitoring 

Service configuration

---
kind: ServiceAccount
apiVersion: v1
metadata:
  name: custom-metrics-apiserver
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: custom-metrics:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: custom-metrics-apiserver
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: custom-metrics-resource-reader
rules:
- apiGroups:
  - ""
  resources:
  - namespaces
  - pods
  - services
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: custom-metrics-resource-reader
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: custom-metrics-resource-reader
subjects:
- kind: ServiceAccount
  name: custom-metrics-apiserver
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: custom-metrics-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: custom-metrics-apiserver
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: custom-metrics-server-resources
rules:
- apiGroups:
  - custom.metrics.k8s.io
  resources: ["*"]
  verbs: ["*"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: hpa-controller-custom-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: custom-metrics-server-resources
subjects:
- kind: ServiceAccount
  name: horizontal-pod-autoscaler
  namespace: kube-system
---
apiVersion: apiregistration.k8s.io/v1beta1
kind: APIService
metadata:
  name: v1beta1.custom.metrics.k8s.io
spec:
  service:
    name: custom-metrics-apiserver
    namespace: monitoring
  group: custom.metrics.k8s.io
  version: v1beta1
  insecureSkipTLSVerify: true
  groupPriorityMinimum: 100
  versionPriority: 100
---
apiVersion: v1
kind: Service
metadata:
  name: custom-metrics-apiserver
  namespace: monitoring
spec:
  ports:
  - port: 443
    targetPort: 443
  selector:
    app: custom-metrics-apiserver
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: custom-metrics-apiserver
  namespace: monitoring
  labels:
    app: custom-metrics-apiserver
spec:
  replicas: 1
  selector:
    matchLabels:
      app: custom-metrics-apiserver
  template:
    metadata:
      labels:
        app: custom-metrics-apiserver
    spec:
      tolerations:
      - key: beta.kubernetes.io/arch
        value: arm
        effect: NoSchedule
      - key: beta.kubernetes.io/arch
        value: arm64
        effect: NoSchedule
      serviceAccountName: custom-metrics-apiserver
      containers:
      - name: custom-metrics-server
        image: hub.sfjswl.com/op/k8s-prometheus-adapter:1.0.0.3
        args:
        - /adapter
        - --secure-port=443      
        - --prometheus-url=http://prometheus-mirror-num0.monitoring.svc:9090
        - --tls-cert-file=/var/run/serving-cert/serving.crt
        - --tls-private-key-file=/var/run/serving-cert/serving.key
        - --config=/etc/adapter/config.yaml
        - --metrics-relist-interval=30s
        - --v=10
        - --logtostderr=true
        ports:
        - containerPort: 443
        securityContext:
          runAsUser: 0
        volumeMounts:
        - mountPath: /etc/adapter/
          name: config
          readOnly: true
        - mountPath: /var/run/serving-cert
          name: volume-serving-cert
          readOnly: true
      volumes:
      - name: config
        configMap:
          name: adapter-config
      - name: volume-serving-cert
        secret:
          secretName: cm-adapter-serving-certs
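
After applying these manifests, a quick sanity check that the APIService registered and that the adapter pod is serving:

kubectl get apiservice v1beta1.custom.metrics.k8s.io
kubectl get pods -n monitoring -l app=custom-metrics-apiserver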

Adapter config

apiVersion: v1
kind: ConfigMap
metadata:
  name: adapter-config
  namespace: monitoring
data:
  config.yaml: |
    rules:
    - seriesQuery: '{__name__="container_cpu_usage_seconds_total",metrics_storage="m3db_remote",cluster="mirror"}'
      seriesFilters: []
      resources:
        template: <<.Resource>>
      name:
        matches: 'container_cpu_usage_seconds_total'
        as: 'container_cpu_usage_seconds_total'
      metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)
    - seriesQuery: '{__name__="Container_Cpu_Used_Percent",metrics_storage="m3db_remote",cluster="mirror"}'
      seriesFilters: []
      resources:
        template: <<.Resource>>
      name:
        matches: "Container_Cpu_Used_Percent"
        as: "Container_Cpu_Used_Percent"
      metricsQuery: <<.Series>>{<<.LabelMatchers>>}

A Prometheus-Adapter rule can be broken down roughly into the following parts (an example of the overrides form follows this list):

  • seriesQuery: the Prometheus query used for discovery; every metric it matches becomes usable for HPA.
  • seriesFilters: the discovered metrics may include some you do not need; this filters them out.
  • resources: seriesQuery only discovers metrics. To query the metric of a particular Pod, its name and namespace must be added to the query as labels; resources is what associates metric labels with Kubernetes resource types, most commonly pod and namespace. There are two ways to declare the association, overrides and template:
    • overrides: maps individual metric labels to Kubernetes resources, for instance mapping the metric's pod and namespace labels to the Kubernetes pod and namespace resources (both are in the core API group, so no group needs to be given). When a pod's metric is queried, its name and namespace are then automatically added to the query as label matchers. Something like podinfo: {group: "apps", resource: "deployment"} would instead associate the metric's podinfo label with the deployment resource in the apps API group.
    • template: a Go template. For example, template: "kube_<<.Group>>_<<.Resource>>" means that when <<.Group>> is apps and <<.Resource>> is deployment, the metric label kube_apps_deployment is associated with the deployment resource.
  • name: renames the metric. Renaming is needed because some metrics only ever increase, e.g. those ending in total; as-is they are meaningless for HPA, so we usually take their rate as the value, and the exposed name should then no longer end in total.
    • matches: a regular expression matched against the metric name; capture groups are allowed.
    • as: defaults to $1, i.e. the first capture group; leaving as empty means using that default.
  • metricsQuery: the actual Prometheus query. The earlier seriesQuery only determines which metrics exist for HPA; when a metric's value is requested, the statement given here is what runs. Note that it applies a rate and a grouping, which is exactly how the counter problem mentioned above is solved.
    • Series: the metric name.
    • LabelMatchers: the extra label matchers; currently only pod and namespace, which is why they must first be associated via resources.
    • GroupBy: the pod-name grouping clause, likewise requiring the resources association.
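For comparison with the template form used in the config above, an overrides-style rule might look like the following sketch (the http_requests_total metric and its kubernetes_* labels are assumptions, in the spirit of the adapter's own walkthrough):

    - seriesQuery: '{__name__=~"^http_requests_.*",kubernetes_namespace!="",kubernetes_pod_name!=""}'
      resources:
        overrides:
          kubernetes_namespace: {resource: "namespace"}
          kubernetes_pod_name: {resource: "pod"}
      name:
        matches: "^(.*)_total$"
        as: "${1}_per_second"
      metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[2m])) by (<<.GroupBy>>)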

Some parameters

  • --horizontal-pod-autoscaler-sync-period: how often the HPA controller evaluates metrics and adjusts replicas; default 15s
  • --horizontal-pod-autoscaler-initial-readiness-delay: the window after a pod starts during which readiness transitions may still be treated as initial readiness; default 30s
  • --horizontal-pod-autoscaler-cpu-initialization-period: the window after a pod starts during which CPU samples may be skipped; default 5m
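
These are kube-controller-manager flags, not fields on the HPA object. On a kubeadm cluster they would go into the controller manager's static pod manifest, roughly as follows (path and layout are kubeadm defaults, an assumption for other installs):

# /etc/kubernetes/manifests/kube-controller-manager.yaml (excerpt)
spec:
  containers:
  - name: kube-controller-manager
    command:
    - kube-controller-manager
    - --horizontal-pod-autoscaler-sync-period=15s
    - --horizontal-pod-autoscaler-initial-readiness-delay=30s
    - --horizontal-pod-autoscaler-cpu-initialization-period=5m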

In addition, autoscaling/v2beta2 can specify multiple metrics; the HPA computes a desired replica count for each metric and uses the largest, as sketched below.
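
A minimal sketch of such a multi-metric HPA, reusing the hpa-test Deployment from the test section below and the custom metric exposed above (note: the Resource/Utilization target only works if the target pods set CPU requests, which the test Deployment below does not, so treat this purely as syntax):

apiVersion: autoscaling/v2beta2
kind: HorizontalPodAutoscaler
metadata:
  name: hpa-test-multi
  namespace: monitoring
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: hpa-test
  minReplicas: 1
  maxReplicas: 3
  metrics:
  # built-in resource metric: target 80% average CPU utilization
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 80
  # custom metric served by the adapter: target an average value of 90
  - type: Pods
    pods:
      metric:
        name: Container_Cpu_Used_Percent
      target:
        type: AverageValue
        averageValue: "90"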

Service test

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hpa-test
  namespace: monitoring
  labels:
    app: hpa-test
spec:
  replicas: 1
  selector:
    matchLabels:
      app: hpa-test
  template:
    metadata:
      labels:
        app: hpa-test
    spec:
      tolerations:
      - key: beta.kubernetes.io/arch
        value: arm
        effect: NoSchedule
      - key: beta.kubernetes.io/arch
        value: arm64
        effect: NoSchedule
      containers:
      - name: hpa-test
        image: hub.sfjswl.com/op/burning-cpu:1.0.0.1
        env:
        - name: cpuNum
          value: "2"
---
kind: HorizontalPodAutoscaler
apiVersion: autoscaling/v2beta1
metadata:
  name: hpa-test
  namespace: monitoring
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: hpa-test
  minReplicas: 1
  maxReplicas: 3
  metrics:
  - type: Pods
    pods:
      metricName: Container_Cpu_Used_Percent
      targetAverageValue: 90000m  # the quantity 90000m == 90, i.e. scale out above an average of 90 (percent)
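
Once the burning-cpu container pushes the metric up, the HPA can be watched reacting:

kubectl get hpa hpa-test -n monitoring -w
kubectl describe hpa hpa-test -n monitoring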

Problem

It started with an HPA that could not fetch metric values properly.

Endpoints checked

kubectl get --raw "/apis/metrics.k8s.io/v1beta1/pods" | jq .
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1"  | jq .  | grep cpu
kubectl get --raw "/apis/authorization.k8s.io/v1beta1/"  | jq . 

This is equivalent to:

Token=`kubectl get secret -n kube-system horizontal-pod-autoscaler-token-p8mwl -o jsonpath={.data.token} | base64 -d`
curl -k -H "Authorization: Bearer $Token" https://127.0.0.1:6443/apis/custom.metrics.k8s.io/v1beta1/namespaces/monitoring/pods/*/kube_pod_container_resource_limits_memory_bytes

A look at the endpoint through which the adapter fetches its data:

10.210.108.200 - - [07/Dec/2020:11:17:28 +0800] "GET /api/v1/series?match%5B%5D=%7B__name__%3D~%22%5Ehttp_requests_.%2A%22%2Ckubernetes_pod_name%21%3D%22%22%2Ckubernetes_namespace%21%3D%22%22%7D&start=1607309848.31 HTTP/1.1" 404 3650 "-" "Go-http-client/1.1" "-"
10.210.108.200 - - [07/Dec/2020:11:17:28 +0800] "GET /api/v1/series?match%5B%5D=%7Bnamespace%21%3D%22%22%2C__name__%21~%22%5Econtainer_.%2A%22%7D&start=1607309848.31 HTTP/1.1" 404 3650 "-" "Go-http-client/1.1" "-"
10.210.108.200 - - [07/Dec/2020:11:17:28 +0800] "GET /api/v1/series?match%5B%5D=%7B__name__%3D~%22%5Econtainer_.%2A%22%2Ccontainer_name%21%3D%22POD%22%2Cnamespace%21%3D%22%22%2Cpod_name%21%3D%22%22%7D&start=1607309848.31 HTTP/1.1" 404 3650 "-" "Go-http-client/1.1" "-"

It uses an endpoint called series, which is different from the query endpoint we had been using:

curl "10.94.78.39:9090/api/v1/series?match%5B%5D=%7Bnamespace%21%3D%22%22%2C__name__%21~%22%5Econtainer_.%2A%22%7D&start=1607309668.298" | jq . | wc -l

which is equivalent to:

curl "http://10.210.108.200:30004/api/v1/series?" --data-urlencode 'match[]={namespace!="",__name__!~"^container_.*"}'

This endpoint does return data normally, but only data held in local storage; it cannot reach data in remote storage. Since we split collection of one cluster across two Prometheus instances, this kind of query yields incomplete data.

Solution

For the series API documentation, see https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers

series only ever queries local storage, never remote storage, so our scheme of selecting by the metrics_storage="m3db_remote" label against remote storage cannot work; and because collection is split across two Prometheus instances, a local-only query sees just one instance's data, so series go missing.

Querying the series endpoint:

$ curl "http://10.210.108.200:30004/api/v1/series?" --data-urlencode 'match[]=Container_Cpu_Used_Percent'
{"status":"success","data":[]}
$ curl "http://10.210.108.200:30004/api/v1/series?" --data-urlencode 'match[]=container_cpu_usage_seconds_total'
{
  "status": "success",
  "data": [
    {
      "__name__": "container_cpu_usage_seconds_total",
      "beta_kubernetes_io_arch": "amd64",
      "beta_kubernetes_io_os": "linux",
      "cluster": "mirror",
      "container": "POD",
      "cpu": "total",
      "id": "/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod245fd31e_a0ac_48ee_9603_0954babfeba2.slice/docker-ec72dd6a034049b1e1030f67ebd9cec25af177bbc3c837d00253179c4552f3c0.scope",
      "image": "registry.aliyuncs.com/google_containers/pause:3.2",
      "instance": "shbx-cvm-mirrork8s-master01.shbx.sfjswl.com",
      "job": "kubernetes-cadvisor",
      "kubernetes_io_arch": "amd64",
      "kubernetes_io_hostname": "shbx-cvm-mirrork8s-master01.shbx.sfjswl.com",
      "kubernetes_io_os": "linux",
      "name": "k8s_POD_cattle-cluster-agent-749f57f95d-ftlpv_cattle-system_245fd31e-a0ac-48ee-9603-0954babfeba2_0",
      "namespace": "cattle-system",
      "pod": "cattle-cluster-agent-749f57f95d-ftlpv"
    },
    ...
  ]
}

Possible approaches

  1. Pass two URLs in the prometheus-url flag, fan each request out to both, and take the union of the results.
  2. Change Series to fetch its data through the query endpoint instead.

Approach 1 still cannot use remote reads, so approach 2 was chosen.

Data returned by query:

{
    "status": "success",
    "data": {
        "resultType": "vector",
        "result": [{
            "metric": {
                "__name__": "container_cpu_usage_seconds_total",
                "beta_kubernetes_io_arch": "amd64",
                "beta_kubernetes_io_os": "linux",
                "cluster": "bx",
                "container": "POD",
                "cpu": "total",
                "id": "/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod0cd15b1a_dd51_417f_a0bf_da1cab752494.slice/docker-67327916a34b7c094a24195eb0da7b6cdab6866b3e158054c13d212aa0a60098.scope",
                "image": "registry.aliyuncs.com/google_containers/pause:3.2",
                "instance": "shbx-cvm-k8s-master02.shbx.sfjswl.com",
                "job": "kubernetes-cadvisor",
                "kubernetes_io_arch": "amd64",
                "kubernetes_io_hostname": "shbx-cvm-k8s-master02.shbx.sfjswl.com",
                "kubernetes_io_os": "linux",
                "metrics_storage": "m3db_remote",
                "name": "k8s_POD_kube-proxy-zvml9_kube-system_0cd15b1a-dd51-417f-a0bf-da1cab752494_0",
                "namespace": "kube-system",
                "pod": "kube-proxy-zvml9"
            },
            "value": [1607416531.04, "0.009208282"]
        }, {
            "metric": {
                "__name__": "container_cpu_usage_seconds_total",
                "beta_kubernetes_io_arch": "amd64",
                "beta_kubernetes_io_os": "linux",
                "cluster": "bx",
                "container": "POD",
                "cpu": "total",
                "id": "/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod285256b1_6590_48b9_94dc_ea4cd947bd2f.slice/docker-52e0a5794b84b966567255ebd172b15ac810eaf0cc10ef2b11bb9cfea982f8fd.scope",
                "image": "registry.aliyuncs.com/google_containers/pause:3.2",
                "instance": "shbx-cvm-k8s-master01.shbx.sfjswl.com",
                "job": "kubernetes-cadvisor",
                "kubernetes_io_arch": "amd64",
                "kubernetes_io_hostname": "shbx-cvm-k8s-master01.shbx.sfjswl.com",
                "kubernetes_io_os": "linux",
                "metrics_storage": "m3db_remote",
                "name": "k8s_POD_cattle-node-agent-4lvnt_cattle-system_285256b1-6590-48b9-94dc-ea4cd947bd2f_0",
                "namespace": "cattle-system",
                "pod": "cattle-node-agent-4lvnt"
            },
            "value": [1607416531.04, "0.014198291"]
        }]
    }
}

The metric objects under data.result in a query response have the same content as the entries under data in a series response, so the approach taken here is to modify the Series method in pkg/client/api.go:

func (h *queryClient) Series(ctx context.Context, interval model.Interval, selectors ...Selector) ([]Series, error) {
    // Replaces the upstream Series in pkg/client/api.go: instead of calling
    // /api/v1/series (which reads local storage only), run the same selectors
    // through the instant-query endpoint, which does go through remote read,
    // and keep just the "metric" part of each result. queryURL,
    // timeoutFromContext and h.api already exist in this file; strconv and
    // time may need to be added to its imports.

    // minimal shape of the query response's "data" field; only metric is used
    type ResultData struct {
        Metrics Series `json:"metric"`
        //Values string `json:"value"`
    }
    type ResData struct {
        Type   model.ValueType `json:"resultType"`
        Result []ResultData    `json:"result"`
    }

    var seriesRes []Series

    for _, selector := range selectors {
        vals := url.Values{}
        vals.Set("query", string(selector))
        vals.Set("time", strconv.FormatInt(time.Now().Unix(), 10))
        if timeout, hasTimeout := timeoutFromContext(ctx); hasTimeout {
            vals.Set("timeout", model.Duration(timeout).String())
        }
        res, err := h.api.Do(ctx, "GET", queryURL, vals)
        if err != nil {
            return nil, err
        }
        resData := ResData{}
        if err := json.Unmarshal(res.Data, &resData); err != nil {
            return nil, err
        }
        for _, resultData := range resData.Result {
            seriesRes = append(seriesRes, resultData.Metrics)
        }
    }

    return seriesRes, nil
}

Build

go get
GOOS=linux GOARCH=amd64 go build cmd/adapter/adapter.go

Redeploy and query again:

kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1"  | jq .  | grep cpu
      "name": "jobs.batch/container_cpu_usage_seconds_total",
      "name": "namespaces/container_cpu_usage_seconds_total",
      "name": "pods/container_cpu_usage_seconds_total",

References

  • https://v1-18.docs.kubernetes.io/zh/docs/tasks/run-application/horizontal-pod-autoscale/#support-for-metrics-apis
  • https://github.com/kubernetes/metrics/blob/master/IMPLEMENTATIONS.md#custom-metrics-api
  • https://github.com/DirectXMan12/k8s-prometheus-adapter/blob/master/pkg/client/api.go