Configuring filebeat + kafka + logstash + elasticsearch + kibana on Kubernetes

Date: Jan. 24, 2019

kafka

Download the configuration

$ git clone https://github.com/ramhiser/kafka-kubernetes.git
$ cd kafka-kubernetes/
$ ll
total 24
-rw-r--r-- 1 root root  672 Jan 18 16:34 kafka-cluster.yaml
-rw-r--r-- 1 root root  223 Jan 18 16:34 kafka-service.yaml
-rw-r--r-- 1 root root  101 Jan 18 16:34 namespace-kafka.yaml
-rw-r--r-- 1 root root 3955 Jan 18 16:34 README.md
-rw-r--r-- 1 root root 1942 Jan 18 16:34 zookeeper-cluster.yaml
-rw-r--r-- 1 root root  856 Jan 18 16:34 zookeeper-services.yaml

zk

$ kubectl apply -f zookeeper-cluster.yaml 
$ kubectl apply -f zookeeper-services.yaml

How it works

These manifests simply create three Deployments and three Services. In principle the same thing could be done with a single StatefulSet; the official Kubernetes documentation has a tutorial on running a ZooKeeper cluster as a StatefulSet (see the sketch below).
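For reference, the StatefulSet route essentially comes down to applying the manifest from that tutorial (URL and label are the ones published in the Kubernetes docs at the time; check the current docs before relying on them):

$ kubectl apply -f https://k8s.io/examples/application/zookeeper/zookeeper.yaml
$ kubectl get pods -w -l app=zk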

The corresponding configuration file inside one of the zk containers:

bash-4.4# cat /opt/zookeeper/conf/zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper
clientPort=2181
maxClientCnxns=60
autopurge.snapRetainCount=3
autopurge.purgeInterval=0
server.1=zoo1:2888:3888
server.3=zoo3:2888:3888
server.2=0.0.0.0:2888:3888
bash-4.4#  cat /var/lib/zookeeper/myid 
2

For comparison, here is the configuration when the same image is started directly with docker:

[root@node-01 ~]# docker run -it -d --name look-zk digitalwonderland/zookeeper
d8db7f8d2b3928cbda36e5c21303fd52c7441047f00c587e5dae6081c5e24462
[root@node-01 ~]# docker exec -it look-zk /bin/bash
bash-4.4# cat /opt/zookeeper/conf/zoo.cfg 
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper
clientPort=2181
maxClientCnxns=60
autopurge.snapRetainCount=3
autopurge.purgeInterval=0

The container is started via a shell script:

bash /usr/local/sbin/start.sh

Looking at the script, the configuration is generated from the environment variables passed in by the Deployment:

#! /usr/bin/env bash

# Fail hard and fast
set -eo pipefail

ZOOKEEPER_ID=${ZOOKEEPER_ID:-1}
echo "ZOOKEEPER_ID=$ZOOKEEPER_ID"

echo $ZOOKEEPER_ID > /var/lib/zookeeper/myid

ZOOKEEPER_TICK_TIME=${ZOOKEEPER_TICK_TIME:-2000}
echo "tickTime=${ZOOKEEPER_TICK_TIME}" > /opt/zookeeper/conf/zoo.cfg
echo "tickTime=${ZOOKEEPER_TICK_TIME}"

ZOOKEEPER_INIT_LIMIT=${ZOOKEEPER_INIT_LIMIT:-10}
echo "initLimit=${ZOOKEEPER_INIT_LIMIT}" >> /opt/zookeeper/conf/zoo.cfg
echo "initLimit=${ZOOKEEPER_INIT_LIMIT}"

ZOOKEEPER_SYNC_LIMIT=${ZOOKEEPER_SYNC_LIMIT:-5}
echo "syncLimit=${ZOOKEEPER_SYNC_LIMIT}" >> /opt/zookeeper/conf/zoo.cfg
echo "syncLimit=${ZOOKEEPER_SYNC_LIMIT}"

echo "dataDir=/var/lib/zookeeper" >> /opt/zookeeper/conf/zoo.cfg
echo "clientPort=2181" >> /opt/zookeeper/conf/zoo.cfg

ZOOKEEPER_CLIENT_CNXNS=${ZOOKEEPER_CLIENT_CNXNS:-60}
echo "maxClientCnxns=${ZOOKEEPER_CLIENT_CNXNS}" >> /opt/zookeeper/conf/zoo.cfg
echo "maxClientCnxns=${ZOOKEEPER_CLIENT_CNXNS}"

ZOOKEEPER_AUTOPURGE_SNAP_RETAIN_COUNT=${ZOOKEEPER_AUTOPURGE_SNAP_RETAIN_COUNT:-3}
echo "autopurge.snapRetainCount=${ZOOKEEPER_AUTOPURGE_SNAP_RETAIN_COUNT}" >> /opt/zookeeper/conf/zoo.cfg
echo "autopurge.snapRetainCount=${ZOOKEEPER_AUTOPURGE_SNAP_RETAIN_COUNT}"

ZOOKEEPER_AUTOPURGE_PURGE_INTERVAL=${ZOOKEEPER_AUTOPURGE_PURGE_INTERVAL:-0}
echo "autopurge.purgeInterval=${ZOOKEEPER_AUTOPURGE_PURGE_INTERVAL}" >> /opt/zookeeper/conf/zoo.cfg
echo "autopurge.purgeInterval=${ZOOKEEPER_AUTOPURGE_PURGE_INTERVAL}"

for VAR in `env`
do
  if [[ $VAR =~ ^ZOOKEEPER_SERVER_[0-9]+= ]]; then
    SERVER_ID=`echo "$VAR" | sed -r "s/ZOOKEEPER_SERVER_(.*)=.*/\1/"`
    SERVER_IP=`echo "$VAR" | sed 's/.*=//'`
    if [ "${SERVER_ID}" = "${ZOOKEEPER_ID}" ]; then
      echo "server.${SERVER_ID}=0.0.0.0:2888:3888" >> /opt/zookeeper/conf/zoo.cfg
      echo "server.${SERVER_ID}=0.0.0.0:2888:3888"
    else
      echo "server.${SERVER_ID}=${SERVER_IP}:2888:3888" >> /opt/zookeeper/conf/zoo.cfg
      echo "server.${SERVER_ID}=${SERVER_IP}:2888:3888"
    fi
  fi
done

su zookeeper -s /bin/bash -c "/opt/zookeeper/bin/zkServer.sh start-foreground"

Check the environment variables:

$ kubectl exec -it zookeeper-deployment-3-6b65cf9778-hndtv  /bin/bash
bash-4.4# echo $ZOOKEEPER_ID
3

Check cluster data consistency

$ kubectl exec -it zookeeper-deployment-3-6b65cf9778-hndtv  /opt/zookeeper/bin/zkCli.sh ls /
$ kubectl exec -it zookeeper-deployment-3-6b65cf9778-hndtv  /opt/zookeeper/bin/zkCli.sh create /why 123
$ kubectl exec -it zookeeper-deployment-3-6b65cf9778-hndtv  /opt/zookeeper/bin/zkCli.sh get /why
$ kubectl exec -it zookeeper-deployment-2-6fcf5fd55b-kpj45  /opt/zookeeper/bin/zkCli.sh get /why
$ kubectl exec -it zookeeper-deployment-1-689c56fdc4-l9h29  /opt/zookeeper/bin/zkCli.sh get /why

Data persistence

On data persistence: ZooKeeper serves its data from memory, and the on-disk snapshots and transaction logs are only there for recovery, so that data is not lost if every node goes down at once. The Deployments here do not use persistent volumes, so when all replicas are recreated at the same time the on-disk copies disappear with them, as the tests below show.
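If you want to look at the recovery data ZooKeeper keeps on disk, you can list the dataDir inside one of the pods (pod name taken from the cluster above; version-2 is ZooKeeper's standard snapshot/transaction-log directory under dataDir):

$ kubectl exec -it zookeeper-deployment-2-6fcf5fd55b-kpj45 ls /var/lib/zookeeper/version-2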

Test: delete one pod

$ kubectl delete pod zookeeper-deployment-1-689c56fdc4-l9h29
pod "zookeeper-deployment-1-689c56fdc4-l9h29" deleted
$ kubectl exec -it zookeeper-deployment-1-689c56fdc4-4zhsg /opt/zookeeper/bin/zkCli.sh  get /why

WATCHER::

WatchedEvent state:SyncConnected type:None path:null
123
cZxid = 0x100000008
ctime = Fri Jan 18 10:33:43 GMT 2019
mZxid = 0x100000008
mtime = Fri Jan 18 10:33:43 GMT 2019
pZxid = 0x100000008
cversion = 0
dataVersion = 0
aclVersion = 0
ephemeralOwner = 0x0
dataLength = 3
numChildren = 0

Test: delete multiple pods

$ kubectl delete pod zookeeper-deployment-1-689c56fdc4-4zhsg  zookeeper-deployment-2-6fcf5fd55b-kpj45  zookeeper-deployment-3-6b65cf9778-hndtv
pod "zookeeper-deployment-1-689c56fdc4-4zhsg" deleted
pod "zookeeper-deployment-2-6fcf5fd55b-kpj45" deleted
pod "zookeeper-deployment-3-6b65cf9778-hndtv" deleted
$ kubectl exec -it zookeeper-deployment-1-689c56fdc4-6828b /opt/zookeeper/bin/zkCli.sh  get /why

WATCHER::

WatchedEvent state:SyncConnected type:None path:null
Node does not exist: /why

Install kafka
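The brokers themselves come from the manifests in the same repo (file names as shown in the directory listing above):

$ kubectl apply -f kafka-cluster.yaml
$ kubectl apply -f kafka-service.yaml
$ kubectl get pods | grep kafka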


Create a topic on kafka-deployment-1

$ kubectl exec -it kafka-deployment-1-76bf8fcdc8-n7vzg /bin/bash
bash-4.4# kafka-topics.sh --create --zookeeper zoo1:2181,zoo2:2181,zoo3:2181 --replication-factor 1 --partitions 3 --topic test001
Created topic "test001".

A partition is a shard of the topic: messages are spread across the partitions so they can be consumed in parallel. It is the replication factor (here 1) that controls how many copies of each partition are kept, not how many times a message can be consumed; see the describe command below.
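To see how those partitions and replicas were actually assigned to brokers, describe the topic (same ZooKeeper connection string as above; it prints one line per partition with its leader, replica list and ISR):

bash-4.4# kafka-topics.sh --describe --zookeeper zoo1:2181,zoo2:2181,zoo3:2181 --topic test001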

List the topics from kafka-deployment-2

$ kubectl exec -it kafka-deployment-2-78d9f7fd56-prtnx /bin/bash
bash-4.4# kafka-topics.sh --list --zookeeper zoo1:2181,zoo2:2181,zoo3:2181
__consumer_offsets
test001

The newly created topic is visible, and the partition data also shows up under the kafka log directory:

bash-4.4# ls /kafka/kafka-logs-kafka-deployment-2-78d9f7fd56-prtnx/test001-1/
00000000000000000000.index      00000000000000000000.log        00000000000000000000.timeindex  leader-epoch-checkpoint

Produce messages on kafka-deployment-1

bash-4.4# kafka-console-producer.sh --broker-list kafka-service-1:9092 --topic test001
>message001   
>message002

Consume the messages on kafka-deployment-2

bash-4.4# kafka-console-consumer.sh --bootstrap-server kafka-service-2:9092 --topic test001 --from-beginning
message001
message002

filebeat

apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: filebeat-test
  namespace: default
spec:
  replicas: 2
  template:
    metadata:
      labels:
        k8s-app: filebeat-test
    spec:
      containers:
      - image: docker.elastic.co/beats/filebeat:5.4.0
        name: filebeat
        volumeMounts:
        - name: app-logs
          mountPath: /log
        - name: filebeat-config
          mountPath: /usr/share/filebeat/filebeat.yml
          subPath: filebeat.yml
      volumes:
      - name: app-logs
        emptyDir: {}
      - name: filebeat-config
        configMap:
          name: filebeat-config
          items:
          - key: filebeat.yml
            path: filebeat.yml
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: filebeat-config
data:
  filebeat.yml: |
    filebeat.prospectors:
    - input_type: log
      paths:
        - "/log/*"
    output.kafka:
      hosts: ["kafka-service-1:9092","kafka-service-2:9092","kafka-service-3:9092"]
      topic: "test002"
      required_acks: 1
      compression: gzip
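Apply the Deployment and ConfigMap above (saved together here as filebeat.yaml; the file name is just an example) and make sure the pods come up:

$ kubectl apply -f filebeat.yaml
$ kubectl get pods -l k8s-app=filebeat-test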

Write log entries and let filebeat ship them

$ kubectl exec -it filebeat-test-97d659dd9-gjvd4  /bin/sh
sh-4.2$ echo "1" >> /log/test.log
sh-4.2$ cat /usr/share/filebeat/data/registry 
[{"source":"/log/test.log","offset":2,"FileStateOS":{"inode":659120,"device":64769},"timestamp":"2019-01-21T09:40:02.575213743Z","ttl":-1}]
sh-4.2$ echo "2" >> /log/test.log
sh-4.2$ cat /usr/share/filebeat/data/registry 
[{"source":"/log/test.log","offset":2,"FileStateOS":{"inode":659120,"device":64769},"timestamp":"2019-01-21T09:40:02.575213743Z","ttl":-1}]
sh-4.2$ cat /log/test.log
1
2
sh-4.2$ cat /usr/share/filebeat/data/registry 
[{"source":"/log/test.log","offset":4,"FileStateOS":{"inode":659120,"device":64769},"timestamp":"2019-01-21T09:42:17.149325425Z","ttl":-1}]

Read the data back from kafka

$ kubectl exec -it kafka-deployment-1-76bf8fcdc8-td6n5 /bin/bash
bash-4.4# kafka-topics.sh --list --zookeeper zoo1:2181,zoo2:2181,zoo3:2181
__consumer_offsets
mytopic
test001
test002
bash-4.4# kafka-console-consumer.sh --bootstrap-server kafka-service-3:9092 --topic test002 --from-beginning
{"@timestamp":"2019-01-21T09:40:02.143Z","beat":{"hostname":"filebeat-test-97d659dd9-gjvd4","name":"filebeat-test-97d659dd9-gjvd4","version":"5.4.0"},"input_type":"log","message":"1","offset":2,"source":"/log/test.log","type":"log"}
{"@timestamp":"2019-01-21T09:42:17.145Z","beat":{"hostname":"filebeat-test-97d659dd9-gjvd4","name":"filebeat-test-97d659dd9-gjvd4","version":"5.4.0"},"input_type":"log","message":"2","offset":4,"source":"/log/test.log","type":"log"}

Reference: http://blog.51cto.com/seekerwolf/2106509

elasticsearch

$ git clone  https://github.com/liukuan73/kubernetes-addons.git
$ cd kubernetes-addons/log/elasticsearch/
$ vi use-glusterfs/es-data-stateful.yaml    # change the elasticsearch-data volume to an emptyDir:
      volumes:
      - name: elasticsearch-data
        emptyDir: {}
$ kubectl apply -f .
$ kubectl apply -f use-glusterfs/es-data-stateful.yaml
$ kubectl get svc -n kube-system  elasticsearch
NAME            TYPE       CLUSTER-IP       EXTERNAL-IP   PORT(S)          AGE
elasticsearch   NodePort   10.111.235.252   <none>        9200:30020/TCP   91m
$ curl 10.111.235.252:9200/_cluster/health 2>/dev/null | jq .
{
  "cluster_name": "myesdb",
  "status": "green",
  "timed_out": false,
  "number_of_nodes": 7,
  "number_of_data_nodes": 2,
  "active_primary_shards": 0,
  "active_shards": 0,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 0,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 100
}
$ curl -XPUT 'http://10.111.235.252:9200/megacorp/employee/1' -H 'Content-type: application/json;charset=utf-8' -d '{"first_name" : "John", "last_name" : "Smith", "age" : 25, "about" : "I love to go rock climbing", "interests": [ "sports", "music" ]}'
{"_index":"megacorp","_type":"employee","_id":"1","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":0,"_primary_term":1}
$ curl -XGET 'http://10.111.235.252:9200/megacorp/employee/1'
{"_index":"megacorp","_type":"employee","_id":"1","_version":1,"found":true,"_source":{"first_name" : "John", "last_name" : "Smith", "age" : 25, "about" : "I love to go rock climbing", "interests": [ "sports", "music" ]}}

Note the following section of the manifest:

      initContainers:
      - name: init-sysctl
        image: busybox:1.27.2
        command:
        - sysctl
        - -w
        - vm.max_map_count=262144
        securityContext:
          privileged: true

The init container sets the vm.max_map_count kernel parameter that Elasticsearch requires (at least 262144). This sysctl is not namespaced, so an ordinary container cannot write it; securityContext with privileged: true is what makes the write succeed, and the value ends up set on the node the pod is scheduled to.

$ docker run -it --rm busybox:1.27.2 sh
/ # sysctl -w vm.max_map_count=262144
sysctl: error setting key 'vm.max_map_count': Read-only file system
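Running the same test with a privileged container shows why privileged: true works (note this really does change the value on the docker host, so only try it on a test machine):

[root@node-01 ~]# docker run -it --rm --privileged busybox:1.27.2 sysctl -w vm.max_map_count=262144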

ES tuning

kibana

apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: kibana
  namespace: kube-system
  labels:
    app: kibana
spec:
  replicas: 1
  template:
    metadata:
      name: kibana
      labels:
        app: kibana
    spec:
      containers:
      - name: kibana
        image: docker.elastic.co/kibana/kibana-oss:6.2.2
        imagePullPolicy: IfNotPresent
        ports:
        - name: webui
          containerPort: 5601
        volumeMounts:
        - name: config-volume
          mountPath: /usr/share/kibana/config/
      volumes:
      - name: config-volume
        configMap:
          name: kibana
---
apiVersion: v1
kind: Service
metadata:
  name: logging-kibana
  namespace: kube-system
  labels:
    app: kibana
spec:
  type: NodePort
  ports:
    - name: webui
      port: 5601
      targetPort: 5601
      protocol: TCP
      nodePort: 30021
  selector:
    app: kibana
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: kibana
  namespace: kube-system
data:
  kibana.yml: |
    server.host: "0.0.0.0"
    elasticsearch.url: "http://elasticsearch:9200"

Kibana can then be reached directly on node port 30021 (see the check below).
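A quick way to confirm Kibana is up is its status endpoint (replace <node-ip> with the address of any cluster node; /api/status is a standard Kibana 6.x endpoint):

$ curl http://<node-ip>:30021/api/status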

logstash

A pipeline configuration for reference (note that it was written for an older logstash; see the remarks after it):

input {
    kafka {
        consumer_restart_on_error => true
        consumer_threads => 2
        group_id => "elk"
        topic_id => "test002"
        type => "application-log"
        zk_connect => "zoo1:2181"
    }
}

filter {
        if [type] == "application-log" {
            ruby{
                    init=> "@kname = ['host', 'level','logId','business','c_time','http_host','user_agent','request_uri','post','client_ip','local_ip','func','user_id','msg']"
                    code=> "event.append(Hash[@kname.zip(event['message'].split('-==-'))])"
                }
            mutate {
                remove_field => [ "message" ]
            }
        }
}

output {
    if [type] == "shop-waiter-application-log" {
        elasticsearch {
            action => "index"
            flush_size => 100
            hosts => ["elasticsearch"] # qbj3-op-es
            idle_flush_time => 1
            index => "application-log-%{+YYYY.MM.dd}"
            manage_template => true
            retry_max_interval => 2
            timeout => 2
            workers => 20
            template => "/op/op-logstash/conf/.template/shop-waiter-v2.json"
            template_name => "application-log"
            template_overwrite => true
        }
    }
}
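Note that the kafka input options above (zk_connect, topic_id, consumer_restart_on_error) belong to the old ZooKeeper-based kafka input and no longer exist in the logstash 6.5.4 image used below, which talks to the brokers directly. A minimal equivalent pipeline for 6.x would look roughly like this (a sketch under those assumptions, not taken from a running setup):

input {
    kafka {
        bootstrap_servers => "kafka-service-1:9092,kafka-service-2:9092,kafka-service-3:9092"
        topics => ["test002"]
        group_id => "elk"
        consumer_threads => 2
        codec => "json"
    }
}

output {
    elasticsearch {
        # the elasticsearch Service created earlier lives in the kube-system namespace
        hosts => ["elasticsearch.kube-system:9200"]
        index => "application-log-%{+YYYY.MM.dd}"
    }
}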

Running logstash as a container

By default it reads /usr/share/logstash/pipeline/logstash.conf

kind: Deployment
apiVersion: apps/v1beta2
metadata:
  labels:
    app: logstash
  name: logstash
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: logstash
  template:
    metadata:
      labels:
        app: logstash
    spec:
      containers:
        - name: logstash
          image: docker.elastic.co/logstash/logstash:6.5.4
          volumeMounts:
            - name: logstash-conf
              mountPath: /usr/share/logstash/pipeline/
      volumes:
        - name: logstash-conf
          configMap:
            name: logstash
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash
data:
  logstash.conf: |
    #
    # (pipeline configuration goes here)
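Apply the two objects (saved here as logstash.yaml purely for the example) and check the logstash log to confirm the pipeline starts:

$ kubectl apply -f logstash.yaml
$ kubectl logs deployment/logstash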

For more configuration examples, see the references above.