Helm部署ES文档

安装helm3.5.2

各版本下载地址:https://github.com/helm/helm/releases

下载最新的helm二进制包

1
[root@k8s-01 ~]# wget https://get.helm.sh/helm-v3.5.2-linux-amd64.tar.gz

解压并移动二进制文件至可执行目录

1
2
3
4
5
6
7
8
[root@k8s-01 ~]# tar xzvf helm-v3.5.2-linux-amd64.tar.gz 
linux-amd64/
linux-amd64/helm
linux-amd64/LICENSE
linux-amd64/README.md

[root@k8s-01 ~]# chmod +x linux-amd64/helm
[root@k8s-01 ~]# mv linux-amd64/helm /usr/bin/

验证安装

1
2
[root@k8s-01 ~]# helm  version 
version.BuildInfo{Version:"v3.5.2", GitCommit:"167aac70832d3a384f65f9745335e9fb40169dc2", GitTreeState:"dirty", GoVersion:"go1.15.7"}

helm安装ES集群

ES官方helm文档地址:https://github.com/elastic/helm-charts/tree/master/elasticsearch

本文档中使用的helm:https://github.com/elastic/helm-charts/tree/7.13/elasticsearch

添加ES的helm仓库并查看helm的仓库列表

1
2
3
4
5
6
7
$ helm repo add elastic https://helm.elastic.co
"elastic" has been added to your repositories

$ helm repo list
NAME URL
harbor https://helm.goharbor.io
elastic https://helm.elastic.co

获取指定版本es的chart包

1
2
3
4
$ helm pull --version 7.13.2 elastic/elasticsearch

$ ls |grep ela
elasticsearch-7.13.2.tgz

解压Chart包并根据需要修改values.yaml文件

1
2
3
$ tar xzvf elasticsearch-7.13.2.tgz
$ cd elasticsearch-7.13.2
$ vim values.yaml

下面是我已经改好的values文件,仅供参考。

注意集群中一定要有一个默认的存储类,默认模板无法手动指定存储类名称。(有存储类没有设置默认请使用命令设置默认存储类,否则无法动态创建pv )

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
---
# 集群通过helm删除后新集群使用原来的pvc挂载时,注意clusterName和nodeGroup的名称和被删除的一致,否则找不到磁盘无法恢复数据
clusterName: "elasticsearch"
nodeGroup: "master"

# 如果未修改clusterName和nodeGroup这里默认空即可
# 如果修改了clusterName和nodeGroup这里需要改成 "$clusterName-$masterNodeGroup"
masterService: ""

# 配置Pod镜像信息
image: "docker.elastic.co/elasticsearch/elasticsearch"
imageTag: "7.13.2"
imagePullPolicy: "IfNotPresent"
imagePullSecrets: []

# StatefulSet 的 副本数
replicas: 3

# 添加pod的注解信息
podAnnotations: {}
# iam.amazonaws.com/role: es-cluster

# 为pod附加自定义标签
labels: {}

# Pod的优先级配置
priorityClassName: ""
# 是否为sts开启数据持久化的功能
persistence:
enabled: true
labels:
# Add default labels for the volumeClaimTemplate of the StatefulSet
enabled: false
annotations: {}

# 持久化存储动态存储配置,存储类名称一定要指定。
# 持久化存储动态存储配置,存储类名称一定要指定。生产环境建议使用 "alicloud-disk-efficiency-retains" 存储类(chart删除后数据保留);"alicloud-disk-efficiency" 存储类在chart删除后数据会一同删除
volumeClaimTemplate:
accessModes: [ "ReadWriteOnce" ]
storageClassName: "alicloud-disk-efficiency"
resources:
requests:
storage: 20Gi

# 自定义配置JVM堆栈信息
esJavaOpts: "-Duser.timezone=GMT+08" # example: "-Xmx1g -Xms1g -Duser.timezone=GMT+08"

# Pod资源限制配置
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"

# 集群中角色相关的配置信息
roles:
# 主节点负责集群范围内的元数据(即Cluster State)相关的操作,例如创建或删除索引,跟踪哪些节点是集群的一部分以及确定将哪些 shard 分配给哪些节点。 拥有稳定的主节点对于群集健康非常重要
master: "true"
# 堆栈监控节点
ingest: "true"
# 数据节点包含包含已建立索引的文档的分片。 数据节点处理与数据相关的操作,例如 CRUD,搜索和聚合
data: "true"
# 跨集群连接时需要用到的client节点
remote_cluster_client: "true"
# 机器学习节点提供了机器学习功能,该节点运行作业并处理机器学习 API 请求
ml: "false"

service:
# 为非无头服务svc添加自定义标签
labels: {}
# 为无头服务svc添加自定义标签
labelsHeadless: {}
# 配置service的类型
type: NodePort
# 如果 type: NodePort,那么我们可以手动指定节点端口号,为空则随机分配一个节点端口号
nodePort: "32222"
# 为服务的LoadBalancer添加注释。如果service.type是LoadBalancer,这将配置负载平衡器LoadBalancer
annotations: {}
# svc中 http 端口的名称
httpPortName: http
# svc中传输端口的名称
transportPortName: transport
# 一些云提供商允许您指定loadBalancer IP。如果loadBalancerIP未指定该字段,则动态分配 IP。如果您指定了loadBalancerIP但云提供商不支持该功能,则它会被忽略
loadBalancerIP: ""
loadBalancerSourceRanges: []
# 一些云提供商允许您指定LoadBalancer externalTrafficPolicy。如果service.type是LoadBalancer将使用它来保留客户端源 IP。
externalTrafficPolicy: ""

# 将用于就绪探测的协议
protocol: http
# 用于运行状况检查和服务的 http 端口。如果此参数修改,你还需要配置服务端口 http.port 在extraEnvs环境变量中。
httpPort: 9200
# 用于服务的传输端口。如果此参数修改,你还需要在extraEnvs环境变量中配置传输端口 transport.port
transportPort: 9300

# StatefulSet的pod更新策略。默认情况下会在升级每个 Pod 后等待集群变为绿色后再更新下一个。将此设置为OnDelete将允许您在升级期间手动删除每个pod
updateStrategy: RollingUpdate

# 最少master节点数量应该设置为(master_eligible_nodes / 2) + 1
minimumMasterNodes: 2

# 自定义 elasticsearch.yml 和 log4j2.properties 配置
esConfig: {}
# elasticsearch.yml: |
# path.repo: ["/tmp"]
# log4j2.properties: |
# key = value

# 注入自定义环境变量信息
extraEnvs: []
# - name: MY_ENVIRONMENT_VAR
# value: the_value_goes_here

# 通过secret或configMap映射加载环境变量
envFrom: []
# - secretRef:
# name: env-secret
# - configMapRef:
# name: config-map

# 挂载secret到pod中安装证书和其他secret
secretMounts: []
# - name: elastic-certificates
# secretName: elastic-certificates
# path: /usr/share/elasticsearch/config/certs
# defaultMode: 0755

# 向Pod中的 /etc/hosts 添加自定义条目
hostAliases: []
#- ip: "127.0.0.1"
# hostnames:
# - "foo.local"
# - "bar.local"

# init初始化容器的资源限制
initResources: {}
# limits:
# cpu: "25m"
# # memory: "128Mi"
# requests:
# cpu: "25m"
# memory: "128Mi"

# 自定义配置es服务监听的地址
networkHost: "0.0.0.0"

# rbac配置:如果 create: true 则自动创建 ServiceAccount;若要使用外部已有的 ServiceAccount,请设置 serviceAccountName: "externalServiceAccountName"
rbac:
create: false
serviceAccountAnnotations: {}
serviceAccountName: ""

# 是否创建具有最小权限的 pod 安全策略:如果 create: true 且 name 为空则自动创建默认策略;也可以在 create: true 时通过 name: "externalPodSecurityPolicy" 指定已有的自定义安全策略
podSecurityPolicy:
create: false
name: ""
spec:
privileged: true
fsGroup:
rule: RunAsAny
runAsUser:
rule: RunAsAny
seLinux:
rule: RunAsAny
supplementalGroups:
rule: RunAsAny
volumes:
- secret
- configMap
- persistentVolumeClaim
- emptyDir

# 添加额外的Volumes模板配置
extraVolumes: # []
- name: timezone
hostPath:
path: /etc/localtime

# 使用挂载额外的volumes配置
extraVolumeMounts: # []
- name: timezone
mountPath: /etc/localtime
readOnly: true

# 附加额外的容器
extraContainers: []
# - name: do-something
# image: busybox
# command: ['do', 'something']

# 附加初始化容器
extraInitContainers: []
# - name: do-something
# image: busybox
# command: ['do', 'something']

# 配置pod的反亲和性规则,默认配置将使pod不会部署在同一个节点。
# By default this will make sure two pods don't end up on the same node
# Changing this to a region would allow you to spread pods across regions
antiAffinityTopologyKey: "kubernetes.io/hostname"

# 默认值为:hard 这意味着硬性强制反亲和规则,没有满足pod则不会创建
# 如果设置为:soft 意味着软性反亲和规则,尽可能根据反亲和规则不部署pod在同一个节点上
antiAffinity: "soft"

# 配置节点的亲和性规则
# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#node-affinity-beta-feature
nodeAffinity: {}

# 默认设置是串行部署所有pod(有序启动-有序删除)。通过将此设置为 "Parallel",所有Pod将在启动时一次性全部创建
podManagementPolicy: "Parallel"

# 是否注入服务链接(service links)环境变量。这些环境变量通常不会被使用,
# 但当当前命名空间中存在大量服务时,注入它们会拖慢 pod 的启动速度。
# 如果你遇到 pod 启动缓慢的问题,可以将其设置为 "false"。
enableServiceLinks: true

# 这是pod中断预算的最大不可用设置
# 默认值为1 将确保pod中断不允许超过1
# pod在维护期间的最大不可用数量
maxUnavailable: 1

# 为 pod设置securityContext
podSecurityContext:
fsGroup: 1000
runAsUser: 1000

# 为容器设置securityContext
securityContext:
capabilities:
drop:
- ALL
# readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000

# pod优雅关闭的等待时间,超出将强制干掉pod
terminationGracePeriod: 120

# 设置Elasticsearch 所需的sysctl vm.max_map_count
sysctlVmMaxMapCount: 262144

# 就绪检查配置参数
readinessProbe:
# 检查失败后尝试的次数
failureThreshold: 3
# 容器启动后多少秒开始进行检查
initialDelaySeconds: 10
# 两次健康检查的间隔时间
periodSeconds: 10
# 失败后检查成功的连续次数
successThreshold: 3
# 健康检查超时秒数
timeoutSeconds: 5

# 集群健康检查的接口参数 GET /_cluster/health?wait_for_status=yellow&timeout=50s
# https://www.elastic.co/guide/en/elasticsearch/reference/7.13/cluster-health.html#request-params wait_for_status
clusterHealthCheckParams: "wait_for_status=green&timeout=1s"

# 使用的自定义调度器名称
schedulerName: ""
# 选择指定节点启动pod
nodeSelector: {}
# 节点污点容忍配置
tolerations: []

# 通过ingress暴露集群
ingress:
enabled: false
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths:
- path: /
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local

# 指定pod启动前执行的钩子 和pod停止前执行的钩子
lifecycle: {}
# preStop:
# exec:
# command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"]
# postStart:
# exec:
# command:
# - bash
# - -c
# - |
# #!/bin/bash
# # Add a template to adjust number of shards/replicas
# TEMPLATE_NAME=my_template
# INDEX_PATTERN="logstash-*"
# SHARD_COUNT=8
# REPLICA_COUNT=1
# ES_URL=http://localhost:9200
# while [[ "$(curl -s -o /dev/null -w '%{http_code}\n' $ES_URL)" != "200" ]]; do sleep 1; done
# curl -XPUT "$ES_URL/_template/$TEMPLATE_NAME" -H 'Content-Type: application/json' -d'{"index_patterns":['\""$INDEX_PATTERN"\"'],"settings":{"number_of_shards":'$SHARD_COUNT',"number_of_replicas":'$REPLICA_COUNT'}}'

# 如果使用另一种方法设置sysctl vm.max_map_count,则允许禁用
sysctlInitContainer:
enabled: true

# 自定义秘钥库
keystore: []

# 配置网络策略
networkPolicy:
## Enable creation of NetworkPolicy resources. Only Ingress traffic is filtered for now.
## In order for a Pod to access Elasticsearch, it needs to have the following label:
## {{ template "uname" . }}-client: "true"
## Example for default configuration to access HTTP port:
## elasticsearch-master-http-client: "true"
## Example for default configuration to access transport port:
## elasticsearch-master-transport-client: "true"

http:
enabled: false
## if explicitNamespacesSelector is not set or set to {}, only client Pods being in the networkPolicy's namespace
## and matching all criteria can reach the DB.
## But sometimes, we want the Pods to be accessible to clients from other namespaces, in this case, we can use this
## parameter to select these namespaces
##
# explicitNamespacesSelector:
# # Accept from namespaces with all those different rules (only from whitelisted Pods)
# matchLabels:
# role: frontend
# matchExpressions:
# - {key: role, operator: In, values: [frontend]}

## Additional NetworkPolicy Ingress "from" rules to set. Note that all rules are OR-ed.
##
# additionalRules:
# - podSelector:
# matchLabels:
# role: frontend
# - podSelector:
# matchExpressions:
# - key: role
# operator: In
# values:
# - frontend

transport:
## Note that all Elasticsearch Pods can talks to themselves using transport port even if enabled.
enabled: false
# explicitNamespacesSelector:
# matchLabels:
# role: frontend
# matchExpressions:
# - {key: role, operator: In, values: [frontend]}
# additionalRules:
# - podSelector:
# matchLabels:
# role: frontend
# - podSelector:
# matchExpressions:
# - key: role
# operator: In
# values:
# - frontend

使用helm install安装es集群到pika名称空间

1
$ helm install elastic-m4nyhd  -n pika  ./

查看helm安装状态

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
$ kubectl get all -n pika
NAME READY STATUS RESTARTS AGE
pod/elasticsearch-master-0 1/1 Running 0 10m
pod/elasticsearch-master-1 1/1 Running 0 11m
pod/elasticsearch-master-2 1/1 Running 0 12m

NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/elasticsearch-master NodePort 172.16.46.13 <none> 9200:32222/TCP,9300:31061/TCP 52m
service/elasticsearch-master-headless ClusterIP None <none> 9200/TCP,9300/TCP 52m

NAME READY AGE
statefulset.apps/elasticsearch-master 3/3 52m


$ helm list -n pika
NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
elastic-m4nyhd pika 2 2021-07-02 15:31:59.950721086 +0800 CST deployed elasticsearch-0.0.6 7.13.2

使用浏览器访问NodeIP:32222即可访问到集群

ES集群的数据迁移

elasticsearch-dump

集群A数据迁移至集群B

注意提前在目标集群创建好索引以及分片和字段

1
2
# 迁移数据
elasticdump --input=http://192.168.201.81:9200/patent --output=http://192.168.203.25:32222/patent --type=data --limit=10000 --noRefresh

参数介绍

  • --input:源es集群地址,patent为源索引名称
  • --output:目标es集群地址,patent为目标索引名称
  • --type=data:为只迁移数据
  • --limit=10000:代表每次批量操作迁移多少条文档(最大貌似是10000)
  • --noRefresh:为不自动刷新,不然每迁移10000条会自动刷新一次,贼慢。

其他参数看GitHub介绍。