Helm部署ES文档

安装helm3.5.2

各版本下载地址:https://github.com/helm/helm/releases

下载最新的helm二进制包

1
[root@k8s-01 ~]# wget https://get.helm.sh/helm-v3.5.2-linux-amd64.tar.gz

解压并移动二进制文件至可执行目录

1
2
3
4
5
6
7
8
[root@k8s-01 ~]# tar xzvf helm-v3.5.2-linux-amd64.tar.gz 
linux-amd64/
linux-amd64/helm
linux-amd64/LICENSE
linux-amd64/README.md

[root@k8s-01 ~]# chmod +x linux-amd64/helm
[root@k8s-01 ~]# mv linux-amd64/helm /usr/bin/

验证安装

1
2
[root@k8s-01 ~]# helm  version 
version.BuildInfo{Version:"v3.5.2", GitCommit:"167aac70832d3a384f65f9745335e9fb40169dc2", GitTreeState:"dirty", GoVersion:"go1.15.7"}

helm安装ES集群

ES官方helm文档地址:https://github.com/elastic/helm-charts/tree/master/elasticsearch

本文档中使用的helm:https://github.com/elastic/helm-charts/tree/7.13/elasticsearch

添加ES的helm仓库并查看helm的仓库列表

1
2
3
4
5
6
7
$ helm repo add elastic https://helm.elastic.co
"elastic" has been added to your repositories

$ helm repo list
NAME URL
harbor https://helm.goharbor.io
elastic https://helm.elastic.co

获取指定版本es的chart包

1
2
3
4
$ helm pull --version 7.13.2 elastic/elasticsearch

$ ls |grep ela
elasticsearch-7.13.2.tgz

解压Chart包并根据需要修改values.yaml文件

1
2
3
$ tar xzvf elasticsearch-7.13.2.tgz
$ cd elasticsearch-7.13.2
$ vim values.yaml

下面是我已经改好的values文件,仅供参考。

注意集群中一定要有一个默认的存储类,默认模板无法手动指定存储类名称。(有存储类没有设置默认请使用命令设置默认存储类,否则无法动态创建pv )

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
---
# 集群通过helm删除后新集群使用原来的pvc挂载时,注意clusterName和nodeGroup的名称和被删除的一致,否则找不到磁盘无法恢复数据
clusterName: "elasticsearch"
nodeGroup: "master"

# 如果未修改clusterName和nodeGroup这里默认空即可
# 如果修改了clusterName和nodeGroup这里需要改成 "$clusterName-$masterNodeGroup"
masterService: ""

# 配置Pod镜像信息
image: "docker.elastic.co/elasticsearch/elasticsearch"
imageTag: "7.13.2"
imagePullPolicy: "IfNotPresent"
imagePullSecrets: []

# StatefulSet 的 副本数
replicas: 3

# 添加pod的注解信息
podAnnotations: {}
# iam.amazonaws.com/role: es-cluster

# 为pod附加自定义标签
labels: {}

# Pod的优先级配置
priorityClassName: ""
# 是否为sts开启数据持久化的功能
persistence:
enabled: true
labels:
# Add default labels for the volumeClaimTemplate of the StatefulSet
enabled: false
annotations: {}

# 持久化存储动态存储配置,存储类名称一定要指定。
# 持久化存储动态存储配置,存储类名称一定要指定。生产环境建议使用 "alicloud-disk-efficiency-retains" 存储类(chart删除后数据保留);"alicloud-disk-efficiency" 存储类在chart删除后数据会一同删除
volumeClaimTemplate:
accessModes: [ "ReadWriteOnce" ]
storageClassName: "alicloud-disk-efficiency"
resources:
requests:
storage: 20Gi

# 自定义配置JVM堆栈信息
esJavaOpts: "-Duser.timezone=GMT+08" # example: "-Xmx1g -Xms1g -Duser.timezone=GMT+08"

# Pod资源限制配置
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"

# 集群中角色相关的配置信息
roles:
# 主节点负责集群范围内的元数据(即Cluster State)相关的操作,例如创建或删除索引,跟踪哪些节点是集群的一部分以及确定将哪些 shard 分配给哪些节点。 拥有稳定的主节点对于群集健康非常重要
master: "true"
# 堆栈监控节点
ingest: "true"
# 数据节点包含包含已建立索引的文档的分片。 数据节点处理与数据相关的操作,例如 CRUD,搜索和聚合
data: "true"
# 跨集群连接时需要用到的client节点
remote_cluster_client: "true"
# 机器学习节点提供了机器学习功能,该节点运行作业并处理机器学习 API 请求
ml: "false"

service:
# 为非无头服务svc添加自定义标签
labels: {}
# 为无头服务svc添加自定义标签
labelsHeadless: {}
# 配置service的类型
type: NodePort
# 如果 type: NodePort,那么我们可以手动指定节点端口号,为空则随机分配一个节点端口号
nodePort: "32222"
# 为服务的LoadBalancer添加注释。如果service.type是LoadBalancer,这将配置负载平衡器LoadBalancer
annotations: {}
# svc中 http 端口的名称
httpPortName: http
# svc中传输端口的名称
transportPortName: transport
# 一些云提供商允许您指定loadBalancer IP。如果loadBalancerIP未指定该字段,则动态分配 IP。如果您指定了loadBalancerIP但云提供商不支持该功能,则它会被忽略
loadBalancerIP: ""
loadBalancerSourceRanges: []
# 一些云提供商允许您指定LoadBalancer externalTrafficPolicy。如果service.type是LoadBalancer将使用它来保留客户端源 IP。
externalTrafficPolicy: ""

# 将用于就绪探测的协议
protocol: http
# 用于运行状况检查和服务的 http 端口。如果此参数修改,你还需要配置服务端口 http.port 在extraEnvs环境变量中。
httpPort: 9200
# 用于服务的传输端口。如果此参数修改,你还需要在extraEnvs环境变量中配置传输端口 transport.port
transportPort: 9300

# StatefulSet的pod更新策略。默认情况下会在升级每个 Pod 后等待集群变为绿色后再更新下一个。将此设置为OnDelete将允许您在升级期间手动删除每个pod
updateStrategy: RollingUpdate

# 最少master节点数量应该设置为(master_eligible_nodes / 2) + 1
minimumMasterNodes: 2

# 自定义 elasticsearch.yml 和 log4j2.properties 配置
esConfig: {}
# elasticsearch.yml: |
# path.repo: ["/tmp"]
# log4j2.properties: |
# key = value

# 注入自定义环境变量信息
extraEnvs: []
# - name: MY_ENVIRONMENT_VAR
# value: the_value_goes_here

# 通过secret或configMap映射加载环境变量
envFrom: []
# - secretRef:
# name: env-secret
# - configMapRef:
# name: config-map

# 挂载secret到pod中安装证书和其他secret
secretMounts: []
# - name: elastic-certificates
# secretName: elastic-certificates
# path: /usr/share/elasticsearch/config/certs
# defaultMode: 0755

# 向Pod中的 /etc/hosts 添加自定义条目
hostAliases: []
#- ip: "127.0.0.1"
# hostnames:
# - "foo.local"
# - "bar.local"

# init初始化容器的资源限制
initResources: {}
# limits:
# cpu: "25m"
# # memory: "128Mi"
# requests:
# cpu: "25m"
# memory: "128Mi"

# 自定义配置es服务监听的地址
networkHost: "0.0.0.0"

# rbac配置:如果 create: true 则自动创建 ServiceAccount;若要使用外部已有的 ServiceAccount,请设置 serviceAccountName: "externalServiceAccountName"
rbac:
create: false
serviceAccountAnnotations: {}
serviceAccountName: ""

# 是否创建具有最小权限的 pod 安全策略:如果 create: true 且 name 为空则自动创建默认策略;也可以在 create: true 时通过 name: "externalPodSecurityPolicy" 指定已有的自定义安全策略
podSecurityPolicy:
create: false
name: ""
spec:
privileged: true
fsGroup:
rule: RunAsAny
runAsUser:
rule: RunAsAny
seLinux:
rule: RunAsAny
supplementalGroups:
rule: RunAsAny
volumes:
- secret
- configMap
- persistentVolumeClaim
- emptyDir

# 添加额外的Volumes模板配置
extraVolumes: # []
- name: timezone
hostPath:
path: /etc/localtime

# 使用挂载额外的volumes配置
extraVolumeMounts: # []
- name: timezone
mountPath: /etc/localtime
readOnly: true

# 附加额外的容器
extraContainers: []
# - name: do-something
# image: busybox
# command: ['do', 'something']

# 附加初始化容器
extraInitContainers: []
# - name: do-something
# image: busybox
# command: ['do', 'something']

# 配置pod的反亲和性规则,默认配置将使pod不会部署在同一个节点。
# By default this will make sure two pods don't end up on the same node
# Changing this to a region would allow you to spread pods across regions
antiAffinityTopologyKey: "kubernetes.io/hostname"

# 默认值为:hard 这意味着硬性强制反亲和规则,没有满足pod则不会创建
# 如果设置为:soft 意味着软性反亲和规则,尽可能根据反亲和规则不部署pod在同一个节点上
antiAffinity: "soft"

# 配置节点的亲和性规则
# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#node-affinity-beta-feature
nodeAffinity: {}

# 默认设置是串行部署所有pod(有序启动-有序删除)。通过将此设置为 "Parallel",所有Pod将在启动时一次性全部创建
podManagementPolicy: "Parallel"

# 是否注入服务链接(service links)环境变量。这些环境变量通常不会被使用,
# 但当当前命名空间中存在大量服务时,注入它们会拖慢 pod 的启动速度。
# 如果你遇到 pod 启动缓慢的问题,可以将其设置为 "false"。
enableServiceLinks: true

# 这是pod中断预算的最大不可用设置
# 默认值为1 将确保pod中断不允许超过1
# pod在维护期间的最大不可用数量
maxUnavailable: 1

# 为 pod设置securityContext
podSecurityContext:
fsGroup: 1000
runAsUser: 1000

# 为容器设置securityContext
securityContext:
capabilities:
drop:
- ALL
# readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000

# pod优雅关闭的等待时间,超出将强制干掉pod
terminationGracePeriod: 120

# 设置Elasticsearch 所需的sysctl vm.max_map_count
sysctlVmMaxMapCount: 262144

# 就绪检查配置参数
readinessProbe:
# 检查失败后尝试的次数
failureThreshold: 3
# 容器启动后多少秒开始进行检查
initialDelaySeconds: 10
# 两次健康检查的间隔时间
periodSeconds: 10
# 失败后检查成功的连续次数
successThreshold: 3
# 健康检查超时秒数
timeoutSeconds: 5

# 集群健康检查的接口参数 GET /_cluster/health?wait_for_status=yellow&timeout=50s
# https://www.elastic.co/guide/en/elasticsearch/reference/7.13/cluster-health.html#request-params wait_for_status
clusterHealthCheckParams: "wait_for_status=green&timeout=1s"

# 使用的自定义调度器名称
schedulerName: ""
# 选择指定节点启动pod
nodeSelector: {}
# 节点污点容忍配置
tolerations: []

# 通过ingress暴露集群
ingress:
enabled: false
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths:
- path: /
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local

# 指定pod启动前执行的钩子 和pod停止前执行的钩子
lifecycle: {}
# preStop:
# exec:
# command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"]
# postStart:
# exec:
# command:
# - bash
# - -c
# - |
# #!/bin/bash
# # Add a template to adjust number of shards/replicas
# TEMPLATE_NAME=my_template
# INDEX_PATTERN="logstash-*"
# SHARD_COUNT=8
# REPLICA_COUNT=1
# ES_URL=http://localhost:9200
# while [[ "$(curl -s -o /dev/null -w '%{http_code}\n' $ES_URL)" != "200" ]]; do sleep 1; done
# curl -XPUT "$ES_URL/_template/$TEMPLATE_NAME" -H 'Content-Type: application/json' -d'{"index_patterns":['\""$INDEX_PATTERN"\"'],"settings":{"number_of_shards":'$SHARD_COUNT',"number_of_replicas":'$REPLICA_COUNT'}}'

# 如果使用另一种方法设置sysctl vm.max_map_count,则允许禁用
sysctlInitContainer:
enabled: true

# 自定义秘钥库
keystore: []

# 配置网络策略
networkPolicy:
## Enable creation of NetworkPolicy resources. Only Ingress traffic is filtered for now.
## In order for a Pod to access Elasticsearch, it needs to have the following label:
## {{ template "uname" . }}-client: "true"
## Example for default configuration to access HTTP port:
## elasticsearch-master-http-client: "true"
## Example for default configuration to access transport port:
## elasticsearch-master-transport-client: "true"

http:
enabled: false
## if explicitNamespacesSelector is not set or set to {}, only client Pods being in the networkPolicy's namespace
## and matching all criteria can reach the DB.
## But sometimes, we want the Pods to be accessible to clients from other namespaces, in this case, we can use this
## parameter to select these namespaces
##
# explicitNamespacesSelector:
# # Accept from namespaces with all those different rules (only from whitelisted Pods)
# matchLabels:
# role: frontend
# matchExpressions:
# - {key: role, operator: In, values: [frontend]}

## Additional NetworkPolicy Ingress "from" rules to set. Note that all rules are OR-ed.
##
# additionalRules:
# - podSelector:
# matchLabels:
# role: frontend
# - podSelector:
# matchExpressions:
# - key: role
# operator: In
# values:
# - frontend

transport:
## Note that all Elasticsearch Pods can talks to themselves using transport port even if enabled.
enabled: false
# explicitNamespacesSelector:
# matchLabels:
# role: frontend
# matchExpressions:
# - {key: role, operator: In, values: [frontend]}
# additionalRules:
# - podSelector:
# matchLabels:
# role: frontend
# - podSelector:
# matchExpressions:
# - key: role
# operator: In
# values:
# - frontend

使用helm install安装es集群到pika名称空间

1
$ helm install elastic-m4nyhd  -n pika  ./

查看helm安装状态

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
$ kubectl get all -n pika
NAME READY STATUS RESTARTS AGE
pod/elasticsearch-master-0 1/1 Running 0 10m
pod/elasticsearch-master-1 1/1 Running 0 11m
pod/elasticsearch-master-2 1/1 Running 0 12m

NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/elasticsearch-master NodePort 172.16.46.13 <none> 9200:32222/TCP,9300:31061/TCP 52m
service/elasticsearch-master-headless ClusterIP None <none> 9200/TCP,9300/TCP 52m

NAME READY AGE
statefulset.apps/elasticsearch-master 3/3 52m


$ helm list -n pika
NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
elastic-m4nyhd pika 2 2021-07-02 15:31:59.950721086 +0800 CST deployed elasticsearch-0.0.6 7.13.2

使用浏览器访问NodeIP:32222即可访问到集群

ES集群的数据迁移

elasticsearch-dump

集群A数据迁移至集群B

注意提前在目标集群创建好索引以及分片和字段

1
2
# 迁移数据
elasticdump --input=http://192.168.201.81:9200/patent --output=http://192.168.203.25:32222/patent --type=data --limit=10000 --noRefresh

参数介绍

  • --input:源es集群地址,patent为源索引名称
  • --output:目标es集群地址,patent为目标索引名称
  • --type=data:为只迁移数据
  • --limit=10000:代表每次批量操作迁移多少条文档(最大貌似是10000)
  • --noRefresh:为不自动刷新,不然每迁移10000条会自动刷新一次,贼慢。

其他参数看GitHub介绍。