# 集群部署
作者:青牛踏雪御苍穹(CoDo老王)
- 部署 ETCD 集群(自签证书)
# 准备工作
操作系统:Ubuntu 18.04.3 LTS (本地环境) 集群部署(自签证书)
自行打通服务器之间的SSH免密,或者自己输入密码来完成复制操作。
# 配置环境变量
ETCD_DIR='${ETCD_DIR}'
mkddir ${ETCD_DIR}
cat > ${ETCD_DIR}/environment.sh <<EOF
#!/usr/bin/bash
# etcd 版本
export ETCD_VERSION='3.4.14'
# 集群节点
export ETCD0='192.168.1.220'
export ETCD1='192.168.1.221'
export ETCD2='192.168.1.222'
# ETCD集群各机器 IP 数组
export NODE_IPS=(${ETCD0} ${ETCD1} ${ETCD2})
# 集群各 IP 对应的主机名数组
export NODE_NAMES=(etcd-0 etcd-1 etcd-2)
# etcd 集群服务地址列表
export ETCD_ENDPOINTS="https://${ETCD0}:2379,https://${ETCD1}:2379,https://${ETCD2}:2379"
# etcd 集群间通信的 IP 和端口
export ETCD_NODES="etcd-0=https://${ETCD0}:2380,etcd-1=https://${ETCD1}:2380,etcd-2=https://${ETCD2}:2380"
# etcd 目录
export ETCD_DIR="/data/etcd"
# etcd 数据目录
export ETCD_DATA_DIR="/data/etcd/data"
# etcd WAL 目录,建议是 SSD 磁盘分区,或者和 ETCD_DATA_DIR 不同的磁盘分区
export ETCD_WAL_DIR="/data/etcd/wal"
# etcd 自签证书存放目录
export ETCD_CA_DIR='/data/etcd/ca'
# etcd 各个节点证书存放目录
export ETCD_ETC_DIR='/etc/etcd/cert'
EOF
chmod +x environment.sh
# 创建存放数据、证书、WAl目录
mkdir ${ETCD_DATA_DIR} ${ETCD_CA_DIR} ${ETCD_WAL_DIR}
配置本地hosts
# 配置本地host
source ${ETCD_DIR}/environment.sh
cat >> /etc/hosts <<EOF
${ETCD0} etcd-1
${ETCD1} etcd-2
${ETCD2} etcd-3
EOF
# 下载和分发 etcd 二进制文件
release (opens new window) 查找自己需要的版本。
source ${ETCD_DIR}/environment.sh
cd ${ETCD_DIR}
wget https://github.com/coreos/etcd/releases/download/v${ETCD_VERSION}/etcd-v${ETCD_VERSION}-linux-amd64.tar.gz
tar xvf etcd-v${ETCD_VERSION}-linux-amd64.tar.gz
# 分发二进制文件到各个主机
cd ${ETCD_DIR}
source ${ETCD_DIR}/environment.sh
for node_ip in ${NODE_IPS[@]}
do
echo ">>> ${node_ip}"
scp -r /data/etcd/etcd-v${ETCD_VERSION}-linux-amd64/etcd* root@${node_ip}:/usr/local/bin/
ssh root@${node_ip} "chmod +x /usr/local/bin/etcd*"
done
# 输出结果
>>> 192.168.1.220
etcd 100% 23MB 51.1MB/s 00:00
etcdctl 100% 17MB 44.5MB/s 00:00
>>> 192.168.1.221
etcd 100% 23MB 39.1MB/s 00:00
etcdctl 100% 17MB 34.4MB/s 00:00
>>> 192.168.1.222
etcd 100% 23MB 52.0MB/s 00:00
etcdctl 100% 17MB 45.8MB/s 00:00
# 安装 cfssl 工具集
# 安装cfssl
cd ${ETCD_DIR}
wget https://github.com/cloudflare/cfssl/releases/download/v1.4.1/cfssl_1.4.1_linux_amd64
mv cfssl_1.4.1_linux_amd64 /usr/local/bin/cfssl
# cfssljson
wget https://github.com/cloudflare/cfssl/releases/download/v1.4.1/cfssljson_1.4.1_linux_amd64
mv cfssljson_1.4.1_linux_amd64 /usr/local/bin/cfssljson
# cfssl-certinfo
wget https://github.com/cloudflare/cfssl/releases/download/v1.4.1/cfssl-certinfo_1.4.1_linux_amd64
mv cfssl-certinfo_1.4.1_linux_amd64 /usr/local/bin/cfssl-certinfo
# 添加执行权限
chmod +x /usr/local/bin/cfssl*
# 创建证书
CA 配置文件用于配置根证书的使用场景 (profile) 和具体参数 (usage,过期时间、服务端认证、客户端认证、加密等):
cd ${ETCD_CA_DIR}
cat > ca-config.json <<EOF
{
"signing": {
"default": {
"expiry": "87600h"
},
"profiles": {
"etcd-cluster": {
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
],
"expiry": "876000h"
}
}
}
}
EOF
创建证书签名请求文件
cat > ca-csr.json <<EOF
{
"CN": "kubernetes-ca",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "BeiJing",
"L": "BeiJing",
"O": "etcd",
"OU": "opsnull"
}
],
"ca": {
"expiry": "876000h"
}
}
EOF
signing
:表示该证书可用于签名其它证书(生成的ca.pem
证书中CA=TRUE
);server auth
:表示 client 可以用该该证书对 server 提供的证书进行验证;client auth
:表示 server 可以用该该证书对 client 提供的证书进行验证;"expiry": "876000h"
:证书有效期设置为 100 年;
# 创建证书签名请求文件
cat > ca-csr.json <<EOF
{
"CN": "kubernetes-ca",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "BeiJing",
"L": "BeiJing",
"O": "etcd",
"OU": "opsnull"
}
],
"ca": {
"expiry": "876000h"
}
}
EOF
CN:Common Name
:etcd 从证书中提取该字段作为请求的用户名 (User Name),浏览器使用该字段验证网站是否合法;O:Organization
:etcd 从证书中提取该字段作为请求用户所属的组 (Group); kube-apiserver 将提取的User、Group
作为RBAC
授权的用户标识;
# 生成证书文件和私钥
cd ${ETCD_CA_DIR}
cfssl gencert -initca ca-csr.json | cfssljson -bare ca
输出结果:
2020/12/17 09:54:25 [INFO] generating a new CA key and certificate from CSR
2020/12/17 09:54:25 [INFO] generate received request
2020/12/17 09:54:25 [INFO] received CSR
2020/12/17 09:54:25 [INFO] generating key: rsa-2048
2020/12/17 09:54:26 [INFO] encoded CSR
2020/12/17 09:54:26 [INFO] signed certificate with serial number 437386317969465523284385845194090172491501571515
$ ls ca*
# 输出结果:
ca-config.json ca.csr ca-csr.json ca-key.pem ca.pem
# 分发证书文件
source ${ETCD_DIR}/environment.sh
for node_ip in ${NODE_IPS[@]}
do
echo ">>> ${node_ip}"
ssh root@${node_ip} "mkdir -p ${ETCD_ETC_DIR}"
scp ca*.pem ca-config.json root@${node_ip}:${ETCD_ETC_DIR}
done
# 输出结果:
>>> 192.168.1.220
ca-key.pem 100% 1679 1.2MB/s 00:00
ca.pem 100% 1326 1.2MB/s 00:00
ca-config.json 100% 293 221.9KB/s 00:00
>>> 192.168.1.221
ca-key.pem 100% 1679 1.1MB/s 00:00
ca.pem 100% 1326 1.2MB/s 00:00
ca-config.json 100% 293 180.2KB/s 00:00
>>> 192.168.1.222
ca-key.pem 100% 1679 973.5KB/s 00:00
ca.pem 100% 1326 972.0KB/s 00:00
ca-config.json
注意:
- 不同证书 csr 文件的 CN、C、ST、L、O、OU 组合必须不同,否则可能出现 PEER'S CERTIFICATE HAS AN INVALID SIGNATURE 错误;
- 后续创建证书的 csr 文件时,CN 都不相同(C、ST、L、O、OU 相同),以达到区分的目的;
etcd-csr证书签名
hosts
:指定授权使用该证书的 etcd 节点 IP 列表,需要将 etcd 集群所有节点 IP 都列在其中
cd ${ETCD_CA_DIR}
cat > /data/etcd/ca/etcd-csr.json <<EOF
{
"CN": "etcd",
"hosts": [
"127.0.0.1",
"${ETCD0}",
"${ETCD1}",
"${ETCD2}"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "BeiJing",
"L": "BeiJing",
"O": "etcd",
"OU": "opsnull"
}
]
}
EOF
生成证书和私钥:
cfssl gencert -ca=${ETCD_CA_DIR}/ca.pem \
-ca-key=${ETCD_CA_DIR}/ca-key.pem \
-config=${ETCD_CA_DIR}/ca-config.json \
-profile=etcd-cluster ${ETCD_CA_DIR}/etcd-csr.json | cfssljson -bare etcd
# 输出结果:
2020/12/17 10:03:52 [INFO] generate received request
2020/12/17 10:03:52 [INFO] received CSR
2020/12/17 10:03:52 [INFO] generating key: rsa-2048
2020/12/17 10:03:53 [INFO] encoded CSR
2020/12/17 10:03:53 [INFO] signed certificate with serial number 290666497635689555552442344139619956527112356386
# 分发证书
cd ${ETCD_CA_DIR}
for node_ip in ${NODE_IPS[@]}
do
echo ">>> ${node_ip}"
scp etcd*.pem root@${node_ip}:${ETCD_ETC_DIR}
done
# 输出结果
>>> 192.168.1.220
etcd-key.pem 100% 1679 279.6KB/s 00:00
etcd.pem 100% 1448 1.3MB/s 00:00
>>> 192.168.1.221
etcd-key.pem 100% 1679 763.6KB/s 00:00
etcd.pem 100% 1448 1.0MB/s 00:00
>>> 192.168.1.222
etcd-key.pem 100% 1679 1.0MB/s 00:00
etcd.pem
# 创建 etcd 的 systemd unit 模板文件
模板不需要做任何改动,所有需要替换的地方后续都会替换掉:
cd ${ETCD_DIR}
source ${ETCD_DIR}/environment.sh
cat > etcd.service.template <<EOF
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
Documentation=https://github.com/coreos
[Service]
Type=notify
WorkingDirectory=${ETCD_DATA_DIR}
ExecStart=/usr/local/bin/etcd \\
--data-dir=${ETCD_DATA_DIR} \\
--wal-dir=${ETCD_WAL_DIR} \\
--name=##NODE_NAME## \\
--cert-file=${ETCD_ETC_DIR}/etcd.pem \\
--key-file=${ETCD_ETC_DIR}/etcd-key.pem \\
--trusted-ca-file=${ETCD_ETC_DIR}/ca.pem \\
--peer-cert-file=${ETCD_ETC_DIR}/etcd.pem \\
--peer-key-file=${ETCD_ETC_DIR}/etcd-key.pem \\
--peer-trusted-ca-file=${ETCD_ETC_DIR}/ca.pem \\
--peer-client-cert-auth \\
--client-cert-auth \\
--listen-peer-urls=https://##NODE_IP##:2380 \\
--initial-advertise-peer-urls=https://##NODE_IP##:2380 \\
--listen-client-urls=https://##NODE_IP##:2379,http://127.0.0.1:2379 \\
--advertise-client-urls=https://##NODE_IP##:2379 \\
--initial-cluster-token=etcd-cluster-0 \\
--initial-cluster=${ETCD_NODES} \\
--initial-cluster-state=new \\
--auto-compaction-mode=periodic \\
--auto-compaction-retention=1 \\
--max-request-bytes=33554432 \\
--quota-backend-bytes=6442450944 \\
--heartbeat-interval=250 \\
--election-timeout=2000
Restart=on-failure
RestartSec=5
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
参数说明:
WorkingDirectory
、--data-dir
:指定工作目录和数据目录为${ETCD_DATA_DIR}
,需在启动服务前创建这个目录;--wal-dir
:指定 wal 目录,为了提高性能,一般使用 SSD 或者和--data-dir
不同的磁盘;--name
:指定节点名称,当--initial-cluster-state
值为 new 时,--name
的参数值必须位于--initial-cluster
列表中;--cert-file、--key-file
:etcd server 与 client 通信时使用的证书和私钥;--trusted-ca-file
:签名 client 证书的 CA 证书,用于验证 client 证书;--peer-cert-file
、--peer-key-file
:etcd 与 peer 通信使用的证书和私钥;--peer-trusted-ca-file
:签名 peer 证书的 CA 证书,用于验证 peer 证书;
# 创建和分发 etcd systemd unit 文件
替换模板文件中的变量,为各节点创建 systemd unit 文件:
# 生成
source ${ETCD_DIR}/environment.sh
for (( i=0; i < 3; i++ ))
do
sed -e "s/##NODE_NAME##/${NODE_NAMES[i]}/" -e "s/##NODE_IP##/${NODE_IPS[i]}/" etcd.service.template > etcd-${NODE_IPS[i]}.service
done
# 验证
ls *.service
# 输出结果:
etcd-192.168.1.220.service etcd-192.168.1.221.service etcd-192.168.1.222.service
- NODE_NAMES 和 NODE_IPS 为相同长度的 bash 数组,分别为节点名称和对应的 IP;
分发生成的 systemd unit 文件:
source ${ETCD_DIR}/environment.sh
for node_ip in ${NODE_IPS[@]}
do
echo ">>> ${node_ip}"
scp etcd-${node_ip}.service root@${node_ip}:/etc/systemd/system/etcd.service
done
# 启动服务测试
source ${ETCD_DIR}/environment.sh
for node_ip in ${NODE_IPS[@]}
do
echo ">>> ${node_ip}"
ssh root@${node_ip} "mkdir -p ${ETCD_DATA_DIR} ${ETCD_WAL_DIR}"
ssh root@${node_ip} "systemctl daemon-reload && systemctl enable etcd && systemctl restart etcd " &
done
# 输出结果:
>>> 192.168.1.220
[7] 26642
>>> 192.168.1.221
[8] 26729
>>> 192.168.1.222
[9] 26840
- 命令首先创建 etcd 数据目录和工作目录
- etcd 进程首次启动时会等待其它节点的 etcd 加入集群,命令
systemctl start etcd
会卡住一段时间,为正常现象
# 检查启动结果
source ${ETCD_DIR}/environment.sh
for node_ip in ${NODE_IPS[@]}
do
echo ">>> ${node_ip}"
ssh root@${node_ip} "systemctl status etcd|grep Active"
done
输出结果:
>>> 192.168.1.220
Active: active (running) since Thu 2020-12-17 10:15:02 UTC; 31s ago
>>> 192.168.1.221
Active: active (running) since Thu 2020-12-17 10:15:02 UTC; 36s ago
>>> 192.168.1.222
Active: active (running) since Thu 2020-12-17 10:15:02 UTC; 40s ago
# 验证ETCD集群服务状态
部署完 etcd 集群后,在任一 etcd
节点上执行如下命令,我是etcd-0执行的部署操作。
source ${ETCD_DIR}/environment.sh
for node_ip in ${NODE_IPS[@]}
do
echo ">>> ${node_ip}"
/usr/local/bin/etcdctl \
--endpoints=https://${node_ip}:2379 \
--cacert=${ETCD_ETC_DIR}/ca.pem \
--cert=${ETCD_ETC_DIR}/etcd.pem \
--key=${ETCD_ETC_DIR}/etcd-key.pem endpoint health
done
# 输出结果:
>>> 192.168.1.220
https://192.168.1.220:2379 is healthy: successfully committed proposal: took = 99.689319ms
>>> 192.168.1.221
https://192.168.1.221:2379 is healthy: successfully committed proposal: took = 47.262066ms
>>> 192.168.1.222
https://192.168.1.222:2379 is healthy: successfully committed proposal: took = 49.330772ms
ok, etcd集群已经部署完成.