1 主从复制及主主复制的实现

1.1 主从复制

当master服务器宕机,提升一个slave成为新的master

# 先搭建一主两从环境
[root@master ~]# dnf install mariadb-server -y
[root@master ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=8
log-bin
[root@master ~]# systemctl restart mariadb.service
[root@master ~]# mysql
MariaDB [(none)]> SHOW MASTER LOGS;
+--------------------+-----------+
| Log_name           | File_size |
+--------------------+-----------+
| mariadb-bin.000001 |     28267 |
| mariadb-bin.000002 |       344 |
+--------------------+-----------+
2 rows in set (0.000 sec)

MariaDB [(none)]> grant replication slave on *.* to repluser@'10.0.0.%' identified by 'waluna';
Query OK, 0 rows affected (0.000 sec)

[root@slave1 ~]# dnf install mariadb-server -y
[root@slave1 ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=18
read-only
[root@slave1 ~]# systemctl restart mariadb.service
[root@slave1 ~]# mysql
MariaDB [(none)]> CHANGE MASTER TO
    ->   MASTER_HOST='10.0.0.8',
    ->   MASTER_USER='repluser',
    ->   MASTER_PASSWORD='waluna',
    ->   MASTER_PORT=3306,
    ->   MASTER_LOG_FILE='mariadb-bin.000002',
    ->   MASTER_LOG_POS=344;
Query OK, 0 rows affected (0.004 sec)

MariaDB [(none)]> start slave;
Query OK, 0 rows affected (0.000 sec)

[root@slave2 ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=28
read-only
[root@slave2 ~]# systemctl restart mariadb.service
[root@slave2 ~]# mysql
MariaDB [(none)]> CHANGE MASTER TO
    ->   MASTER_HOST='10.0.0.8',
    ->   MASTER_USER='repluser',
    ->   MASTER_PASSWORD='waluna',
    ->   MASTER_PORT=3306,
    ->   MASTER_LOG_FILE='mariadb-bin.000002',
    ->   MASTER_LOG_POS=344;
Query OK, 0 rows affected (0.003 sec)

MariaDB [(none)]> start slave;
Query OK, 0 rows affected (0.001 sec)

# 导入数据,模拟宕机
[root@master ~]# mysql < hellodb_innodb.sql
[root@master ~]# mysql hellodb < testlog.sql
[root@master ~]# mysql hellodb
MariaDB [hellodb]> call pro_testlog();
# 关机

# 找到哪个从节点数据库时最新,让它成为master
[root@slave1 ~]# cat /var/lib/mysql/relay-log.info
5
./mariadb-relay-bin.000002
3025808
mariadb-bin.000002
3025595
0
[root@slave2 ~]# cat /var/lib/mysql/relay-log.info
5
./mariadb-relay-bin.000002
3025808
mariadb-bin.000002
3025595
0

# 因为实验环境一样,就暂时选slave1为新master
# 新msater修改配置文件,关闭read-only配置,并开启日志
[root@slave1 ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=18
read-only=OFF
log-bin
[root@slave1 ~]# systemctl restart mariadb.service

# 清除旧的master复制信息
[root@slave1 ~]# mysql
MariaDB [(none)]> set global read_only=off;
Query OK, 0 rows affected (0.000 sec)

MariaDB [(none)]> stop slave;
Query OK, 0 rows affected (0.001 sec)

MariaDB [(none)]> reset slave all;
Query OK, 0 rows affected (0.000 sec)

# 在新master上完全备份
[root@slave1 ~]# mysqldump -A -F --single-transaction --master-data=1 > /data/slave1.sql
[root@slave1 ~]# scp /data/slave1.sql 10.0.0.28:/data
# 分析旧的master的二进制日志,将为同步到至新master的二进制日志导出来,恢复到新的master,尽可能恢复数据

# 如果一样就停止服务,重新指向新master即可
[root@slave2 ~]# mysql
MariaDB [(none)]> stop slave;
Query OK, 0 rows affected (0.001 sec)

MariaDB [(none)]> reset slave all;
Query OK, 0 rows affected (0.001 sec)

[root@slave1 ~]# mysql
MariaDB [(none)]> show master logs;
+--------------------+-----------+
| Log_name           | File_size |
+--------------------+-----------+
| mariadb-bin.000001 |       379 |
| mariadb-bin.000002 |       375 |
+--------------------+-----------+
2 rows in set (0.000 sec)

[root@slave2 ~]# mysql
MariaDB [(none)]> CHANGE MASTER TO
    ->   MASTER_HOST='10.0.0.18',
    ->   MASTER_USER='repluser',
    ->   MASTER_PASSWORD='waluna',
    ->   MASTER_PORT=3306,
    ->   MASTER_LOG_FILE='mariadb-bin.000002',
    ->   MASTER_LOG_POS=375;
Query OK, 0 rows affected (0.014 sec)

MariaDB [(none)]> start slave;
Query OK, 0 rows affected (0.000 sec)

# 如果不一致
# 其他所有slave重新还原数据库,指向新的master
[root@slave2 ~]# systemctl stop mariadb.service   
[root@slave2 ~]# rm -rf /var/lib/mysql/*          
[root@slave2 ~]# vim /data/slave1.sql
CHANGE MASTER TO
MASTER_HOST='10.0.0.18',                                                                 
MASTER_USER='repluser',
MASTER_PASSWORD='waluna',
MASTER_PORT=3306,
MASTER_LOG_FILE='mariadb-bin.000002', MASTER_LOG_POS=375;
[root@slave2 ~]# systemctl restart mariadb.service
[root@slave2 ~]# mysql
MariaDB [(none)]> source /data/slave1.sql

MariaDB [mysql]> start slave;
Query OK, 0 rows affected (0.001 sec)
1.2 主主复制

主主复制:两个节点,都可以更新数据,并且互为主从

容易产生的问题:数据不一致;因此慎用

考虑要点:自动增长id

配置一个节点使用奇数id

atuo_increment_offset=1         # 开始点
atuo_increment_increment=2      # 增长幅度

另一个节点使用偶数id

auto_increment_offset=2
auto_increment_increment=2

主主复制的配置步骤:

  1. 各节点使用唯一的server-id
  2. 都启动binary log和relay log
  3. 创建拥有复制权限的用户账号
  4. 定义自动增长id字段的数值范围各为奇偶
  5. 均把对方指定为主节点,并启动复制线程

实现两个节点的主主复制

# 在第一个master节点上实现
[root@master1 ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=8
log-bin
auto_increment_offset=1         # 开始点
auto_increment_increment=2      # 增长幅度
[root@master1 ~]# systemctl restart mariadb.service
[root@master1 ~]# mysql
MariaDB [(none)]> show master logs;
+--------------------+-----------+
| Log_name           | File_size |
+--------------------+-----------+
| mariadb-bin.000001 |     28475 |
| mariadb-bin.000002 |       344 |
+--------------------+-----------+
2 rows in set (0.000 sec)

MariaDB [(none)]> grant replication slave on *.* to repluser@'10.0.0.%' identified by 'waluna';
Query OK, 0 rows affected (0.000 sec)

# 在第二个master节点上实现
[root@master2 ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=18
log-bin
auto_increment_offset=2         # 开始点
auto_increment_increment=2      # 增长幅度
[root@master2 ~]# systemctl restart mariadb.service
[root@master2 ~]# mysql
MariaDB [(none)]> CHANGE MASTER TO
    ->   MASTER_HOST='10.0.0.8',
    ->   MASTER_USER='repluser',
    ->   MASTER_PASSWORD='waluna',
    ->   MASTER_PORT=3306,
    ->   MASTER_LOG_FILE='mariadb-bin.000002',
    ->   MASTER_LOG_POS=344;
Query OK, 0 rows affected (0.003 sec)

MariaDB [(none)]> start slave;
Query OK, 0 rows affected (0.001 sec)

MariaDB [(none)]> show master logs;      # 查看二进制位置
+--------------------+-----------+
| Log_name           | File_size |
+--------------------+-----------+
| mariadb-bin.000001 |     28497 |
| mariadb-bin.000002 |       344 |
+--------------------+-----------+
2 rows in set (0.000 sec)

# 在第一个master节点上实现

MariaDB [(none)]> CHANGE MASTER TO
    ->   MASTER_HOST='10.0.0.18',
    ->   MASTER_USER='repluser',
    ->   MASTER_PASSWORD='waluna',
    ->   MASTER_PORT=3306,
    ->   MASTER_LOG_FILE='mariadb-bin.000002',
    ->   MASTER_LOG_POS=344;
Query OK, 0 rows affected (0.002 sec)

MariaDB [(none)]> start slave;
Query OK, 0 rows affected (0.001 sec)

MariaDB [(none)]> create database db1;
Query OK, 1 row affected (0.000 sec)

MariaDB [(none)]> use db1
Database changed
MariaDB [db1]> create table t1(id int auto_increment primary key,name char(10));
Query OK, 0 rows affected (0.003 sec)

# 两个节点分别插入数据
# 在一个节点上执行
MariaDB [db1]> insert t1 (name) values('user1');
Query OK, 1 row affected (0.001 sec)
# 在第二个节点上执行
MariaDB [(none)]> use db1
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
MariaDB [db1]> insert t1 (name) values('user2');
Query OK, 1 row affected (0.001 sec)

# 两个节点同时插入数据
MariaDB [db1]> insert t1 (name) values('user3');
Query OK, 1 row affected (0.001 sec)

MariaDB [db1]> select * from t1;
+----+-------+
| id | name  |
+----+-------+
|  1 | user1 |
|  2 | user2 |
|  3 | user3 |
|  4 | user3 |
+----+-------+
4 rows in set (0.000 sec)

# 老版本两个节点同时创建数据库,发生复制冲突
MariaDB [db1]> create database db2;
Query OK, 1 row affected (0.000 sec)

MariaDB [db1]> show slave status\G

Last_SQL_Errno:1007 

2 xtrabackup实现全量+增量+binlog恢复库

2.1 新版xtrabackup完全,增量备份及还原
# 先安装mysql5.7,安装xtrabackup包
[root@centos8 ~]# wget https://downloads.percona.com/downloads/Percona-XtraBackup-2.4/Percona-XtraBackup-2.4.22/binary/redhat/8/x86_64/percona-xtrabackup-24-2.4.22-1.el8.x86_64.rpm
[root@centos8 ~]# yum install percona-xtrabackup-24-2.4.22-1.el8.x86_64.rpm -y
[root@centos8 ~]# mysql -uroot -pwaluna < hellodb_innodb.sql
1.备份过程
1)完全备份:
[root@centos8 ~]# mkdir /backup
[root@centos8 ~]# xtrabackup -uroot -pwaluna --backup --target-dir=/backup/base
2)第一次修改数据
[root@centos8 ~]# mysql -uroot -pwaluna hellodb
mysql> update students set classid=3 where stuid=24;
Query OK, 1 row affected (0.00 sec)
Rows matched: 1  Changed: 1  Warnings: 0

mysql> update students set classid=7 where stuid=25;
Query OK, 1 row affected (0.00 sec)
Rows matched: 1  Changed: 1  Warnings: 0
3)第一次增量备份
[root@centos8 ~]# xtrabackup -uroot -pwaluna --backup --target-dir=/backup/inc1 --incremental-basedir=/backup/base

# 查看xtrabackup相关文件
[root@centos8 ~]# cat /backup/inc1/xtrabackup_info
uuid = 66a05806-ba20-11eb-97d3-000c2919bf73
name = 
tool_name = xtrabackup
tool_command = -uroot -pwaluna --backup --target-dir=/backup/inc1 --incremental-basedir=/backup/base
tool_version = 2.4.22
ibbackup_version = 2.4.22
server_version = 5.7.33-log
start_time = 2021-05-21 18:36:27
end_time = 2021-05-21 18:36:28
lock_time = 0
binlog_pos = filename 'centos8-bin.000001', position '764'
innodb_from_lsn = 2802747
innodb_to_lsn = 2803810
partial = N
incremental = Y
format = file
compact = N
compressed = N
encrypted = N
[root@centos8 ~]# cat /backup/inc1/xtrabackup_checkpoints
backup_type = incremental
from_lsn = 2802747
to_lsn = 2803810
last_lsn = 2803819
compact = 0
recover_binlog_info = 0
flushed_lsn = 2803819
[root@centos8 ~]# cat /backup/inc1/xtrabackup_binlog_info
centos8-bin.000001      764

4)第二次修改数据
[root@centos8 ~]# mysql -uroot -pwaluna hellodb
mysql> drop table teachers;
Query OK, 0 rows affected (0.01 sec
5)第二次增量
[root@centos8 ~]# xtrabackup -uroot -pwaluna --backup --target-dir=/backup/inc2 --incremental-basedir=/backup/inc1

# 查看xtrabackup相关文件
[root@centos8 ~]# cat /backup/inc2/xtrabackup_info                                       uuid = dbc00da4-ba20-11eb-97d3-000c2919bf73
name = 
tool_name = xtrabackup
tool_command = -uroot -pwaluna --backup --target-dir=/backup/inc2 --incremental-basedir=/backup/inc1
tool_version = 2.4.22
ibbackup_version = 2.4.22
server_version = 5.7.33-log
start_time = 2021-05-21 18:39:43
end_time = 2021-05-21 18:39:45
lock_time = 0
binlog_pos = filename 'centos8-bin.000001', position '956'
innodb_from_lsn = 2803810
innodb_to_lsn = 2806109
partial = N
incremental = Y
format = file
compact = N
compressed = N
encrypted = N
[root@centos8 ~]# cat /backup/inc2/xtrabackup_checkpoints                                backup_type = incremental
from_lsn = 2803810
to_lsn = 2806109
last_lsn = 2806118
compact = 0
recover_binlog_info = 0
flushed_lsn = 2806118
[root@centos8 ~]# cat /backup/inc2/xtrabackup_binlog_info
centos8-bin.000001      956

[root@centos8 ~]# scp -r /backup/* 目标主机:/backup/
# 备份过程生成三个备份目录
/backup/{base,inc1.inc2}

[root@centos8 ~]# rm -rf /data/mysql/*
[root@centos8 ~]# service mysqld stop

2.还原过程
1)预准备完成备份,此选项--apply-log-only 阻止回滚未完成的事务
[root@centos8 ~]# xtrabackup --prepare --apply-log-only --target-dir=/backup/base
2)合并第1次增量备份到完全备份
[root@centos8 ~]# xtrabackup --prepare --apply-log-only --target-dir=/backup/base --incremental-dir=/backup/inc1
3)合并第2次增量备份到完全备份:最后一次还原不需要加--apply-only选项
[root@centos8 ~]# xtrabackup --prepare --target-dir=/backup/base --incremental-dir=/backup/inc2

4)复制到数据库目录,注意数据库目录必须为空,MySQL服务不能启动
[root@centos8 ~]# rm -rf /data/mysql/*
[root@centos8 ~]# ss -ntl|grep 3306
[root@centos8 ~]# xtrabackup --copy-back --target-dir=/backup/base
5)还原属性
[root@centos8 ~]# chown -R mysql.mysql /data/mysql/
6)启动服务
[root@centos8 ~]# service mysqld start
Starting MySQL.Logging to '/data/mysql/mysql.log'.
 SUCCESS! 
2.2 旧版xtrabackup完全,增量备份及还原
2.2.1 在原主机备份
innobackupex /backup
mkdir /backup/inc{1,2}
# 修改数据库内容
innobackupex --incremental /backup/inc1 --incremental-basedir=/backup/2021-05-21_21-39-57(完全备份生成的路径)
# 再次修改数据库内容
innobackupex --incremental /backup/inc2 --incremental-basedir=/backup/inc1/2021-05-21_21-44-33(上次增量备份生成的路径)
scp -r /backup/* 目标主机:/data/
2.2.2 在目标主机还原
# 预准备过程
innobackupex --apply-log --redo-only /data/2021-05-21_21-48-42/
innobackupex --apply-log --redo-only /data/2021-02-21_21-48-42/ --incremental-dir=/ data/inc1/2021-O5-21_21-49-17
innobackupex --apply-log /data/2021-05-21_21-48-42/ --incremental-dir=/data/inc2/2021-O5-21_21-49-17/

# 还原过程
不启动mariadb
systemctl stop mariadb
rm -rf /var/lib/mysql/*
innobackupex --copy-back /data/2021-05-21_21-48-42/
chown -R mysql.mysql /var/lib/mysql/
systemctl start mariadb

3 MyCAT实现MySQL读写分离

实战案例:利用Mycat实现MySQL的读写分离

系统环境:

cat /etc/centos-release
CentOS Linux release 8.3.2011

服务器共三台

mycat  10.0.0.8     # 内存建议2G以上
master 10.0.0.18
slave  10.0.0.28

关闭selinux和防火墙

systemctl stop firewalld
setenforce 0
时间同步
3.1 创建mysql主从数据库
[root@master ~]# dnf install mariadb-server -y
[root@slave ~]# dnf install mariadb-server -y

1)修改maser和salve上的配置文件

# master上的my.cnf
[root@master ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=8                                                                              
log-bin
[root@master ~]# systemctl restart mariadb.service

# slave上的my.cnf
[root@slave ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=18
[root@slave ~]# systemctl restart mariadb.service

2)master上创建复制用户

[root@master ~]# mysql
MariaDB [(none)]> grant replication slave on *.* to 'repluser'@'10.0.0.%' identified by 'waluna';
Query OK, 0 rows affected (0.000 sec)

MariaDB [(none)]> flush privileges;
Query OK, 0 rows affected (0.000 sec)

MariaDB [(none)]> show master logs;
+--------------------+-----------+
| Log_name           | File_size |
+--------------------+-----------+
| mariadb-bin.000001 |     28243 |
| mariadb-bin.000002 |       664 |
+--------------------+-----------+
2 rows in set (0.000 sec)

3)slave上执行

[root@slave ~]# mysql
MariaDB [(none)]> CHANGE MASTER TO
    ->   MASTER_HOST='10.0.0.18',
    ->   MASTER_USER='repluser',
    ->   MASTER_PASSWORD='waluna',
    ->   MASTER_PORT=3306,
    ->   MASTER_LOG_FILE='mariadb-bin.000002',
    ->   MASTER_LOG_POS=664;
Query OK, 0 rows affected (0.013 sec)

MariaDB [(none)]> start slave;
Query OK, 0 rows affected (0.001 sec)

MariaDB [(none)]> show slave status\G                    
*************************** 1. row ***************************
                Slave_IO_State: Waiting for master to send event
                   Master_Host: 10.0.0.18
                   Master_User: repluser
                   Master_Port: 3306
                 Connect_Retry: 60
               Master_Log_File: mariadb-bin.000002
           Read_Master_Log_Pos: 664
                Relay_Log_File: mariadb-relay-bin.000002
                 Relay_Log_Pos: 557
         Relay_Master_Log_File: mariadb-bin.000002
              Slave_IO_Running: Yes
             Slave_SQL_Running: Yes
...省略...
3.2 在MySQL代理服务器10.0.0.8安装mycat并启动
[root@mycat ~]# dnf install java mariadb -y
# 确认安装成功
[root@mycat ~]# java -version
openjdk version "1.8.0_292"
OpenJDK Runtime Environment (build 1.8.0_292-b10)
OpenJDK 64-Bit Server VM (build 25.292-b10, mixed mode)

# 下载并安装
[root@mycat ~]# wget http://dl.mycat.org.cn/1.6.7.4/Mycat-server-1.6.7.4-release/Mycat-server-1.6.7.4-release-20200105164103-linux.tar.gz

[root@mycat ~]# mkdir /apps
[root@mycat ~]# tar xvf Mycat-server-1.6.7.4-release-20200105164103-linux.tar.gz -C /apps/

# 配置环境变量
[root@mycat ~]# echo 'PATH=/apps/mycat/bin:$PATH' > /etc/profile.d/mycat.sh
[root@mycat ~]# . /etc/profile.d/mycat.sh

# 查看端口
[root@mycat ~]# ss -ntl
State      Recv-Q     Send-Q          Local Address:Port           Peer Address:Port     
LISTEN     0          128                   0.0.0.0:22                  0.0.0.0:*        
LISTEN     0          128                      [::]:22                     [::]:*

# 启动mycat
[root@mycat ~]# file /apps/mycat/bin/mycat
/apps/mycat/bin/mycat: POSIX shell script, ASCII text executable

[root@mycat ~]# mycat
Usage: /apps/mycat/bin/mycat { console | start | stop | restart | status | dump }

# 查看日志,没启动之前没有生成文件
[root@mycat ~]# ll /apps/mycat/logs/
total 0

# 注意:此步启动较慢,需要等一会儿,另外内存太小,会导致无法启动
[root@mycat ~]# mycat start
Starting Mycat-server...

# 可以看到打开多个端口,其中8066端口用于连接mycat
[root@mycat ~]# ss -ntl
State      Recv-Q     Send-Q         Local Address:Port            Peer Address:Port     
LISTEN     0          128                  0.0.0.0:22                   0.0.0.0:*        
LISTEN     0          1                  127.0.0.1:32000                0.0.0.0:*        
LISTEN     0          100                        *:9066                       *:*        
LISTEN     0          50                         *:37745                      *:*        
LISTEN     0          128                     [::]:22                      [::]:*        
LISTEN     0          50                         *:45087                      *:*        
LISTEN     0          50                         *:1984                       *:*        
LISTEN     0          100                        *:8066                       *:*
# 查看日志,确定成功,可能需要等一会才能看到成功的提示
[root@mycat ~]# ls /apps/mycat/logs/
mycat.log  mycat.pid  switch.log  wrapper.log
[root@mycat ~]# cat /apps/mycat/logs/wrapper.log
STATUS | wrapper  | 2021/05/25 22:49:49 | --> Wrapper Started as Daemon
STATUS | wrapper  | 2021/05/25 22:49:49 | Launching a JVM...
INFO   | jvm 1    | 2021/05/25 22:49:50 | Wrapper (Version 3.2.3) http://wrapper.tanukisoftware.org
INFO   | jvm 1    | 2021/05/25 22:49:50 |   Copyright 1999-2006 Tanuki Software, Inc.  All Rights Reserved.
INFO   | jvm 1    | 2021/05/25 22:49:50 | 
INFO   | jvm 1    | 2021/05/25 22:49:53 | MyCAT Server startup successfully. see logs in logs/mycat.log

# 用默认密码123456来连接mycat
[root@mycat ~]# mysql -uroot -p123456 -h10.0.0.8 -P8066
Welcome to the MariaDB monitor.  Commands end with ; or \g.
Your MySQL connection id is 1
Server version: 5.6.29-mycat-1.6.7.4-release-20200105164103 MyCat Server (OpenCloudDB)

Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

MySQL [(none)]> show databases;
+----------+
| DATABASE |
+----------+
| TESTDB   |
+----------+
1 row in set (0.002 sec)

MySQL [(none)]> use TESTDB
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
MySQL [TESTDB]> show tables;
+------------------+
| Tables in TESTDB |
+------------------+
| address          |
| travelrecord     |
+------------------+
2 rows in set (0.001 sec)

MySQL [TESTDB]> select * from address;
ERROR 1105 (HY000): backend connect: java.lang.IllegalArgumentException: Invalid DataSource:0
MySQL [TESTDB]> select * from travelrecord;
ERROR 1105 (HY000): backend connect: java.lang.IllegalArgumentException: Invalid DataSource:0
MySQL [TESTDB]> 
3.3 在mycat服务器上修改server.xml文件配置mycat的连接信息
[root@mycat ~]# vim /apps/mycat/conf/server.xml
...省略...
    <user name="root" defaultAccount="true">              # 连接mycat的用户名
        <property name="password">waluna</property>           # 连接mycat的密码
        <property name="schemas">TESTDB</property>            # 数据库名要和schema.xml相对应
        <property name="defaultSchema">TESTDB</property>
        <!--No MyCAT Database selected 错误前会尝试使用该schema作为schema,不设置则为null
,报错 -->

这里使用的是root,密码为waluna,逻辑数据库为TESTDB,这些信息都可以自己随意定义,读写权限都有,没有针对表做任何特殊的权限。重点关注上面这段配置,其他默认即可。

3.4 修改schema.xml实现读写分离策略
[root@mycat ~]# vim /apps/mycat/conf/schema.xml
<?xml version="1.0"?>
<!DOCTYPE mycat:schema SYSTEM "schema.dtd">
<mycat:schema xmlns:mycat="http://io.mycat/">
    <schema name="TESTDB" checkSQLschema="***flase***" sqlMaxLimit="100" ***dataNode="dn1"***></schema>
    <dataNode name="dn1" dataHost="localhost1" database="***mycat***" />    # 其中mycat表示后端服务器实际对数据库名称
    <dataHost name="localhost1" maxCon="1000" minCon="10" balance="***1***"
              writeType="0" dbType="mysql" dbDriver="native" switchType="1"  slaveThreshold="100">
        <heartbeat>select user()</heartbeat>
     ***<writeHost host="hostM1" url="10.0.0.18:3306" user="root" password="waluna">***
     ***<readHost host="hostM1" url="10.0.0.28:3306" user="root" password="waluna"/>***
        </writeHost>
    </dataHost>
</mycat:schema>

# 以上***部分表示源原配置文件中需要修改的内容

# 重新启动mycat,并查看日志
[root@mycat ~]# mycat restart
Stopping Mycat-server...
Mycat-server was not running.
Starting Mycat-server...
[root@mycat ~]# tail -f /apps/mycat/logs/wrapper.log
STATUS | wrapper  | 2021/05/25 23:26:38 | --> Wrapper Started as Daemon
STATUS | wrapper  | 2021/05/25 23:26:38 | Launching a JVM...
INFO   | jvm 1    | 2021/05/25 23:26:38 | Wrapper (Version 3.2.3) http://wrapper.tanukisoftware.org
INFO   | jvm 1    | 2021/05/25 23:26:38 |   Copyright 1999-2006 Tanuki Software, Inc.  All Rights Reserved.
INFO   | jvm 1    | 2021/05/25 23:26:38 | 
INFO   | jvm 1    | 2021/05/25 23:26:39 | MyCAT Server startup successfully. see logs in logs/mycat.log

上面配置中,balance改为1,表示读写分离。以上配置达到的效果就是10.0.0.18为主库,10.0.0.28为从库

注意:要保证能使用root/waluna权限成功登录10.0.0.18和10.0.0.28机器上面的mysql数据库。同时,也一定要授权mycat机器能使用root/waluna权限成功登录这两台机器的mysql数据库!!这很重要,否则会导致登录mycat后,对库和表操作失败!

schema.xml文件实例

3.5 在后端主服务器创建用户并对mycat授权
[root@master ~]# mysql
MariaDB [(none)]> create database mycat;
Query OK, 1 row affected (0.001 sec)

MariaDB [(none)]> grant all on *.* to 'root'@'10.0.0.%' identified by 'waluna';
Query OK, 0 rows affected (0.001 sec)

MariaDB [(none)]> flush privileges;
Query OK, 0 rows affected (0.001 sec)
3.6 在mycat服务器上连接并测试
[root@mycat ~]# mysql -uroot -pwaluna -h127.0.0.1 -P8066 -DTESTDB
MySQL [TESTDB]> show databases;
+----------+
| DATABASE |
+----------+
| TESTDB   |        # 只能看到一个虚拟数据库
+----------+
1 row in set (0.000 sec)

MySQL [TESTDB]> create table t1(id int);
Query OK, 0 rows affected (0.007 sec)

MySQL [TESTDB]> select @@server_id;  # 可以看到是在从服务器读数据
+-------------+
| @@server_id |
+-------------+
|          18 |
+-------------+
1 row in set (0.002 sec)

MySQL [TESTDB]> select @@hostname; 
+------------+
| @@hostname |
+------------+
| slave      |
+------------+
1 row in set (0.001 sec)
3.7 通过通用日志确认实现读写分离

在mariadb/MySQL中查看通用日志

MariaDB [(none)]> show variables like 'general_log';   # 查看通用日志是否开启
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| general_log   | OFF   |
+---------------+-------+
1 row in set (0.001 sec)

MariaDB [(none)]> set global general_log=on; # 开启通用日志功能
Query OK, 0 rows affected (0.001 sec)

MariaDB [(none)]> show variables like 'general_log_file';  # 查看通用日志文件保存位置
+------------------+------------+
| Variable_name    | Value      |
+------------------+------------+
| general_log_file | master.log |
+------------------+------------+
1 row in set (0.001 sec)

MariaDB [(none)]> set global general_log_file='/tmp/general.log';  # 设置通用日志文件保存位置
Query OK, 0 rows affected (0.000 sec)

MariaDB [(none)]> show variables like 'general_log_file';
+------------------+------------------+
| Variable_name    | Value            |
+------------------+------------------+
| general_log_file | /tmp/general.log |
+------------------+------------------+
1 row in set (0.001 sec)

在主和从服务器分别启动通用日志,查看读写分离

[root@master ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
general_log=ON

[root@master ~]# systemctl restart mariadb.service

[root@mycat ~]# mysql -uroot -pwaluna -h127.0.0.1 -P8066 -DTESTDB
MySQL [TESTDB]> insert t1 value(1);
Query OK, 1 row affected (0.003 sec)

MySQL [TESTDB]> select * from t1;
+------+
| id   |
+------+
|    1 |
+------+
1 row in set (0.002 sec)

[root@master ~]# tail -f /var/lib/mysql/master.log
210525 23:52:44     13 Query    SET names utf8;insert t1 value(1)

[root@slave ~]# tail -f /var/lib/mysql/slave.log
210525 23:53:30     12 Query    SET names utf8;select * from t1
3.8 停止从节点,mycat自动调度请求至主节点
[root@slave ~]# systemctl stop mariadb.service
[root@mycat ~]# mysql -uroot -pwaluna -h127.0.0.1 -P8066 -DTESTDB
MySQL [TESTDB]> select @@server_id;  # 立即查看会报错
ERROR 1184 (HY000): java.net.ConnectException: Connection refused

MySQL [TESTDB]> select @@server_id;  # 过段时间才会调度过去,可以看出已经调度成功
+-------------+
| @@server_id |
+-------------+
|           8 |
+-------------+
1 row in set (0.001 sec)

# 停止主节点,mycat不会自动调度写请求给从节点
[root@slave ~]# systemctl restart mariadb.service
[root@master ~]# systemctl stop mariadb.service
[root@mycat ~]# mysql -uroot -pwaluna -h127.0.0.1 -P8066 -DTESTDB
MySQL [TESTDB]> insert t1 value(3);
ERROR 1184 (HY000): java.net.ConnectException: Connection refused

4 MHA实现

实现MHA实战案例

环境:四台主机
MHA管理器 centos7 10.0.0.7
master   centos8 10.0.0.8
slave1   centos8 10.0.0.18
slave2   centos8 10.0.0.28
4.1 在管理节点上安装两个包mha4mysql-manager和mha4mysql-node

说明:mha4mysql-manager-0.56-0.el6.noarch.rpm不支持Centos 8,只支持CentOS7以下版本

*说明:mha4mysql-manager-0.58-0.el7.centos.noarch.rpm,支持MySQL 5.7,但和Centos8版本上的Mariadb-10.3.17不兼容,Mariadb10.3.不支持super_read_only,所以MHA只能用mha4mysql-0.56,最新版的mha4mysql-0.58支持super_read_only,mysql5.7可以用。**

mha4mysql-manager
mha4mysql-node

例:

[root@mha ~]# yum install mha4mysql-node-0.56-0.el6.noarch.rpm -y
[root@mha ~]# yum install  mha4mysql-manager-0.56-0.el6.noarch.rpm -y
# 注意:因为依赖关系,需先装node包再装manager包
4.2 在所有mysql服务器上安装mha4mysql-node包

此包支持centos8,7,6

mha4mysql-node

例:

[root@master ~]# yum install mha4mysql-node-0.56-0.el6.noarch.rpm -y
[root@slave1 ~]# yum install mha4mysql-node-0.56-0.el6.noarch.rpm -y
[root@slave2 ~]# yum install mha4mysql-node-0.56-0.el6.noarch.rpm -y
4.3 在所有节点实现互相之间ssh key验证
[root@mha ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa): 
Enter passphrase (empty for no passphrase): 
Enter same passphrase again: 
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:dHISzeL5r0m26EVK6Fm2u9X5zeZfgy7iJO/71T1UJJA root@mha
The key's randomart image is:
+---[RSA 2048]----+
|        .o  .o. .|
|        ..o E  o |
|       .+oo     .|
|       oo=      .|
|      . S..    . |
|     . = +.. .+ .|
|      o.o.=.oo +o|
|        +O ++. o=|
|       .B*Bo...o*|
+----[SHA256]-----+
[root@mha ~]# ssh-copy-id 10.0.0.7
/usr/bin/ssh-copy-id: INFO: Source of key(s) to be installed: "/root/.ssh/id_rsa.pub"
The authenticity of host '10.0.0.7 (10.0.0.7)' can't be established.
ECDSA key fingerprint is SHA256:BvEOFVidIWSNe478SZ34jegCVOHesBaPh7bWvtccBkU.
ECDSA key fingerprint is MD5:ae:39:15:a0:72:e9:23:fd:02:0f:18:4b:b2:19:29:32.
Are you sure you want to continue connecting (yes/no)? yes
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
root@10.0.0.7's password: 

Number of key(s) added: 1

Now try logging into the machine, with:   "ssh '10.0.0.7'"
and check to make sure that only the key(s) you wanted were added.

[root@mha ~]# yum install rsync -y  # 所有节点都安装
[root@mha ~]# rsync -av .ssh 10.0.0.8:~
root@10.0.0.8's password: 
sending incremental file list
.ssh/
.ssh/authorized_keys
.ssh/id_rsa
.ssh/id_rsa.pub
.ssh/known_hosts

sent 3,483 bytes  received 96 bytes  1,431.60 bytes/sec
total size is 3,141  speedup is 0.88
[root@mha ~]# rsync -av .ssh 10.0.0.18:~
root@10.0.0.18's password: 
sending incremental file list
.ssh/
.ssh/authorized_keys
.ssh/id_rsa
.ssh/id_rsa.pub
.ssh/known_hosts

sent 3,483 bytes  received 96 bytes  1,431.60 bytes/sec
total size is 3,141  speedup is 0.88
[root@mha ~]# rsync -av .ssh 10.0.0.28:~
root@10.0.0.28's password: 
sending incremental file list
.ssh/
.ssh/authorized_keys
.ssh/id_rsa
.ssh/id_rsa.pub
.ssh/known_hosts

sent 3,483 bytes  received 96 bytes  2,386.00 bytes/sec
total size is 3,141  speedup is 0.88
4.4 在管理节点建立配置文件
[root@mha ~]# mkdir /etc/mastermha
[root@mha ~]# vim /etc/mastermha/app1.cnf
[server default]
user=mhauser        # 用于远程连接mysql所有节点的用户,需要管理员的权限
password=waluna
manager_workdir=/data/mastermha/app1/       # 目录会自动生成,无需手动创建
manager_log=/data/mastermha/app1/manager.log
remote_workdir=/data/mastermha/app1/
ssh_user=root       # 用于实现远程ssh基于key的连接,访问二进制日志
repl_user=repluser  # 主从复制的用户信息
repl_password=waluna
ping_interval=1     # 健康性检查的时间间隔
master_ip_failover_script=/usr/local/bin/master_ip_failover # 切换VIP的perl脚本
report_script=/usr/local/bin/sendmail.sh        # 当主节点宕机执行报警脚本
check_repl_delay=0  # 默认如果slave中从库落后主库relaylog超过100M,主库不会选择这个从库为新的master,因为这个从库进行恢复需要很长的时间。通过这个参数,mha触发主从切换的时候会忽略复制的延时,通过check_repl_delay=0这个参数,mha触发主从切换时会忽略复制的延时,对于设置candidate_master=1的从库非常有用,这样确保这个从库一定能成为最新的master
master_binlog_dir=/data/mysql/  # 指定二进制日志存放的目录, mha4mysql-manager-0.58必须指定,之前版本不需要指定

[server1]
hostname=10.0.0.8
candidate_master=1
[server2]
hostname=10.0.0.18
candidate_master=1  # 设置为优先候选master,即使不是集群中事件最新的sTave,也会优先当master
[server3]
hostname=10.0.0.28

说明:主库宕机谁来接管新的master

1.所有从节点日志都是一致的,默认会以配置文件的顺序去选择一个新主
2.从节点日志不一致,自动选择最接近于主库的从库充当新主
3.如果对于某节点设定了权重(candidate_master=1),权重节点会优先选择。但是此节点日志量落后主库超过100M日志的话,也不会被选择。可以配合 check_repl_delay=0,关闭日志量的检查,强制选择候选节点
4.5 相关脚本
[root@mha ~]# cat /usr/local/bin/sendmail.sh
echo "MySQL is down"|mail -s 'MHA Warning' root@waluna.top
[root@mha ~]# chmod +x /usr/local/bin/sendmail.sh
[root@mha ~]# ll /usr/local/bin/sendmail.sh
-rwxr-xr-x 1 root root 528 May 26 18:58 /usr/local/bin/sendmail.sh
[root@mha ~]# vim /usr/local/bin/master_ip_failover
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
my (
$command,$ssh_user,$orig_master_host,$orig_master_ip,$orig_master_port,$new_master_host,$new_master_ip,$new_master_port
);
my $vip = '10.0.0.100/24';    # 设置virtual IP
my $gateway = '10.0.0.2'; # 网关Gateway IP
my $interface = 'eth0';       # 指定VIP所在网卡
my $key = "1";
my $ssh_start_vip = "/sbin/ifconfig $interface:$key $vip;/sbin/arping -I $interface -c 3 -s $vip $gateway >/dev/null 2>&1";
my $ssh_stop_vip = "/sbin/ifconfig $interface:$key down";
GetOptions(
'command=s' => \$command,
'ssh_user=s' => \$ssh_user,
'orig_master_host=s' => \$orig_master_host,
'orig_master_ip=s' => \$orig_master_ip,
'orig_master_port=i' => \$orig_master_port,
'new_master_host=s' => \$new_master_host,
'new_master_ip=s' => \$new_master_ip,
'new_master_port=i' => \$new_master_port,
);
exit &main();
sub main {
print "\n\nIN SCRIPT TEST====$ssh_stop_vip===$ssh_start_vip===\n\n";
if ( $command eq "stop" || $command eq "stopssh" ) {
# $orig_master_host,$orig_master_ip,$orig_master_port are passed.
# If you manage master ip address at global catalog database,
# invalidate orig_master_ip here.
my $exit_code = 1;
eval {
print "Disabling the VIP on old master: $orig_master_host \n";
&stop_vip();
$exit_code = 0;
};
if ($@) {
warn "Got Error: $@\n";
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "start" ) {
# all arguments are passed.
# If you manage master ip address at global catalog database,
# activate new_master_ip here.
# You can also grant write access (create user,set read_only=0,etc) here.
my $exit_code = 10;
eval {
print "Enabling the VIP - $vip on the new master - $new_master_host \n";
&start_vip();
$exit_code = 0;
};
if ($@) {
warn $@;
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "status" ) {
print "Checking the Status of the script.. OK \n";
`ssh $ssh_user\@$orig_master_host \" $ssh_start_vip \"`;
exit 0;
}
else {
&usage();
exit 1;
}
}
# A simple system call that enable the vIP on the new master
sub start_vip() {
`ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
# A simple system call that disable the VIP on the old_master
sub stop_vip() {
`ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}
sub usage {
print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}

[root@mha ~]# chmod +x /usr/local/bin/master_ip_failover
4.6 实现master
[root@master ~]# mkdir /data/mysql
[root@master ~]# chown mysql.mysql /data/mysql/
[root@master ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=8
log-bin=/data/mysql/mariadb-bin
skip_name_resolve=1
general_log     # 非必选
[root@master ~]# systemctl restart mariadb.service
[root@master ~]# mysql
MariaDB [(none)]> show master logs;
+--------------------+-----------+
| Log_name           | File_size |
+--------------------+-----------+
| mariadb-bin.000001 |     28243 |
| mariadb-bin.000002 |       344 |
+--------------------+-----------+
2 rows in set (0.000 sec)

MariaDB [(none)]> grant replication slave on *.* to 'repluser'@'10.0.0.%' identified by 'waluna';
Query OK, 0 rows affected (0.000 sec)

MariaDB [(none)]> grant all on *.* to mhauser@'10.0.0.%' identified by 'waluna';
Query OK, 0 rows affected (0.000 sec)

MariaDB [(none)]> flush privileges;
Query OK, 0 rows affected (0.000 sec)

# 配置VIP
[root@master ~]# ifconfig eth0:1 10.0.0.100/24
4.7 实现slave
[root@slave1 ~]# mkdir /data/mysql
[root@slave1 ~]# chown mysql.mysql /data/mysql/
[root@slave1 ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
server-id=18    # 不同节点此值各不相同
log-bin=/data/mysql/mariadb-bin
read-only
relay_log_purge=0
skip_name_resolve=1     # 禁止反向解析

[root@slave1 ~]# systemctl restart mariadb.service
[root@slave1 ~]# mysql
MariaDB [(none)]> CHANGE MASTER TO
    ->   MASTER_HOST='10.0.0.8',
    ->   MASTER_USER='repluser',
    ->   MASTER_PASSWORD='waluna',
    ->   MASTER_PORT=3306,
    ->   MASTER_LOG_FILE='mariadb-bin.000002',
    ->   MASTER_LOG_POS=344;
Query OK, 0 rows affected (0.015 sec)

MariaDB [(none)]> start slave;
Query OK, 0 rows affected (0.000 sec)

MariaDB [(none)]> show slave status\G

# slave2同上操作,注意记得server-id改为28
4.8 检查MHA的环境
# 检查环境
[root@mha ~]# masterha_check_ssh --conf=/etc/mastermha/app1.cnf
[root@mha ~]# masterha_check_repl --conf=/etc/mastermha/app1.cnf

# 查看状态
[root@mha ~]# masterha_check_status --conf=/etc/mastermha/app1.cnf

例:

# 检查环境
[root@mha ~]# masterha_check_ssh --conf=/etc/mastermha/app1.cnf
Wed May 26 20:28:11 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed May 26 20:28:11 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed May 26 20:28:11 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
Wed May 26 20:28:11 2021 - [info] Starting SSH connection tests..
Wed May 26 20:28:12 2021 - [debug] 
Wed May 26 20:28:11 2021 - [debug]  Connecting via SSH from root@10.0.0.8(10.0.0.8:22) to root@10.0.0.18(10.0.0.18:22)..
Wed May 26 20:28:11 2021 - [debug]   ok.
Wed May 26 20:28:11 2021 - [debug]  Connecting via SSH from root@10.0.0.8(10.0.0.8:22) to root@10.0.0.28(10.0.0.28:22)..
Wed May 26 20:28:11 2021 - [debug]   ok.
Wed May 26 20:28:12 2021 - [debug] 
Wed May 26 20:28:11 2021 - [debug]  Connecting via SSH from root@10.0.0.18(10.0.0.18:22) to root@10.0.0.8(10.0.0.8:22)..
Wed May 26 20:28:12 2021 - [debug]   ok.
Wed May 26 20:28:12 2021 - [debug]  Connecting via SSH from root@10.0.0.18(10.0.0.18:22) to root@10.0.0.28(10.0.0.28:22)..
Wed May 26 20:28:12 2021 - [debug]   ok.
Wed May 26 20:28:13 2021 - [debug] 
Wed May 26 20:28:12 2021 - [debug]  Connecting via SSH from root@10.0.0.28(10.0.0.28:22) to root@10.0.0.8(10.0.0.8:22)..
Wed May 26 20:28:12 2021 - [debug]   ok.
Wed May 26 20:28:12 2021 - [debug]  Connecting via SSH from root@10.0.0.28(10.0.0.28:22) to root@10.0.0.18(10.0.0.18:22)..
Wed May 26 20:28:12 2021 - [debug]   ok.
Wed May 26 20:28:13 2021 - [info] All SSH connection tests passed successfully.

[root@centos7 ~]# masterha_check_repl --conf=/etc/mastermha/app1.cnf
Wed May 26 21:04:32 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed May 26 21:04:32 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed May 26 21:04:32 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
Wed May 26 21:04:32 2021 - [info] MHA::MasterMonitor version 0.56.
Wed May 26 21:04:33 2021 - [info] GTID failover mode = 0
Wed May 26 21:04:33 2021 - [info] Dead Servers:
Wed May 26 21:04:33 2021 - [info] Alive Servers:
Wed May 26 21:04:33 2021 - [info]   10.0.0.8(10.0.0.8:3306)
Wed May 26 21:04:33 2021 - [info]   10.0.0.18(10.0.0.18:3306)
Wed May 26 21:04:33 2021 - [info]   10.0.0.28(10.0.0.28:3306)
Wed May 26 21:04:33 2021 - [info] Alive Slaves:
Wed May 26 21:04:33 2021 - [info]   10.0.0.18(10.0.0.18:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:04:33 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:04:33 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Wed May 26 21:04:33 2021 - [info]   10.0.0.28(10.0.0.28:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:04:33 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:04:33 2021 - [info] Current Alive Master: 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:04:33 2021 - [info] Checking slave configurations..
Wed May 26 21:04:33 2021 - [info] Checking replication filtering settings..
Wed May 26 21:04:33 2021 - [info]  binlog_do_db= , binlog_ignore_db= 
Wed May 26 21:04:33 2021 - [info]  Replication filtering check ok.
Wed May 26 21:04:33 2021 - [info] GTID (with auto-pos) is not supported
Wed May 26 21:04:33 2021 - [info] Starting SSH connection tests..
Wed May 26 21:04:35 2021 - [info] All SSH connection tests passed successfully.
Wed May 26 21:04:35 2021 - [info] Checking MHA Node version..
Wed May 26 21:04:36 2021 - [info]  Version check ok.
Wed May 26 21:04:36 2021 - [info] Checking SSH publickey authentication settings on the current master..
Wed May 26 21:04:36 2021 - [info] HealthCheck: SSH to 10.0.0.8 is reachable.
Wed May 26 21:04:36 2021 - [info] Master MHA Node version is 0.56.
Wed May 26 21:04:36 2021 - [info] Checking recovery script configurations on 10.0.0.8(10.0.0.8:3306)..
Wed May 26 21:04:36 2021 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/ --output_file=/data/mastermha/app1//save_binary_logs_test --manager_version=0.56 --start_file=mariadb-bin.000002 
Wed May 26 21:04:36 2021 - [info]   Connecting to root@10.0.0.8(10.0.0.8:22).. 
  Creating /data/mastermha/app1 if not exists..    ok.
  Checking output directory is accessible or not..
   ok.
  Binlog found at /data/mysql/, up to mariadb-bin.000002
Wed May 26 21:04:36 2021 - [info] Binlog setting check done.
Wed May 26 21:04:36 2021 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Wed May 26 21:04:36 2021 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='mhauser' --slave_host=10.0.0.18 --slave_ip=10.0.0.18 --slave_port=3306 --workdir=/data/mastermha/app1/ --target_version=10.3.28-MariaDB-log --manager_version=0.56 --relay_log_info=/var/lib/mysql/relay-log.info  --relay_dir=/var/lib/mysql/  --slave_pass=xxx
Wed May 26 21:04:36 2021 - [info]   Connecting to root@10.0.0.18(10.0.0.18:22).. 
  Checking slave recovery environment settings..
    Opening /var/lib/mysql/relay-log.info ... ok.
    Relay log found at /var/lib/mysql, up to mariadb-relay-bin.000002
    Temporary relay log file is /var/lib/mysql/mariadb-relay-bin.000002
    Testing mysql connection and privileges.. done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Wed May 26 21:04:37 2021 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='mhauser' --slave_host=10.0.0.28 --slave_ip=10.0.0.28 --slave_port=3306 --workdir=/data/mastermha/app1/ --target_version=10.3.28-MariaDB-log --manager_version=0.56 --relay_log_info=/var/lib/mysql/relay-log.info  --relay_dir=/var/lib/mysql/  --slave_pass=xxx
Wed May 26 21:04:37 2021 - [info]   Connecting to root@10.0.0.28(10.0.0.28:22).. 
  Checking slave recovery environment settings..
    Opening /var/lib/mysql/relay-log.info ... ok.
    Relay log found at /var/lib/mysql, up to mariadb-relay-bin.000002
    Temporary relay log file is /var/lib/mysql/mariadb-relay-bin.000002
    Testing mysql connection and privileges.. done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Wed May 26 21:04:37 2021 - [info] Slaves settings check done.
Wed May 26 21:04:37 2021 - [info] 
10.0.0.8(10.0.0.8:3306) (current master)
 +--10.0.0.18(10.0.0.18:3306)
 +--10.0.0.28(10.0.0.28:3306)

Wed May 26 21:04:37 2021 - [info] Checking replication health on 10.0.0.18..
Wed May 26 21:04:37 2021 - [info]  ok.
Wed May 26 21:04:37 2021 - [info] Checking replication health on 10.0.0.28..
Wed May 26 21:04:37 2021 - [info]  ok.
Wed May 26 21:04:37 2021 - [info] Checking master_ip_failover_script status:
Wed May 26 21:04:37 2021 - [info]   /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=10.0.0.8 --orig_master_ip=10.0.0.8 --orig_master_port=3306 

IN SCRIPT TEST====/sbin/ifconfig eth0:1 down===/sbin/ifconfig eth0:1 10.0.0.100/24;/sbin/arping -I eth0 -c 3 -s 10.0.0.100/24 10.0.0.2 >/dev/null 2>&1===

Checking the Status of the script.. OK 
Wed May 26 21:04:37 2021 - [info]  OK.
Wed May 26 21:04:37 2021 - [warning] shutdown_script is not defined.
Wed May 26 21:04:37 2021 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.

# 查看状态
[root@centos7 ~]# masterha_check_status --conf=/etc/mastermha/app1.cnf
app1 is stopped(2:NOT_RUNNING).
4.9 启动MHA
# 开启MHA,默认是前台运行
nohup masterha_manager --conf=/etc/mastermha/app1.cnf &> /dev/null &
# 查看状态
masterha_check_status --conf=/etc/mastermha/app1.cnf

# nohup是忽略强制kill信号以外的信号
nohup - run a command immune to hangups, with output to a non-tty

例:

[root@centos7 ~]# masterha_manager --conf=/etc/mastermha/app1.cnf       
Wed May 26 21:09:04 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed May 26 21:09:04 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed May 26 21:09:04 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..

# 查看健康性检查
[root@master ~]# tail -f /var/lib/mysql/master.log 
210526 21:11:29     29 Query    SELECT 1 As Value
210526 21:11:30     29 Query    SELECT 1 As Value
210526 21:11:31     29 Query    SELECT 1 As Value
210526 21:11:32     29 Query    SELECT 1 As Value
210526 21:11:33     29 Query    SELECT 1 As Value
210526 21:11:34     29 Query    SELECT 1 As Value
210526 21:11:35     29 Query    SELECT 1 As Value

[root@centos7 ~]# masterha_check_status --conf=/etc/mastermha/app1.cnf
app1 (pid:1526) is running(0:PING_OK), master:10.0.0.8
4.10 排错日志
tail /data/mastermha/app1/manager.log

例:

[root@centos7 ~]# cat /data/mastermha/app1/manager.log
Wed May 26 21:09:04 2021 - [info] MHA::MasterMonitor version 0.56.
Wed May 26 21:09:05 2021 - [info] GTID failover mode = 0
Wed May 26 21:09:05 2021 - [info] Dead Servers:
Wed May 26 21:09:05 2021 - [info] Alive Servers:
Wed May 26 21:09:05 2021 - [info]   10.0.0.8(10.0.0.8:3306)
Wed May 26 21:09:05 2021 - [info]   10.0.0.18(10.0.0.18:3306)
Wed May 26 21:09:05 2021 - [info]   10.0.0.28(10.0.0.28:3306)
Wed May 26 21:09:05 2021 - [info] Alive Slaves:
Wed May 26 21:09:05 2021 - [info]   10.0.0.18(10.0.0.18:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:09:05 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:09:05 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Wed May 26 21:09:05 2021 - [info]   10.0.0.28(10.0.0.28:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:09:05 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:09:05 2021 - [info] Current Alive Master: 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:09:05 2021 - [info] Checking slave configurations..
Wed May 26 21:09:05 2021 - [info] Checking replication filtering settings..
Wed May 26 21:09:05 2021 - [info]  binlog_do_db= , binlog_ignore_db= 
Wed May 26 21:09:05 2021 - [info]  Replication filtering check ok.
Wed May 26 21:09:05 2021 - [info] GTID (with auto-pos) is not supported
Wed May 26 21:09:05 2021 - [info] Starting SSH connection tests..
Wed May 26 21:09:08 2021 - [info] All SSH connection tests passed successfully.
Wed May 26 21:09:08 2021 - [info] Checking MHA Node version..
Wed May 26 21:09:08 2021 - [info]  Version check ok.
Wed May 26 21:09:08 2021 - [info] Checking SSH publickey authentication settings on the current master..
Wed May 26 21:09:08 2021 - [info] HealthCheck: SSH to 10.0.0.8 is reachable.
Wed May 26 21:09:08 2021 - [info] Master MHA Node version is 0.56.
Wed May 26 21:09:08 2021 - [info] Checking recovery script configurations on 10.0.0.8(10.0.0.8:3306)..
Wed May 26 21:09:08 2021 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/ --output_file=/data/mastermha/app1//save_binary_logs_test --manager_version=0.56 --start_file=mariadb-bin.000002 
Wed May 26 21:09:08 2021 - [info]   Connecting to root@10.0.0.8(10.0.0.8:22).. 
  Creating /data/mastermha/app1 if not exists..    ok.
  Checking output directory is accessible or not..
   ok.
  Binlog found at /data/mysql/, up to mariadb-bin.000002
Wed May 26 21:09:09 2021 - [info] Binlog setting check done.
Wed May 26 21:09:09 2021 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Wed May 26 21:09:09 2021 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='mhauser' --slave_host=10.0.0.18 --slave_ip=10.0.0.18 --slave_port=3306 --workdir=/data/mastermha/app1/ --target_version=10.3.28-MariaDB-log --manager_version=0.56 --relay_log_info=/var/lib/mysql/relay-log.info  --relay_dir=/var/lib/mysql/  --slave_pass=xxx
Wed May 26 21:09:09 2021 - [info]   Connecting to root@10.0.0.18(10.0.0.18:22).. 
  Checking slave recovery environment settings..
    Opening /var/lib/mysql/relay-log.info ... ok.
    Relay log found at /var/lib/mysql, up to mariadb-relay-bin.000002
    Temporary relay log file is /var/lib/mysql/mariadb-relay-bin.000002
    Testing mysql connection and privileges.. done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Wed May 26 21:09:09 2021 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='mhauser' --slave_host=10.0.0.28 --slave_ip=10.0.0.28 --slave_port=3306 --workdir=/data/mastermha/app1/ --target_version=10.3.28-MariaDB-log --manager_version=0.56 --relay_log_info=/var/lib/mysql/relay-log.info  --relay_dir=/var/lib/mysql/  --slave_pass=xxx
Wed May 26 21:09:09 2021 - [info]   Connecting to root@10.0.0.28(10.0.0.28:22).. 
  Checking slave recovery environment settings..
    Opening /var/lib/mysql/relay-log.info ... ok.
    Relay log found at /var/lib/mysql, up to mariadb-relay-bin.000002
    Temporary relay log file is /var/lib/mysql/mariadb-relay-bin.000002
    Testing mysql connection and privileges.. done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Wed May 26 21:09:09 2021 - [info] Slaves settings check done.
Wed May 26 21:09:09 2021 - [info] 
10.0.0.8(10.0.0.8:3306) (current master)
 +--10.0.0.18(10.0.0.18:3306)
 +--10.0.0.28(10.0.0.28:3306)

Wed May 26 21:09:09 2021 - [info] Checking master_ip_failover_script status:
Wed May 26 21:09:09 2021 - [info]   /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=10.0.0.8 --orig_master_ip=10.0.0.8 --orig_master_port=3306 

IN SCRIPT TEST====/sbin/ifconfig eth0:1 down===/sbin/ifconfig eth0:1 10.0.0.100/24;/sbin/arping -I eth0 -c 3 -s 10.0.0.100/24 10.0.0.2 >/dev/null 2>&1===

Checking the Status of the script.. OK 
Wed May 26 21:09:09 2021 - [info]  OK.
Wed May 26 21:09:09 2021 - [warning] shutdown_script is not defined.
Wed May 26 21:09:09 2021 - [info] Set master ping interval 1 seconds.
Wed May 26 21:09:09 2021 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes.
Wed May 26 21:09:09 2021 - [info] Starting ping health check on 10.0.0.8(10.0.0.8:3306)..
Wed May 26 21:09:09 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
4.11 模拟故障
# 当master down机后,mha自动退出
[root@master ~]# systemctl stop mariadb.service

[root@centos7 ~]# masterha_manager --conf=/etc/mastermha/app1.cnf       
Wed May 26 21:09:04 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed May 26 21:09:04 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed May 26 21:09:04 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
  Creating /data/mastermha/app1 if not exists..    ok.
  Checking output directory is accessible or not..
   ok.
  Binlog found at /data/mysql/, up to mariadb-bin.000002
Wed May 26 21:15:26 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed May 26 21:15:26 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed May 26 21:15:26 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
[root@centos7 ~]# masterha_check_status --conf=/etc/mastermha/app1.cnf  
app1 is stopped(2:NOT_RUNNING).

# 查看更新的日志
[root@centos7 ~]# tail -f /data/mastermha/app1/manager.log

Wed May 26 21:15:22 2021 - [warning] Got error on MySQL select ping: 2006 (MySQL server has gone away)
Wed May 26 21:15:22 2021 - [info] Executing SSH check script: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/ --output_file=/data/mastermha/app1//save_binary_logs_test --manager_version=0.56 --binlog_prefix=mariadb-bin
Wed May 26 21:15:22 2021 - [info] HealthCheck: SSH to 10.0.0.8 is reachable.
Wed May 26 21:15:23 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '10.0.0.8' (111))
Wed May 26 21:15:23 2021 - [warning] Connection failed 2 time(s)..
Wed May 26 21:15:24 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '10.0.0.8' (111))
Wed May 26 21:15:24 2021 - [warning] Connection failed 3 time(s)..
Wed May 26 21:15:25 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '10.0.0.8' (111))
Wed May 26 21:15:25 2021 - [warning] Connection failed 4 time(s)..
Wed May 26 21:15:25 2021 - [warning] Master is not reachable from health checker!
Wed May 26 21:15:25 2021 - [warning] Master 10.0.0.8(10.0.0.8:3306) is not reachable!
Wed May 26 21:15:25 2021 - [warning] SSH is reachable.
Wed May 26 21:15:25 2021 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/mastermha/app1.cnf again, and trying to connect to all servers to check server status..
Wed May 26 21:15:25 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed May 26 21:15:25 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed May 26 21:15:25 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
Wed May 26 21:15:26 2021 - [info] GTID failover mode = 0
Wed May 26 21:15:26 2021 - [info] Dead Servers:
Wed May 26 21:15:26 2021 - [info]   10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:26 2021 - [info] Alive Servers:
Wed May 26 21:15:26 2021 - [info]   10.0.0.18(10.0.0.18:3306)
Wed May 26 21:15:26 2021 - [info]   10.0.0.28(10.0.0.28:3306)
Wed May 26 21:15:26 2021 - [info] Alive Slaves:
Wed May 26 21:15:26 2021 - [info]   10.0.0.18(10.0.0.18:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:26 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:26 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Wed May 26 21:15:26 2021 - [info]   10.0.0.28(10.0.0.28:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:26 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:26 2021 - [info] Checking slave configurations..
Wed May 26 21:15:26 2021 - [info] Checking replication filtering settings..
Wed May 26 21:15:26 2021 - [info]  Replication filtering check ok.
Wed May 26 21:15:26 2021 - [info] Master is down!
Wed May 26 21:15:26 2021 - [info] Terminating monitoring script.
Wed May 26 21:15:26 2021 - [info] Got exit code 20 (Master dead).
Wed May 26 21:15:26 2021 - [info] MHA::MasterFailover version 0.56.
Wed May 26 21:15:26 2021 - [info] Starting master failover.
Wed May 26 21:15:26 2021 - [info] 
Wed May 26 21:15:26 2021 - [info] * Phase 1: Configuration Check Phase..
Wed May 26 21:15:26 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] GTID failover mode = 0
Wed May 26 21:15:27 2021 - [info] Dead Servers:
Wed May 26 21:15:27 2021 - [info]   10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:27 2021 - [info] Checking master reachability via MySQL(double check)...
Wed May 26 21:15:27 2021 - [info]  ok.
Wed May 26 21:15:27 2021 - [info] Alive Servers:
Wed May 26 21:15:27 2021 - [info]   10.0.0.18(10.0.0.18:3306)
Wed May 26 21:15:27 2021 - [info]   10.0.0.28(10.0.0.28:3306)
Wed May 26 21:15:27 2021 - [info] Alive Slaves:
Wed May 26 21:15:27 2021 - [info]   10.0.0.18(10.0.0.18:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:27 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:27 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Wed May 26 21:15:27 2021 - [info]   10.0.0.28(10.0.0.28:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:27 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:27 2021 - [info] Starting Non-GTID based failover.
Wed May 26 21:15:27 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] ** Phase 1: Configuration Check Phase completed.
Wed May 26 21:15:27 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] * Phase 2: Dead Master Shutdown Phase..
Wed May 26 21:15:27 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] Forcing shutdown so that applications never connect to the current master..
Wed May 26 21:15:27 2021 - [info] Executing master IP deactivation script:
Wed May 26 21:15:27 2021 - [info]   /usr/local/bin/master_ip_failover --orig_master_host=10.0.0.8 --orig_master_ip=10.0.0.8 --orig_master_port=3306 --command=stopssh --ssh_user=root  

IN SCRIPT TEST====/sbin/ifconfig eth0:1 down===/sbin/ifconfig eth0:1 10.0.0.100/24;/sbin/arping -I eth0 -c 3 -s 10.0.0.100/24 10.0.0.2 >/dev/null 2>&1===

Disabling the VIP on old master: 10.0.0.8 
Wed May 26 21:15:27 2021 - [info]  done.
Wed May 26 21:15:27 2021 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Wed May 26 21:15:27 2021 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Wed May 26 21:15:27 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] * Phase 3: Master Recovery Phase..
Wed May 26 21:15:27 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Wed May 26 21:15:27 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] The latest binary log file/position on all slaves is mariadb-bin.000002:973
Wed May 26 21:15:27 2021 - [info] Latest slaves (Slaves that received relay log files to the latest):
Wed May 26 21:15:27 2021 - [info]   10.0.0.18(10.0.0.18:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:27 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:27 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Wed May 26 21:15:27 2021 - [info]   10.0.0.28(10.0.0.28:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:27 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:27 2021 - [info] The oldest binary log file/position on all slaves is mariadb-bin.000002:973
Wed May 26 21:15:27 2021 - [info] Oldest slaves:
Wed May 26 21:15:27 2021 - [info]   10.0.0.18(10.0.0.18:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:27 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:27 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Wed May 26 21:15:27 2021 - [info]   10.0.0.28(10.0.0.28:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:27 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:27 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] * Phase 3.2: Saving Dead Master's Binlog Phase..
Wed May 26 21:15:27 2021 - [info] 
Wed May 26 21:15:27 2021 - [info] Fetching dead master's binary logs..
Wed May 26 21:15:27 2021 - [info] Executing command on the dead master 10.0.0.8(10.0.0.8:3306): save_binary_logs --command=save --start_file=mariadb-bin.000002  --start_pos=973 --binlog_dir=/data/mysql/ --output_file=/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.56
  Creating /data/mastermha/app1 if not exists..    ok.
 Concat binary/relay logs from mariadb-bin.000002 pos 973 to mariadb-bin.000002 EOF into /data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog ..
 Binlog Checksum enabled
  Dumping binlog format description event, from position 0 to 256.. ok.
  Dumping effective binlog data from /data/mysql//mariadb-bin.000002 position 973 to tail(996).. ok.
 Binlog Checksum enabled
 Concat succeeded.
Wed May 26 21:15:28 2021 - [info] scp from root@10.0.0.8:/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog to local:/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog succeeded.
Wed May 26 21:15:28 2021 - [info] HealthCheck: SSH to 10.0.0.18 is reachable.
Wed May 26 21:15:28 2021 - [info] HealthCheck: SSH to 10.0.0.28 is reachable.
Wed May 26 21:15:28 2021 - [info] 
Wed May 26 21:15:28 2021 - [info] * Phase 3.3: Determining New Master Phase..
Wed May 26 21:15:28 2021 - [info] 
Wed May 26 21:15:28 2021 - [info] Finding the latest slave that has all relay logs for recovering other slaves..
Wed May 26 21:15:28 2021 - [info] All slaves received relay logs to the same position. No need to resync each other.
Wed May 26 21:15:28 2021 - [info] Searching new master from slaves..
Wed May 26 21:15:28 2021 - [info]  Candidate masters from the configuration file:
Wed May 26 21:15:28 2021 - [info]   10.0.0.18(10.0.0.18:3306)  Version=10.3.28-MariaDB-log (oldest major version between slaves) log-bin:enabled
Wed May 26 21:15:28 2021 - [info]     Replicating from 10.0.0.8(10.0.0.8:3306)
Wed May 26 21:15:28 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Wed May 26 21:15:28 2021 - [info]  Non-candidate masters:
Wed May 26 21:15:28 2021 - [info]  Searching from candidate_master slaves which have received the latest relay log events..
Wed May 26 21:15:28 2021 - [info] New master is 10.0.0.18(10.0.0.18:3306)
Wed May 26 21:15:28 2021 - [info] Starting master failover..
Wed May 26 21:15:28 2021 - [info] 
From:
10.0.0.8(10.0.0.8:3306) (current master)
 +--10.0.0.18(10.0.0.18:3306)
 +--10.0.0.28(10.0.0.28:3306)

To:
10.0.0.18(10.0.0.18:3306) (new master)
 +--10.0.0.28(10.0.0.28:3306)
Wed May 26 21:15:28 2021 - [info] 
Wed May 26 21:15:28 2021 - [info] * Phase 3.3: New Master Diff Log Generation Phase..
Wed May 26 21:15:28 2021 - [info] 
Wed May 26 21:15:28 2021 - [info]  This server has all relay logs. No need to generate diff files from the latest slave.
Wed May 26 21:15:28 2021 - [info] Sending binlog..
Wed May 26 21:15:29 2021 - [info] scp from local:/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog to root@10.0.0.18:/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog succeeded.
Wed May 26 21:15:29 2021 - [info] 
Wed May 26 21:15:29 2021 - [info] * Phase 3.4: Master Log Apply Phase..
Wed May 26 21:15:29 2021 - [info] 
Wed May 26 21:15:29 2021 - [info] *NOTICE: If any error happens from this phase, manual recovery is needed.
Wed May 26 21:15:29 2021 - [info] Starting recovery on 10.0.0.18(10.0.0.18:3306)..
Wed May 26 21:15:29 2021 - [info]  Generating diffs succeeded.
Wed May 26 21:15:29 2021 - [info] Waiting until all relay logs are applied.
Wed May 26 21:15:29 2021 - [info]  done.
Wed May 26 21:15:29 2021 - [info] Getting slave status..
Wed May 26 21:15:29 2021 - [info] This slave(10.0.0.18)'s Exec_Master_Log_Pos equals to Read_Master_Log_Pos(mariadb-bin.000002:973). No need to recover from Exec_Master_Log_Pos.
Wed May 26 21:15:29 2021 - [info] Connecting to the target slave host 10.0.0.18, running recover script..
Wed May 26 21:15:29 2021 - [info] Executing command: apply_diff_relay_logs --command=apply --slave_user='mhauser' --slave_host=10.0.0.18 --slave_ip=10.0.0.18  --slave_port=3306 --apply_files=/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog --workdir=/data/mastermha/app1/ --target_version=10.3.28-MariaDB-log --timestamp=20210526211526 --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.56 --slave_pass=xxx
Wed May 26 21:15:29 2021 - [info] 
MySQL client version is 10.3.28. Using --binary-mode.
Applying differential binary/relay log files /data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog on 10.0.0.18:3306. This may take long time...
Applying log files succeeded.
Wed May 26 21:15:29 2021 - [info]  All relay logs were successfully applied.
Wed May 26 21:15:29 2021 - [info] Getting new master's binlog name and position..
Wed May 26 21:15:29 2021 - [info]  mariadb-bin.000002:344
Wed May 26 21:15:29 2021 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='10.0.0.18', MASTER_PORT=3306, MASTER_LOG_FILE='mariadb-bin.000002', MASTER_LOG_POS=344, MASTER_USER='repluser', MASTER_PASSWORD='xxx';
Wed May 26 21:15:29 2021 - [info] Executing master IP activate script:
Wed May 26 21:15:29 2021 - [info]   /usr/local/bin/master_ip_failover --command=start --ssh_user=root --orig_master_host=10.0.0.8 --orig_master_ip=10.0.0.8 --orig_master_port=3306 --new_master_host=10.0.0.18 --new_master_ip=10.0.0.18 --new_master_port=3306 --new_master_user='mhauser' --new_master_password='waluna'  
Unknown option: new_master_user
Unknown option: new_master_password

IN SCRIPT TEST====/sbin/ifconfig eth0:1 down===/sbin/ifconfig eth0:1 10.0.0.100/24;/sbin/arping -I eth0 -c 3 -s 10.0.0.100/24 10.0.0.2 >/dev/null 2>&1===

Enabling the VIP - 10.0.0.100/24 on the new master - 10.0.0.18 
Wed May 26 21:15:29 2021 - [info]  OK.
Wed May 26 21:15:29 2021 - [info] Setting read_only=0 on 10.0.0.18(10.0.0.18:3306)..
Wed May 26 21:15:29 2021 - [info]  ok.
Wed May 26 21:15:29 2021 - [info] ** Finished master recovery successfully.
Wed May 26 21:15:29 2021 - [info] * Phase 3: Master Recovery Phase completed.
Wed May 26 21:15:29 2021 - [info] 
Wed May 26 21:15:29 2021 - [info] * Phase 4: Slaves Recovery Phase..
Wed May 26 21:15:29 2021 - [info] 
Wed May 26 21:15:29 2021 - [info] * Phase 4.1: Starting Parallel Slave Diff Log Generation Phase..
Wed May 26 21:15:29 2021 - [info] 
Wed May 26 21:15:29 2021 - [info] -- Slave diff file generation on host 10.0.0.28(10.0.0.28:3306) started, pid: 2029. Check tmp log /data/mastermha/app1//10.0.0.28_3306_20210526211526.log if it takes time..
Wed May 26 21:15:30 2021 - [info] 
Wed May 26 21:15:30 2021 - [info] Log messages from 10.0.0.28 ...
Wed May 26 21:15:30 2021 - [info] 
Wed May 26 21:15:29 2021 - [info]  This server has all relay logs. No need to generate diff files from the latest slave.
Wed May 26 21:15:30 2021 - [info] End of log messages from 10.0.0.28.
Wed May 26 21:15:30 2021 - [info] -- 10.0.0.28(10.0.0.28:3306) has the latest relay log events.
Wed May 26 21:15:30 2021 - [info] Generating relay diff files from the latest slave succeeded.
Wed May 26 21:15:30 2021 - [info] 
Wed May 26 21:15:30 2021 - [info] * Phase 4.2: Starting Parallel Slave Log Apply Phase..
Wed May 26 21:15:30 2021 - [info] 
Wed May 26 21:15:30 2021 - [info] -- Slave recovery on host 10.0.0.28(10.0.0.28:3306) started, pid: 2031. Check tmp log /data/mastermha/app1//10.0.0.28_3306_20210526211526.log if it takes time..
Wed May 26 21:15:31 2021 - [info] 
Wed May 26 21:15:31 2021 - [info] Log messages from 10.0.0.28 ...
Wed May 26 21:15:31 2021 - [info] 
Wed May 26 21:15:30 2021 - [info] Sending binlog..
Wed May 26 21:15:30 2021 - [info] scp from local:/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog to root@10.0.0.28:/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog succeeded.
Wed May 26 21:15:30 2021 - [info] Starting recovery on 10.0.0.28(10.0.0.28:3306)..
Wed May 26 21:15:30 2021 - [info]  Generating diffs succeeded.
Wed May 26 21:15:30 2021 - [info] Waiting until all relay logs are applied.
Wed May 26 21:15:30 2021 - [info]  done.
Wed May 26 21:15:30 2021 - [info] Getting slave status..
Wed May 26 21:15:30 2021 - [info] This slave(10.0.0.28)'s Exec_Master_Log_Pos equals to Read_Master_Log_Pos(mariadb-bin.000002:973). No need to recover from Exec_Master_Log_Pos.
Wed May 26 21:15:30 2021 - [info] Connecting to the target slave host 10.0.0.28, running recover script..
Wed May 26 21:15:30 2021 - [info] Executing command: apply_diff_relay_logs --command=apply --slave_user='mhauser' --slave_host=10.0.0.28 --slave_ip=10.0.0.28  --slave_port=3306 --apply_files=/data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog --workdir=/data/mastermha/app1/ --target_version=10.3.28-MariaDB-log --timestamp=20210526211526 --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.56 --slave_pass=xxx
Wed May 26 21:15:31 2021 - [info] 
MySQL client version is 10.3.28. Using --binary-mode.
Applying differential binary/relay log files /data/mastermha/app1//saved_master_binlog_from_10.0.0.8_3306_20210526211526.binlog on 10.0.0.28:3306. This may take long time...
Applying log files succeeded.
Wed May 26 21:15:31 2021 - [info]  All relay logs were successfully applied.
Wed May 26 21:15:31 2021 - [info]  Resetting slave 10.0.0.28(10.0.0.28:3306) and starting replication from the new master 10.0.0.18(10.0.0.18:3306)..
Wed May 26 21:15:31 2021 - [info]  Executed CHANGE MASTER.
Wed May 26 21:15:31 2021 - [info]  Slave started.
Wed May 26 21:15:31 2021 - [info] End of log messages from 10.0.0.28.
Wed May 26 21:15:31 2021 - [info] -- Slave recovery on host 10.0.0.28(10.0.0.28:3306) succeeded.
Wed May 26 21:15:31 2021 - [info] All new slave servers recovered successfully.
Wed May 26 21:15:31 2021 - [info] 
Wed May 26 21:15:31 2021 - [info] * Phase 5: New master cleanup phase..
Wed May 26 21:15:31 2021 - [info] 
Wed May 26 21:15:31 2021 - [info] Resetting slave info on the new master..
Wed May 26 21:15:31 2021 - [info]  10.0.0.18: Resetting slave info succeeded.
Wed May 26 21:15:31 2021 - [info] Master failover to 10.0.0.18(10.0.0.18:3306) completed successfully.
Wed May 26 21:15:31 2021 - [info] 

----- Failover Report -----

app1: MySQL Master failover 10.0.0.8(10.0.0.8:3306) to 10.0.0.18(10.0.0.18:3306) succeeded

Master 10.0.0.8(10.0.0.8:3306) is down!

Check MHA Manager logs at centos7.waluna.top:/data/mastermha/app1/manager.log for details.

Started automated(non-interactive) failover.
Invalidated master IP address on 10.0.0.8(10.0.0.8:3306)
The latest slave 10.0.0.18(10.0.0.18:3306) has all relay logs for recovery.
Selected 10.0.0.18(10.0.0.18:3306) as a new master.
10.0.0.18(10.0.0.18:3306): OK: Applying all logs succeeded.
10.0.0.18(10.0.0.18:3306): OK: Activated master IP address.
10.0.0.28(10.0.0.28:3306): This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
10.0.0.28(10.0.0.28:3306): OK: Applying all logs succeeded. Slave started, replicating from 10.0.0.18(10.0.0.18:3306)
10.0.0.18(10.0.0.18:3306): Resetting slave info succeeded.
Master failover to 10.0.0.18(10.0.0.18:3306) completed successfully.
Wed May 26 21:15:31 2021 - [info] Sending mail..
/usr/local/bin/sendmail.sh: line 14: mail: command not found
Wed May 26 21:15:31 2021 - [error][/usr/share/perl5/vendor_perl/MHA/MasterFailover.pm, ln2065] Failed to send mail with return code 127:0

# 验证VIP漂移至新的master上
[root@slave1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 00:0c:29:91:72:d3 brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.18/24 brd 10.0.0.255 scope global noprefixroute eth0
       valid_lft forever preferred_lft forever
    inet 10.0.0.100/24 brd 10.0.0.255 scope global secondary eth0:1
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fe91:72d3/64 scope link 
       valid_lft forever preferred_lft forever
[root@slave1 ~]# 

# 查看read_only变量
[root@slave1 ~]# mysql
MariaDB [(none)]> select @@read_only;
+-------------+
| @@read_only |
+-------------+
|           0 |
+-------------+
1 row in set (0.000 sec)
# 记得将配置文件read-only删除或者设为=0或者注释掉,否则重启会出问题
[root@slave1 ~]# vim /etc/my.cnf.d/mariadb-server.cnf
[mysqld]
#read-only

# 验证成功将slave1设置为新主
[root@slave1 ~]# mysql
MariaDB [(none)]> show slave status\G
Empty set (0.000 sec)

[root@slave2 ~]# mysql
MariaDB [(none)]> show slave status\G
*************************** 1. row ***************************
                Slave_IO_State: Waiting for master to send event
                   Master_Host: 10.0.0.18       # 可以看出主服务器为10.0.0.18,表示成功

正常情况会受到报警邮件,这里没有配置邮件,所以收不到

5 Ansible常用模块总结

2015年底270多个模块,2016年达到540个,2018年01月12日有1378个模块,2018年07月15日1852个模块,2019年05月25日(ansible 2.7.10)时2080个模块,2020年03月02日有3387个模块

虽然模块众多,但最常用的模块也就2,30个而已,针对特定业务只用10几个模块

常用模块帮助文档参考:

https://docs.ansible.com/ansible/2.9/modules/modules_by_category.html
https://docs.ansible.com/ansible/2.9/modules/list_of_all_modules.html

https://docs.ansible.com/ansible/latest/modules/list_of_all_modules.html
https://docs.ansible.com/ansible/latest/modules/modules_by_category.html

5.1 Command模块

功能:在远程主机执行命令,此为默认模块,可忽略-m选项
注意:此命令不支持$VARNAME < > | ; &等,可以用shell模块实现

[root@ansible ~]# ansible websrvs -m command -a 'chdir=/etc cat centos-release'
10.0.0.6 | CHANGED | rc=0 >>
CentOS release 6.10 (Final)
10.0.0.7 | CHANGED | rc=0 >>
CentOS Linux release 7.9.2009 (Core)

[root@ansible ~]# ansible websrvs -m command -a 'chdir=/data touch f1.txt'
[WARNING]: Consider using the file module with state=touch rather than running 'touch'.
If you need to use command because file is insufficient you can add 'warn: false' to
this command task or set 'command_warnings=False' in ansible.cfg to get rid of this
message.
10.0.0.6 | CHANGED | rc=0 >>

10.0.0.7 | CHANGED | rc=0 >>

[root@ansible ~]# ansible websrvs -m command -a 'ls /data'
10.0.0.6 | CHANGED | rc=0 >>
f1.txt
lost+found
10.0.0.7 | CHANGED | rc=0 >>
f1.txt

# chdir进入目录,create表示如果/data/f1.txt存在,则不执行命令,不存在,就执行
[root@ansible ~]# ansible websrvs -m command -a 'chdir=/etc creates=/data/f1.txt cat centos-release'
10.0.0.6 | CHANGED | rc=0 >>
CentOS release 6.10 (Final)
10.0.0.7 | CHANGED | rc=0 >>
CentOS Linux release 7.9.2009 (Core)

# removes表示文件存在,则执行命令,不存在就不执行
[root@ansible ~]# ansible websrvs -m command -a 'chdir=/etc removes=/data/f1.txt cat centos-release'
10.0.0.6 | SUCCESS | rc=0 >>
skipped, since /data/f1.txt does not exist
10.0.0.7 | SUCCESS | rc=0 >>
skipped, since /data/f1.txt does not exist

ansible websrvs -m command -a 'service vsftpd start'

# command不支持| 所以显示面的话 并不会执行
[root@ansible ~]# ansible websrvs -m command -a 'echo waluna|passwd --stdin luna'
10.0.0.6 | CHANGED | rc=0 >>
waluna|passwd --stdin luna
10.0.0.7 | CHANGED | rc=0 >>
waluna|passwd --stdin luna

# 真会删除
[root@ansible ~]# ansible websrvs -m command -a 'rm -rf /data/'
[WARNING]: Consider using the file module with state=absent rather than running 'rm'.
If you need to use command because file is insufficient you can add 'warn: false' to
this command task or set 'command_warnings=False' in ansible.cfg to get rid of this
message.
10.0.0.6 | FAILED | rc=1 >>
rm: cannot remove `/data': Device or resource busynon-zero return code
10.0.0.7 | FAILED | rc=1 >>
rm: cannot remove ‘/data/’: Device or resource busynon-zero return code

# command不支持> < 所以不成功
[root@ansible ~]# ansible websrvs -m command -a 'echo hello > /data/hello.log'
10.0.0.6 | CHANGED | rc=0 >>
hello > /data/hello.log
10.0.0.7 | CHANGED | rc=0 >>
hello > /data/hello.log

# command不支持变量,所以显示本机的主机名
[root@ansible ~]# ansible websrvs -m command -a "echo $HOSTNAME"
10.0.0.6 | CHANGED | rc=0 >>
ansible
10.0.0.7 | CHANGED | rc=0 >>
ansible
5.2 Shel模块

功能:和command相似,用shell执行命令,支持各种符号,比如:*,$,>等

[root@ansible ~]# ansible websrvs -m shell -a 'echo $HOSTNAME'
10.0.0.7 | CHANGED | rc=0 >>
centos7.waluna.top
10.0.0.8 | CHANGED | rc=0 >>
centos8.waluna.top
[root@ansible ~]# ansible websrvs -m shell -a 'echo waluna | passwd --stdin test'
10.0.0.7 | CHANGED | rc=0 >>
Changing password for user test.
passwd: all authentication tokens updated successfully.
10.0.0.8 | CHANGED | rc=0 >>
Changing password for user test.
passwd: all authentication tokens updated successfully.
[root@ansible ~]# ansible websrvs -m shell -a 'echo hello > /data/hello.log'
10.0.0.7 | CHANGED | rc=0 >>

10.0.0.8 | CHANGED | rc=0 >>

[root@ansible ~]# ansible websrvs -m shell -a 'cat /data/hello.log'         
10.0.0.7 | CHANGED | rc=0 >>
hello
10.0.0.8 | CHANGED | rc=0 >>
hello

注意:调用bash执行命令类似cat /tmp/test.md | awk -F '|' '(print $1,$2}' &> /tmp/example.txt这些复杂命令,即使使用shell也可能会失败,解决办法:写到脚本里,copy到远程,执行,再把需要的结果拉回执行命令的机器

将shell模块代替command,设为默认模块

[root@ansible ~]# vim /etc/ansible/ansible.cfg
module_name = shell
5.3 Script模块

功能:在远程主机上运行ansible服务器上的脚本(无需执行权限)

ansible websrvs -m script -a /data/test.sh
5.4 Copy模块

功能:从ansible服务器主控端复制文件到远程主机

# 如目标存在,默认覆盖,此处指定先备份
ansible websrvs -m copy -a "src=/root/test1.sh dest=/tmp/test2.sh owner=luna mode=600 backup=yes"

# 指定内容,直接生成目标文件
ansible websrvs -m copy -a "content='test line1\ntest line2' dest=/tmp/test.txt"

# 复制/etc目录自身,注意/etc/后面没有/
ansible websrvs -m copy -a "src=/etc dest=/backup"

# 复制/etc/下的文件,不包括/etc/目录自身,注意/etc/后面有/

ansible websrvs -m copy -a "src=/etc/ dest=/backup"
5.5 Fetch模块

功能:从远程主机提取文件之ansible的主控端,copy相反,目前不支持目录

ansible websrvs -m fetch -a 'src=/root/test.sh dest=/data/scripts'

例:

[root@ansible ~]# ansible all -m fetch -a 'src=/etc/redhat-release dest=/data/os'
[root@ansible ~]# tree /data/os/
/data/os/
├── 10.0.0.6
│   └── etc
│       └── redhat-release
├── 10.0.0.7
│   └── etc
│       └── redhat-release
└── 10.0.0.8
    └── etc
        └── redhat-release

6 directories, 3 file
5.6 File模块

功能:设置文件属性吗,创建软连接等

# 创建空文件
ansible all -m file -a 'path=/data/test.txt state=touch'
ansible all -m file -a 'path=/data/test.txt state=absent'
ansible all -m file -a 'path=/root/test.sh state=touch'
ansible all -m file -a "path=/root/test.sh owner=luna mode=755"
ansible all -a 'ls -l /root'  # 不允许使用别名,注意使用别名会报错。
# 创建目录
ansible all -a 'useradd mysql'
ansible all -m file -a "path=/data/mysql state=directory owner=mysql group=mysql"
ansible all -a 'ls -ld /data/mysql'
# 创建软链接
ansible all -m file -a 'src=/data/testfile path|dest|name=/data/testfile-link state=link'
ansible all -m file -a 'src=/root/test.sh name=/root/test-link state=link'
ansible all -a 'ls -l /root' 
# 创建目录
ansible all -m file -a 'path=/data/testdir state=directory'
ansible all -a 'ls -ld /data/testdir'
# 递归修改目录属性
ansible all -m file -a "path=/data/mysql state=directory owner=mysql group=mysql recurse=yes"
ansible all -a 'ls -ld /data/mysql'
5.7 unarchive模块

功能:解包解压缩

实现有两种用法:

1、将ansible主机上的压缩包传到远程主机后解压缩至特定目录,设置copy=yes
2、将远程主机上的某个压缩包解压缩到指定路径下,设置copy=no

常见参数:

copy:默认为yes,当copy=yes,拷贝的文件是从ansible主机复制到远程主机上,如果设置为copy=no,会在远程主机上寻找src源文件
remote_src:和copy功能一样且互斥,yes表示在远程主机,不在ansible主机,no表示文件在ansible主机上
src:源路径,可以是ansible主机上的路径,也可以是远程主机(被管理端或者第三方主机)上的路径,如果是远程主机上的路径,则需要设置copy=no
dest:远程主机上的目标路径
mode:设置解压缩后的文件权限

例:

ansible all -m unarchive -a 'src=/data/foo.tgz dest=/var/lib/foo owner=luna group=bin'
ansible all -m unarchive -a 'src=/tmp/foo.zip dest=/data copy=no mode=0777'
ansible all -m unarchive -a 'src=https://example.com/example.zip dest=/data copy=no'

ansible websrvs -m unarchive -a 'src=https://releases.ansible.com/ansible/ansible-2.1.6.0-0.1.rc1.tar.gz dest=/data/owner=mysql remote_src=yes'
5.8 Archive模块

功能:打包压缩保存在被管理节点、

ansible websrvs -m archive -a 'path=/var/log/ dest=/data/log.tar.bz2 format=bz2 owner=luna mode-0600'
5.9 Hostname模块

功能:管理主机名

ansible node1 -m hostname -a "name=websrv"
ansible 10.0.0.18 -m hostname -a 'name=node18.waluna.top'
5.10 Cron模块

功能:计划任务
支持时间:minute,hour,day,month,weekday

# 备份数据库脚本
[root@centos8 ~]# cat mysql_backup.sh
#!/bin/bash
mysqldump -A -F --single-transaction --master-data=2 -q -uroot|gzip > /data/mysql+`date +%F_%T`.sql.gz

# 创建任务
ansible 10.0.0.8 -m cron -a 'hour=2 minute=30 weekday=1-5 name="backup mysql" job=/root/mysql_backup.sh'
ansible websrvs -m cron -a "minute=*/5 job='/usr/sbin/ntpdate ntp.aliyun.com &> /dev/null' name=Synctime"

# 禁用计划任务
ansible websrvs -m cron -a "minute=*/5 job='/usr/sbin/ntpdate ntp.aliyun.com &> /dev/null' name=synctime disabled=yes"

#启用计划任务
ansible websrvs -m cron -a "minute=*/5 job='/usr/sbin/ntpdate ntp.aliyun.com &> /dev/null' name=synctime disabled=no"

# 删除任务
ansible websrvs -m cron -a "name='backup mysql' state=absent"
ansible websrvs -m cron -a 'state=absent name=Synctime'
5.11 Yum和Apt模块

功能:

yum管理软件包,只支持RHEL,Centos,fedora,不支持Ubuntu其它版本

apt模块管理Debian相关版本的软件包

ansible websrvs -m yum -a 'name=httpd state=present'  # 安装
ansible websrvs -m yum -a 'name=httpd state=absent'       # 删除

[root@ansible ~]# ansible websrvs -m yum -a 'name=sl,cowsay'

范例:

[root@ansible ~]# ansible 10.0.0.100 -m apt -a
'name=bb,sl,cowsay,cmatrix,oneko,hollywood,boxes,libaa-bin,x11-apps'
[root@ansible ~]# ansible websrvs -m apt -a 'name=rsync,psmisc state=absent'
5.12 Service模块

功能:管理服务

ansible all -m service -a 'name=httpd state=started enabled=yes'
ansible all -m service -a 'name=httpd state=stopped'
ansible all -m service -a 'name=httpd state=reloaded'
ansible all -m shell -a "sed -i 's/^Listen 80/Listen 8080/' /etc/httpd/conf/httpd.conf"
ansible all -m service -a 'name=httpd state=restarted'
5.13 User模块

功能:管理用户

# 创建用户
ansible all -m user -a 'name=user1 comment="test user" uid=2048 home=/app/user1 group=root'

ansible all -m user -a 'name=nginx comment=nginx uid=88 group=nginx groups="root, daemon"  shell=/sbin/nologin system=yes create_home=no home=/data/nginx non_unique=yes'

# remove=yes表示删除用户及家目录等数据,默认remove=no
ansible all -m user -a 'name=nginx state=absent remove=yes'
5.14 Group模块

功能:管理组

ansible websrvs -m group -a 'name=nginx gid=88 system=yes'    # 创建组
ansible websrvs -m group -a 'name=nginx state=absent"     # 删除组
5.15 Lineinfile模块

ansible在使用sed进行替换时,经常会遇到需要转义的问题,而且ansible在遇到特殊符号进行替换时,存在问题,无法正常进行替换。其实在ansible自身提供了两个模块:lineinfile模块和replace模块,可以方便的进行替换

一般在ansible当中去修改某个文件的单行进行替换的时候需要使用lineinfile模块

regexp参数:使用正则表达式匹配对应的行,当替换文本时,如果有多行文本都能被匹配,则只有最后面被匹配到的那行文本才会被替换,当删除文本时,如果有多行文本都能被匹配,这么这些行都会被删除。

如果想进行多行匹配进行替换需要使用replace模块

功能:相当于sed,可以修改文件内容

ansible websrvs -m lineinfile -a "path=/etc/httpd/conf/httpd.conf regexp='^Listen' line='Listen 80'"

ansible all -m lineinfile -a "path=/etc/selinux/config regexp='^SELINUX=' line='SELINUX=disabled'"

ansible all -m lineinfile -a 'dest=/etc/fstab state=absent regexp="^#"'
5.16 Replace模块

该模块有点类似于sed命令,主要也是基于正则进行匹配和替换,建议使用

ansible all -m replace -a "path=/etc/fstab regexp='^(UUID.*)' replace='#\1'"
ansible all -m replace -a "path=/etc/fstab regexp='^#(UUID.*)' replace='\1'"
5.17 Setup模块

功能:setup模块来收集主机的系统信息,这些facts 信息可以直接以变量的形式使用,但是如果主机较多,会影响执行速度,可以使用gather_facts: no来禁止Ansible收集facts信息

ansible all -m setup
ansible all -m setup -a "filter=ansible_nodename"     # 主机名全称
ansible all -m setup -a "filter=ansible_hostname"     # 主机名
ansible all -m setup -a "filter=ansible_domain"           # 域名
ansible all -m setup -a "filter=ansible_memtotal_mb"  # 内存总大小
ansible all -m setup -a "filter=ansible_memory_mb"        # 内存使用情况
ansible all -m setup -a "filter=ansible_memfree_mb"       # 空闲内存大小
ansible all -m setup -a "filter=ansible_os_family"        # 系统版本家族
ansible all -m setup -a "filter=ansible_distribution_major_version"   # 主版本号
ansible all -m setup -a "filter=ansible_distribution_version"     # 版本号
ansible all -m setup -a "filter=ansible_processor_vcpus"      # CPU核心数
ansible all -m setup -a "filter=ansible_all_ipv4_addresses"       # 所有IPv4地址
ansible all -m setup -a "filter=ansible_architecture" # 系统架构
ansible all -m setup -a "filter=ansible_processor"        # CPU核心

查看python版本

[root@ansible ~]# ansible all -m setup -a 'filter=ansible_python_version'
10.0.0.7 | SUCCESS => {
    "ansible_facts": {
        "ansible_python_version": "2.7.5",
        "discovered_interpreter_python": "/usr/bin/python"
    },
    "changed": false
}
10.0.0.8 | SUCCESS => {
    "ansible_facts": {
        "ansible_python_version": "3.6.8",
        "discovered_interpreter_python": "/usr/libexec/platform-python"
    },
    "changed": false
}
10.0.0.9 | SUCCESS => {
    "ansible_facts": {
        "ansible_python_version": "3.9.5",
        "discovered_interpreter_python": "/usr/bin/python3"
    },
    "changed": false
}
10.0.0.6 | SUCCESS => {
    "ansible_facts": {
        "ansible_python_version": "2.6.6",
        "discovered_interpreter_python": "/usr/bin/python"
    },
    "changed": false
}

取IP地址

# 取所有IPv4地址
[root@ansible ~]# ansible 10.0.0.8 -m setup -a 'filter=ansible_all_ipv4_addresses'
10.0.0.8 | SUCCESS => {
    "ansible_facts": {
        "ansible_all_ipv4_addresses": [
            "10.0.0.8",
            "192.168.0.8",
            "172.16.0.8"
        ],
        "discovered_interpreter_python": "/usr/libexec/platform-python"
    },
    "changed": false
}

# 取默认IP
[root@ansible ~]# ansible 10.0.0.8 -m setup -a 'filter=ansible_default_ipv4'
10.0.0.8 | SUCCESS => {
    "ansible_facts": {
        "ansible_default_ipv4": {
            "address": "10.0.0.8",
            "alias": "eth0",
            "broadcast": "10.0.0.255",
            "gateway": "10.0.0.2",
            "interface": "eth0",
            "macaddress": "00:0c:29:19:bf:73",
            "mtu": 1500,
            "netmask": "255.255.255.0",
            "network": "10.0.0.0",
            "type": "ether"
        },
        "discovered_interpreter_python": "/usr/libexec/platform-python"
    },
    "changed": false
}