MySQL主从复制与集群部署方案

软件部署

MySQL主从复制与集群部署方案

MySQL是最流行的开源关系型数据库之一。本文将详细介绍MySQL的高可用架构设计和部署方法。

主从复制架构

1. 异步复制

主服务器配置

# /etc/mysql/my.cnf
[mysqld]
server-id = 1
log_bin = /var/log/mysql/mysql-bin
binlog_format = ROW
binlog_row_image = FULL
expire_logs_days = 7
max_binlog_size = 500M

# GTID配置(推荐)
gtid_mode = ON
enforce_gtid_consistency = ON

# 复制优化
sync_binlog = 1
innodb_flush_log_at_trx_commit = 1

从服务器配置

# /etc/mysql/my.cnf
[mysqld]
server-id = 2
relay_log = /var/log/mysql/mysql-relay-bin
log_bin = /var/log/mysql/mysql-bin
read_only = 1

# GTID配置
gtid_mode = ON
enforce_gtid_consistency = ON

创建复制用户

-- 在主服务器上执行
CREATE USER 'replica'@'%' IDENTIFIED BY 'StrongPassword123!';
GRANT REPLICATION SLAVE ON *.* TO 'replica'@'%';
FLUSH PRIVILEGES;

-- 查看主服务器状态
SHOW MASTER STATUS;
-- 记录File和Position

配置从服务器

-- 方法1:基于位置
CHANGE MASTER TO
  MASTER_HOST='master_host',
  MASTER_USER='replica',
  MASTER_PASSWORD='StrongPassword123!',
  MASTER_LOG_FILE='mysql-bin.000001',
  MASTER_LOG_POS=154;

-- 方法2:基于GTID(推荐)
CHANGE MASTER TO
  MASTER_HOST='master_host',
  MASTER_USER='replica',
  MASTER_PASSWORD='StrongPassword123!',
  MASTER_AUTO_POSITION = 1;

-- 启动复制
START SLAVE;

-- 查看复制状态
SHOW SLAVE STATUS\G

2. 半同步复制

主服务器配置

[mysqld]
# 半同步复制插件
plugin_load = "rpl_semi_sync_master=semisync_master.so"
rpl_semi_sync_master_enabled = 1
rpl_semi_sync_master_timeout = 1000
rpl_semi_sync_master_wait_for_slave_count = 1

从服务器配置

[mysqld]
plugin_load = "rpl_semi_sync_slave=semisync_slave.so"
rpl_semi_sync_slave_enabled = 1

读写分离实现

1. 使用ProxySQL

安装ProxySQL

# CentOS/RHEL
yum install proxysql

# Ubuntu/Debian
apt-get install proxysql

# 启动服务
systemctl start proxysql
systemctl enable proxysql

配置ProxySQL

-- 登录ProxySQL管理接口
mysql -u admin -padmin -h 127.0.0.1 -P 6032

-- 添加后端服务器
INSERT INTO mysql_servers(hostgroup_id, hostname, port, weight) 
VALUES 
  (10, 'master_ip', 3306, 1),    -- 写组
  (20, 'slave1_ip', 3306, 1),    -- 读组
  (20, 'slave2_ip', 3306, 1);    -- 读组

LOAD MYSQL SERVERS TO RUNTIME;
SAVE MYSQL SERVERS TO DISK;

-- 添加监控用户
INSERT INTO mysql_users(username, password, default_hostgroup) 
VALUES ('app_user', 'password', 10);

LOAD MYSQL USERS TO RUNTIME;
SAVE MYSQL USERS TO DISK;

-- 配置读写分离规则
INSERT INTO mysql_query_rules (
  rule_id, active, match_pattern, 
  destination_hostgroup, apply
) VALUES 
  (1, 1, '^SELECT.*FOR UPDATE$', 10, 1),
  (2, 1, '^SELECT', 20, 1),
  (3, 1, '.*', 10, 1);

LOAD MYSQL QUERY RULES TO RUNTIME;
SAVE MYSQL QUERY RULES TO DISK;

2. 使用MySQL Router

安装MySQL Router

# 下载安装包
wget https://dev.mysql.com/get/Downloads/MySQL-Router/mysql-router-8.0-linux-glibc2.28-x86_64.tar.xz

# 解压安装
tar -xvf mysql-router-8.0-linux-glibc2.28-x86_64.tar.xz
sudo mv mysql-router-8.0-linux-glibc2.28-x86_64 /usr/local/mysqlrouter

配置MySQL Router

# /etc/mysqlrouter/mysqlrouter.conf
[DEFAULT]
logging_folder = /var/log/mysqlrouter
runtime_folder = /var/run/mysqlrouter
config_folder = /etc/mysqlrouter

[logger]
level = INFO

[routing:primary]
bind_address = 0.0.0.0
bind_port = 6446
destinations = master_ip:3306
mode = read-write
protocol = classic

[routing:secondary]
bind_address = 0.0.0.0
bind_port = 6447
destinations = slave1_ip:3306,slave2_ip:3306
mode = read-only
protocol = classic

高可用架构

1. MHA(Master High Availability)

架构组件

  • Manager:管理节点,负责故障检测和切换
  • Node:数据节点,运行在每个MySQL服务器上

安装MHA

# 安装依赖
yum install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch \\
    perl-Parallel-ForkManager perl-Time-HiRes

# 安装MHA Node(所有MySQL服务器)
rpm -ivh mha4mysql-node-0.58-0.el7.centos.noarch.rpm

# 安装MHA Manager(管理节点)
rpm -ivh mha4mysql-manager-0.58-0.el7.centos.noarch.rpm

配置MHA

# /etc/masterha/app1.cnf
[server default]
manager_log = /var/log/masterha/app1/manager.log
manager_workdir = /var/log/masterha/app1
master_binlog_dir = /var/lib/mysql
master_ip_failover_script = /usr/local/bin/master_ip_failover
master_ip_online_change_script = /usr/local/bin/master_ip_online_change
password = StrongPassword123!
ping_interval = 1
remote_workdir = /tmp
repl_password = ReplicaPassword123!
repl_user = replica
secondary_check_script = masterha_secondary_check -s slave1 -s slave2
shutdown_script = ""
ssh_user = root
user = root

[server1]
candidate_master = 1
check_repl_delay = 0
hostname = master_ip
port = 3306

[server2]
candidate_master = 1
hostname = slave1_ip
port = 3306

[server3]
hostname = slave2_ip
port = 3306

启动MHA

# 检查SSH连接
masterha_check_ssh --conf=/etc/masterha/app1.cnf

# 检查复制状态
masterha_check_repl --conf=/etc/masterha/app1.cnf

# 启动Manager
nohup masterha_manager --conf=/etc/masterha/app1.cnf \\
    --remove_dead_master_conf --ignore_last_failover < /dev/null > \\
    /var/log/masterha/app1/manager.log 2>&1 &

# 查看状态
masterha_check_status --conf=/etc/masterha/app1.cnf

2. Orchestrator

安装Orchestrator

# 下载安装
git clone https://github.com/openark/orchestrator.git
cd orchestrator
cd build && bash build.sh

# 或使用包管理器
yum install orchestrator

配置Orchestrator

{
  "Debug": true,
  "ListenAddress": ":3000",
  "MySQLTopologyUser": "orchestrator",
  "MySQLTopologyPassword": "password",
  "MySQLTopologyCredentialsConfigFile": "",
  "MySQLTopologySSLPrivateKeyFile": "",
  "MySQLTopologySSLCertFile": "",
  "MySQLTopologySSLCAFile": "",
  "MySQLTopologySSLSkipVerify": true,
  "MySQLTopologyUseMutualTLS": false,
  "BackendDB": "sqlite",
  "SQLite3DataFile": "/var/lib/orchestrator/orchestrator.sqlite3",
  "DefaultInstancePort": 3306,
  "DiscoverByShowSlaveHosts": true,
  "InstancePollSeconds": 5,
  "UnseenInstanceForgetHours": 240,
  "SnapshotTopologiesIntervalHours": 0,
  "InstanceBulkOperationsWaitTimeoutSeconds": 10,
  "HostnameResolveMethod": "default",
  "MySQLHostnameResolveMethod": "@@hostname",
  "SkipBinlogServerUnresolveCheck": true,
  "ExpiryHostnameResolvesMinutes": 60,
  "RejectHostnameResolvePattern": "",
  "ReasonableReplicationLagSeconds": 10,
  "ProblemIgnoreHostnameFilters": [],
  "VerifyReplicationFilters": false,
  "ReasonableMaintenanceReplicationLagSeconds": 20,
  "CandidateInstanceExpireMinutes": 60,
  "AuditLogFile": "",
  "AuditToSyslog": false,
  "RemoveTextFromHostnameDisplay": ".mydomain.com:3306",
  "ReadOnly": false,
  "AuthenticationMethod": "",
  "HTTPAuthUser": "",
  "HTTPAuthPassword": "",
  "AuthUserHeader": "",
  "PowerAuthUsers": ["*"],
  "ClusterNameToAlias": {
    "127.0.0.1": "test suite"
  },
  "SlaveLagQuery": "",
  "DetectClusterAliasQuery": "SELECT SUBSTRING_INDEX(@@hostname, '.', 1)",
  "DetectClusterDomainQuery": "",
  "DetectInstanceAliasQuery": "",
  "PromotionIgnoreHostnameFilters": [],
  "SemiSyncMasterWaitForReplicaCount": 1,
  "RecoveryPeriodBlockSeconds": 3600,
  "RecoveryIgnoreHostnameFilters": [],
  "RecoverMasterClusterFilters": ["*"],
  "RecoverIntermediateMasterClusterFilters": ["*"],
  "OnFailureDetectionProcesses": [
    "echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countSlaves}' >> /tmp/recovery.log"
  ],
  "PreFailoverProcesses": [
    "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/recovery.log"
  ],
  "PostFailoverProcesses": [
    "echo '(for all types) Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log"
  ],
  "PostUnsuccessfulFailoverProcesses": [],
  "PostMasterFailoverProcesses": [],
  "PostIntermediateMasterFailoverProcesses": [],
  "CoMasterRecoveryMustPromoteOtherCoMaster": true,
  "DetachLostSlavesAfterMasterFailover": true,
  "ApplyMySQLPromotionAfterMasterFailover": false,
  "PreventCrossDataCenterMasterFailover": false,
  "PreventCrossRegionMasterFailover": false,
  "MasterFailoverDetachReplicaMasterHost": false,
  "MasterFailoverLostInstancesDowntimeMinutes": 0,
  "PostponeReplicaRecoveryOnLagMinutes": 0,
  "OSCIgnoreHostnameFilters": [],
  "GraphiteAddr": "",
  "GraphitePath": "",
  "GraphiteConvertHostnameDotsToUnderscores": true
}

启动Orchestrator

# 初始化数据库
orchestrator -c relocate -i slave_host -d master_host

# 启动服务
systemctl start orchestrator
systemctl enable orchestrator

# Web界面访问
http://orchestrator_host:3000

集群方案

1. MySQL Group Replication

单主模式配置

# 所有节点配置
[mysqld]
server_id = 1  # 每个节点不同
bind_address = "0.0.0.0"

# Group Replication配置
plugin_load_add = group_replication.so
group_replication_group_name = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
group_replication_start_on_boot = off
group_replication_local_address = "node1:33061"
group_replication_group_seeds = "node1:33061,node2:33061,node3:33061"
group_replication_bootstrap_group = off

# 单主模式
group_replication_single_primary_mode = ON
group_replication_enforce_update_everywhere_checks = OFF

# 事务一致性
group_replication_consistency = BEFORE_ON_PRIMARY_FAILOVER

启动Group Replication

-- 第一个节点(引导)
SET GLOBAL group_replication_bootstrap_group=ON;
START GROUP_REPLICATION;
SET GLOBAL group_replication_bootstrap_group=OFF;

-- 其他节点
START GROUP_REPLICATION;

-- 查看组成员
SELECT * FROM performance_schema.replication_group_members;

-- 查看主节点
SELECT * FROM performance_schema.replication_group_members 
WHERE MEMBER_ROLE = 'PRIMARY';

2. InnoDB Cluster

使用MySQL Shell配置

// 连接到第一个节点
mysqlsh root@node1:3306

// 创建集群
var cluster = dba.createCluster('myCluster');

// 添加其他节点
cluster.addInstance('root@node2:3306');
cluster.addInstance('root@node3:3306');

// 查看集群状态
cluster.status();

// 检查实例配置
dba.checkInstanceConfiguration('root@node1:3306');

// 配置本地实例
dba.configureLocalInstance('root@node1:3306');

集群管理

// 重新加入节点
cluster.rejoinInstance('root@node2:3306');

// 移除节点
cluster.removeInstance('root@node2:3306');

// 切换主节点
cluster.setPrimaryInstance('root@node2:3306');

// 解散集群
cluster.dissolve({force: true});

备份策略

1. 物理备份(Percona XtraBackup)

全量备份

# 创建备份
xtrabackup --backup --target-dir=/backup/full/$(date +%Y%m%d)

# 准备备份
xtrabackup --prepare --target-dir=/backup/full/$(date +%Y%m%d)

# 恢复备份
# 1. 停止MySQL
systemctl stop mysql

# 2. 清空数据目录
rm -rf /var/lib/mysql/*

# 3. 恢复数据
xtrabackup --copy-back --target-dir=/backup/full/$(date +%Y%m%d)

# 4. 修改权限
chown -R mysql:mysql /var/lib/mysql

# 5. 启动MySQL
systemctl start mysql

增量备份

# 基于全备的增量备份
xtrabackup --backup --target-dir=/backup/inc1 \\
    --incremental-basedir=/backup/full/$(date +%Y%m%d)

# 第二个增量
xtrabackup --backup --target-dir=/backup/inc2 \\
    --incremental-basedir=/backup/inc1

# 准备增量备份
xtrabackup --prepare --apply-log-only --target-dir=/backup/full
xtrabackup --prepare --apply-log-only --target-dir=/backup/full \\
    --incremental-dir=/backup/inc1
xtrabackup --prepare --target-dir=/backup/full \\
    --incremental-dir=/backup/inc2

2. 逻辑备份(mysqldump)

全库备份

mysqldump -u root -p --all-databases --single-transaction \\
    --routines --triggers --events \\
    > full_backup_$(date +%Y%m%d).sql

指定数据库备份

mysqldump -u root -p --databases db1 db2 db3 \\
    --single-transaction > db_backup_$(date +%Y%m%d).sql

定时备份脚本

#!/bin/bash
# /usr/local/bin/mysql_backup.sh

BACKUP_DIR="/backup/mysql"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7

# 创建备份目录
mkdir -p $BACKUP_DIR

# 全量备份
xtrabackup --backup --target-dir=$BACKUP_DIR/full_$DATE

# 压缩备份
tar czf $BACKUP_DIR/full_$DATE.tar.gz -C $BACKUP_DIR full_$DATE

# 删除原目录
rm -rf $BACKUP_DIR/full_$DATE

# 删除过期备份
find $BACKUP_DIR -name "full_*.tar.gz" -mtime +$RETENTION_DAYS -delete

# 记录日志
echo "[$(date)] Backup completed: full_$DATE.tar.gz" >> $BACKUP_DIR/backup.log

监控告警

1. Prometheus + mysqld_exporter

安装mysqld_exporter

# 下载
wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.15.0/mysqld_exporter-0.15.0.linux-amd64.tar.gz

# 解压
tar xzf mysqld_exporter-0.15.0.linux-amd64.tar.gz
sudo mv mysqld_exporter-0.15.0.linux-amd64/mysqld_exporter /usr/local/bin/

# 创建用户
CREATE USER 'exporter'@'localhost' IDENTIFIED BY 'password' WITH MAX_USER_CONNECTIONS 3;
GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'localhost';

配置监控

# /etc/prometheus/mysql.yml
- job_name: mysql
  static_configs:
    - targets:
      - mysql1:9104
      - mysql2:9104
      - mysql3:9104

2. 关键监控指标

# Prometheus告警规则
groups:
- name: mysql
  rules:
  - alert: MySQLDown
    expr: mysql_up == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "MySQL instance is down"
      
  - alert: MySQLReplicationLag
    expr: mysql_slave_lag_seconds > 30
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "MySQL replication lag is high"
      
  - alert: MySQLHighConnections
    expr: (mysql_global_status_threads_connected / mysql_global_variables_max_connections) > 0.8
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "MySQL connection usage is high"
      
  - alert: MySQLSlowQueries
    expr: rate(mysql_global_status_slow_queries[5m]) > 1
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "MySQL has slow queries"

总结

MySQL高可用架构设计需要考虑:

  1. 复制架构:异步复制、半同步复制、GTID
  2. 读写分离:ProxySQL、MySQL Router
  3. 高可用方案:MHA、Orchestrator、Group Replication
  4. 备份策略:物理备份、逻辑备份、增量备份
  5. 监控告警:Prometheus、mysqld_exporter

根据业务需求和数据重要性选择合适的架构方案,确保数据库的高可用性和数据安全性。