Oc支持的钩子脚本
orchestrator 支持钩子脚本,即在恢复过程中调用的外部脚本。这些钩子是通过 shell 调用的命令数组。
OnFailureDetectionProcesses // 故障发现阶段
PreGracefulTakeoverProcesses // 计划内切换流程之前执行,在master设置read_only 之前执行
PreFailoverProcesses // 自动切换流程之前执行
PostMasterFailoverProcesses // 主库切换成功之后
PostIntermediateMasterFailoverProcesses // 中间主库(级联复制)切换成功之后
PostFailoverProcesses // 自动切换流程之后执行
PostUnsuccessfulFailoverProcesses // 切换不成功时
PostGracefulTakeoverProcesses // 计划内切换流程之后执行
脚本解释
OnFailureDetectionProcesses []string // 检测到故障转移场景时要执行的流程 (在决定是否进行故障转移之前).可以并且应该使用以下一些占位符: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {autoMasterRecovery}, {autoIntermediateMasterRecovery}
PreGracefulTakeoverProcesses []string // 在执行 failover 之前执行的流程(如果其中任何一个以非零代码退出,则中止操作;执行顺序未定义)。可以并且应该使用以下一些占位符: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {countReplicas}, {replicaHosts}, {isDowntimed}
PreFailoverProcesses []string // 在执行 failover 之前要执行的流程(如果其中任何一个以非零代码退出,则中止操作;执行顺序未定义)。 可以并且应该使用以下一些占位符: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {countReplicas}, {replicaHosts}, {isDowntimed}
PostFailoverProcesses []string // 执行故障转移后要执行的流程(执行顺序未定义)。 可以并且应该使用以下一些占位符: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorBinlogCoordinates}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas}
PostUnsuccessfulFailoverProcesses []string // 未完全成功的故障转移后执行的流程(执行顺序未定义)。 可以并且应该使用以下一些占位符: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorBinlogCoordinates}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas}
PostMasterFailoverProcesses []string // 执行主故障转移后要执行的流程(执行顺序未定义)。 使用与 PostFailoverProcesses 相同的占位符
PostIntermediateMasterFailoverProcesses []string // 执行 中间主库(级联复制)故障转移后要执行的进程(执行顺序未定义)。 使用与 PostFailoverProcesses 相同的占位符
PostGracefulTakeoverProcesses []string // 运行优雅的主接管后要执行的进程。 使用与 PostFailoverProcesses 相同的占位符
脚本返回状态码
PreFailoverProcesses : 非零代码退出,则终止操作
PreGracefulTakeoverProcesses : 非零代码退出,则终止操作
PostFailoverProcesses : 在切换完成之后才执行,其退出码不会终止已经完成的切换(只有上面的 Pre* 钩子才有“非零退出则中止”的语义)
如果你在该脚本中 调用了修改域名后端的IP,或者修改LVS后端的RS,在修改不成功时 ,则钩子脚本返回一个非零的值即可终止恢复流程。
占位符
可以并且应该使用以下一些占位符:
{failureType}, // 失败类型
{instanceType}, // 实例类型
{isMaster}, // 是否为 master
{isCoMaster}, // 是否为 CoMaster
{failureDescription}, // 失败描述
{command}, // 命令
{failedHost}, // 故障实例主机
{failureCluster}, // 故障实例所在集群
{failureClusterAlias}, // 故障集群别名
{failureClusterDomain}, // 故障集群域名
{failedPort}, // 故障实例端口
{successorHost}, // 新主库的主机名
{successorPort}, // 新主库的端口
{successorAlias}, // 新主库的别名
{countReplicas}, // 从副本的个数
{replicaHosts}, // 从副本的主机名
{isDowntimed}, // 是否维护状态
{autoMasterRecovery}, // 是否开启主库自动恢复
{autoIntermediateMasterRecovery} // 是否开启中间主库自动恢复
配置文件中的配置例子
下面的例子配置了两个命令:第一个是 shell 命令,第二个是一个 Go 程序。
"OnFailureDetectionProcesses": ["echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countSlaves}' >> /tmp/recovery.log","/usr/local/orchestrator/scripts/orcFailureDetection --failureType {failureType} --instanceType {instanceType} --failureDescription '{failureDescription}' --failedHost {failedHost} --failureCluster {failureCluster} --failureClusterAlias {failureClusterAlias} --failureClusterDomain {failureClusterDomain} --failedPort {failedPort} --successorHost {successorHost} --successorPort {successorPort} --successorAlias {successorAlias} --countReplicas {countReplicas}"],
核心函数调用流程:
executeProcesses
--> prepareCommand // 组装脚本
--> applyEnvironmentVariables // 环境变量
--> executeProcess // 执行脚本
--> os.CommandRun(command, env)
--> generateShellScript // 生成shell 脚本
--> cmd.CombinedOutput() // 执行命令
OC如何调用钩子脚本
1 将脚本中的占位符替换成实际的变量
// prepareCommand replaces agreed-upon placeholders with analysis data
// prepareCommand 通过分析得到的数据替换脚本的占位符
func prepareCommand(command string, topologyRecovery *TopologyRecovery) (result string, async bool) {analysisEntry := &topologyRecovery.AnalysisEntry// 从 command 字符串中移除前后的空白字符。command = strings.TrimSpace(command)// 检查 command 字符串是否以 "&" 结尾。if strings.HasSuffix(command, "&") {// 如果是,移除末尾的 "&" 并将 async 设置为 true。command = strings.TrimRight(command, "&")async = true}// 将命令中的 占位符替换为 analysisEntry 的字符串表示形式。command = strings.Replace(command, "{failureType}", string(analysisEntry.Analysis), -1)command = strings.Replace(command, "{instanceType}", string(analysisEntry.GetAnalysisInstanceType()), -1)command = strings.Replace(command, "{isMaster}", fmt.Sprintf("%t", analysisEntry.IsMaster), -1)command = strings.Replace(command, "{isCoMaster}", fmt.Sprintf("%t", analysisEntry.IsCoMaster), -1)command = strings.Replace(command, "{failureDescription}", analysisEntry.Description, -1)command = strings.Replace(command, "{command}", analysisEntry.CommandHint, -1)command = strings.Replace(command, "{failedHost}", analysisEntry.AnalyzedInstanceKey.Hostname, -1)command = strings.Replace(command, "{failedPort}", fmt.Sprintf("%d", analysisEntry.AnalyzedInstanceKey.Port), -1)command = strings.Replace(command, "{failureCluster}", analysisEntry.ClusterDetails.ClusterName, -1)command = strings.Replace(command, "{failureClusterAlias}", analysisEntry.ClusterDetails.ClusterAlias, -1)command = strings.Replace(command, "{failureClusterDomain}", analysisEntry.ClusterDetails.ClusterDomain, -1)command = strings.Replace(command, "{countSlaves}", fmt.Sprintf("%d", analysisEntry.CountReplicas), -1)command = strings.Replace(command, "{countReplicas}", fmt.Sprintf("%d", analysisEntry.CountReplicas), -1)command = strings.Replace(command, "{isDowntimed}", fmt.Sprint(analysisEntry.IsDowntimed), -1)command = strings.Replace(command, "{autoMasterRecovery}", fmt.Sprint(analysisEntry.ClusterDetails.HasAutomatedMasterRecovery), -1)command = strings.Replace(command, "{autoIntermediateMasterRecovery}", 
fmt.Sprint(analysisEntry.ClusterDetails.HasAutomatedIntermediateMasterRecovery), -1)command = strings.Replace(command, "{orchestratorHost}", process.ThisHostname, -1)command = strings.Replace(command, "{recoveryUID}", topologyRecovery.UID, -1)command = strings.Replace(command, "{isSuccessful}", fmt.Sprint(topologyRecovery.SuccessorKey != nil), -1)if topologyRecovery.SuccessorKey != nil {command = strings.Replace(command, "{successorHost}", topologyRecovery.SuccessorKey.Hostname, -1)command = strings.Replace(command, "{successorPort}", fmt.Sprintf("%d", topologyRecovery.SuccessorKey.Port), -1)// As long as SuccessorBinlogCoordinates != nil, we replace {successorBinlogCoordinates}// Format of the display string of binlog coordinates would be LogFile:LogPositonif topologyRecovery.SuccessorBinlogCoordinates != nil {command = strings.Replace(command, "{successorBinlogCoordinates}", topologyRecovery.SuccessorBinlogCoordinates.DisplayString(), -1)}// As long as SucesssorKey != nil, we replace {successorAlias}.// If SucessorAlias is "", it's fine. We'll replace {successorAlias} with "".command = strings.Replace(command, "{successorAlias}", topologyRecovery.SuccessorAlias, -1)}command = strings.Replace(command, "{lostSlaves}", topologyRecovery.LostReplicas.ToCommaDelimitedList(), -1)command = strings.Replace(command, "{lostReplicas}", topologyRecovery.LostReplicas.ToCommaDelimitedList(), -1)command = strings.Replace(command, "{countLostReplicas}", fmt.Sprintf("%d", len(topologyRecovery.LostReplicas)), -1)command = strings.Replace(command, "{slaveHosts}", analysisEntry.Replicas.ToCommaDelimitedList(), -1)command = strings.Replace(command, "{replicaHosts}", analysisEntry.Replicas.ToCommaDelimitedList(), -1)return command, async
}
2 根据命令生成一个临时的Shell 脚本
// generateShellScript 根据给定要执行的命令生成一个临时的 shell 脚本,
func generateShellScript(commandText string, env []string, arguments ...string) (*exec.Cmd, string, error) {// 获取配置中的 shell 命令shell := config.Config.ProcessesShellCommand// 将命令文本转换为字节切片commandBytes := []byte(commandText)// 创建临时文件tmpFile, err := ioutil.TempFile("", "orchestrator-process-cmd-")if err != nil {return nil, "", log.Errorf("generateShellScript() 失败创建临时文件: %v", err.Error())}// 将命令文本写入临时文件ioutil.WriteFile(tmpFile.Name(), commandBytes, 0640)// 构建 shell 命令的参数shellArguments := append([]string{}, tmpFile.Name())shellArguments = append(shellArguments, arguments...)// 创建 exec.Command,该命令可与创建的脚本名称一起执行cmd := exec.Command(shell, shellArguments...)cmd.Env = envreturn cmd, tmpFile.Name(), nil
}