Implement the requirement at the DB operations layer rather than in the application layer, to minimize business intrusion into the application and its performance impact.
Maxwell is an ETL tool. Its basic principle is to parse the MySQL binlog in real time and push the change events to a message queue (MQ) for downstream business logic to consume.
For example, the most typical big-data logging path:
mysql binlog -> maxwell -> kafka
The path for the site's search engine:
mysql binlog -> maxwell -> mq -> logstash -> elasticsearch
The Redis path:
mysql binlog -> maxwell -> redis
All of these paths can get the data into MongoDB with one more consumption step, but the dependency chain is longer and consumes more server resources.
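To make the comparison concrete, the Kafka path would need a separate consumer service along the lines of the sketch below before data lands in MongoDB. This is only a rough illustration: the broker address, topic name, Mongo URI, and the assumption that every table has an id primary key are placeholders; the event shape follows Maxwell's JSON output format.

// Sketch of the "longer" path: consume Maxwell's JSON from Kafka and apply it to MongoDB.
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.ReplaceOptions;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.bson.Document;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class MaxwellKafkaToMongo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");   // placeholder broker
        props.put("group.id", "maxwell-to-mongo");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
             MongoClient mongo = MongoClients.create("mongodb://localhost/")) {
            consumer.subscribe(Collections.singletonList("maxwell")); // Maxwell's default topic name
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                for (ConsumerRecord<String, String> record : records) {
                    // A Maxwell event looks roughly like:
                    // {"database":"db","table":"users","type":"insert","ts":...,"data":{"id":1,...}}
                    Document event = Document.parse(record.value());
                    Document data = event.get("data", Document.class);
                    if (data == null) {
                        continue;
                    }
                    MongoCollection<Document> collection = mongo
                            .getDatabase(event.getString("database"))
                            .getCollection(event.getString("table"));
                    Object id = data.get("id");                  // assumes an "id" primary key
                    if ("delete".equals(event.getString("type"))) {
                        collection.deleteOne(Filters.eq("id", id));
                    } else {
                        // insert/update both carry the full row image in "data": upsert it
                        collection.replaceOne(Filters.eq("id", id), data,
                                new ReplaceOptions().upsert(true));
                    }
                }
            }
        }
    }
}

Running and operating an extra service like this (plus Kafka itself) is exactly the overhead the custom producer approach avoids.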
After weighing the options, I chose to use Maxwell's custom producer mechanism to write data directly into MongoDB. A quick search did not turn up an existing adapter, so it has to be written by hand.
Reference: https://maxwells-daemon.io/producers/#custom-producer
Preparing the Maxwell environment
Maxwell, like canal and similar tools, is a Java application, so the first step is to set up a Java development environment.
Then download the Maxwell release package to get the jar libs it depends on and to use it for debugging.
Installation guide: https://maxwells-daemon.io/quickstart/
/opt/maxwell/bin/maxwell --config /opt/maxwell/config.properties
The jar we mainly need, maxwell-*.jar, is in the /opt/maxwell/lib/ directory.
Configuring the Maven project
<dependencies>
    <dependency>
        <groupId>com.zendesk</groupId>
        <artifactId>maxwell</artifactId>
        <version>1.44.0</version>
        <scope>system</scope>
        <systemPath>/opt/maxwell/lib/maxwell-1.44.0.jar</systemPath>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>2.0.0</version>
        <scope>system</scope>
        <systemPath>/opt/maxwell/lib/slf4j-api-2.0.0.jar</systemPath>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.17.1</version>
        <scope>system</scope>
        <systemPath>/opt/maxwell/lib/log4j-core-2.17.1.jar</systemPath>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-slf4j2-impl</artifactId>
        <version>2.24.3</version>
        <scope>system</scope>
        <systemPath>/opt/maxwell/lib/log4j-slf4j2-impl-2.24.3.jar</systemPath>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.11</version>
        <scope>system</scope>
        <systemPath>/opt/maxwell/lib/commons-lang3-3.11.jar</systemPath>
    </dependency>
    <dependency>
        <groupId>org.mongodb</groupId>
        <artifactId>mongodb-driver-sync</artifactId>
        <version>5.5.1</version>
        <type>jar</type>
    </dependency>
</dependencies>
Code
Following the official Example code and the Redis producer implementation, here is a basically usable version.
Two classes:
MaxwellMongodbProducer.java
package com.abc.maxwell.producer;

import com.zendesk.maxwell.MaxwellContext;
import com.zendesk.maxwell.producer.AbstractProducer;
import com.zendesk.maxwell.row.RowMap;
import com.zendesk.maxwell.util.StoppableTask;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Updates;
import org.bson.Document;
import org.bson.conversions.Bson;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.mongodb.client.model.Filters.eq;

/**
 * Custom Maxwell producer that writes row events directly into MongoDB.
 */
public class MaxwellMongodbProducer extends AbstractProducer implements StoppableTask {

    private static final Logger logger = LoggerFactory.getLogger(MaxwellMongodbProducer.class);

    private static MongoClient mongoClient;

    private final String dbUri;
    private final String targetDbName;
    private String[] assignedDbNames = null;
    private String[] assignedTableNames = null;

    public MaxwellMongodbProducer(MaxwellContext context) {
        super(context);
        // custom_producer.* entries from config.properties (prefix already stripped by Maxwell)
        Properties props = context.getConfig().customProducerProperties;
        dbUri = props.getProperty("target_db_uri", "mongodb://localhost/");
        String dbNames = props.getProperty("assigned_dbs");
        if (dbNames != null) {
            assignedDbNames = dbNames.split(",");
        }
        String tableNames = props.getProperty("assigned_tables");
        if (tableNames != null) {
            assignedTableNames = tableNames.split(",");
        }
        targetDbName = props.getProperty("target_db_name");
    }

    @Override
    public void push(RowMap r) throws Exception {
        if (!r.shouldOutput(outputConfig)) {
            context.setPosition(r.getNextPosition());
            return;
        }
        // retry once on failure before giving up
        for (int cxErrors = 0; cxErrors < 2; cxErrors++) {
            try {
                this.sendToMongodb(r);
                break;
            } catch (Exception e) {
                logger.error("Exception during put", e);
                if (!context.getConfig().ignoreProducerError) {
                    throw new RuntimeException(e);
                }
            }
        }
        if (r.isTXCommit()) {
            context.setPosition(r.getNextPosition());
        }
    }

    private void sendToMongodb(RowMap msg) throws Exception {
        // only sync the databases/tables we were assigned, if configured
        if (assignedDbNames != null && !Arrays.asList(assignedDbNames).contains(msg.getDatabase())) {
            return;
        }
        if (assignedTableNames != null && !Arrays.asList(assignedTableNames).contains(msg.getTable())) {
            return;
        }
        if (logger.isDebugEnabled()) {
            logger.debug("-> mongodb sync msg:{}", msg);
        }
        String pk = "id"; // assumes every table uses "id" as its primary key
        if (msg.getRowType().contains("insert")) {
            createCollection(msg.getDatabase(), msg.getTable());
            Document doc = new Document(msg.getData());
            getCollection(msg.getDatabase(), msg.getTable()).insertOne(doc);
        } else if (msg.getRowType().contains("update")) {
            Long id = (Long) msg.getData().get(pk);
            if (id == null || id <= 0) {
                return;
            }
            Bson updateQuery = eq(pk, id);
            List<Bson> updates = new ArrayList<>();
            if (msg.getData() != null) {
                for (Map.Entry<String, Object> entry : msg.getData().entrySet()) {
                    updates.add(Updates.set(entry.getKey(), entry.getValue()));
                }
                getCollection(msg.getDatabase(), msg.getTable()).updateOne(updateQuery, Updates.combine(updates));
            }
        } else if (msg.getRowType().contains("delete")) {
            Document doc = new Document(msg.getData());
            Long id = (Long) doc.get(pk);
            if (id == null || id <= 0) {
                return;
            }
            Bson deleteQuery = eq(pk, id);
            getCollection(msg.getDatabase(), msg.getTable()).deleteOne(deleteQuery);
        } else {
            logger.error("unsupported msg type: {}", msg.getRowType());
        }
    }

    protected MongoCollection<Document> getCollection(String dbName, String collectionName) {
        return getDb(dbName).getCollection(collectionName);
    }

    protected void createCollection(String dbName, String collectionName) {
        try {
            getDb(dbName).createCollection(collectionName);
        } catch (Exception e) {
            // the collection probably exists already; log and continue
            logger.debug("createCollection failed: {}", e.getMessage());
        }
    }

    protected MongoDatabase getDb(String dbName) {
        return getClient().getDatabase(targetDbName(dbName));
    }

    private String targetDbName(String dbName) {
        // fall back to the source database name when no target_db_name is configured
        return !StringUtils.isBlank(targetDbName) ? targetDbName : dbName;
    }

    private MongoClient getClient() {
        if (mongoClient == null) {
            mongoClient = MongoClients.create(dbUri);
        }
        return mongoClient;
    }

    @Override
    public void requestStop() {
        getClient().close();
    }

    @Override
    public void awaitStop(Long timeout) {
    }

    @Override
    public StoppableTask getStoppableTask() {
        return this;
    }
}
The factory class, MaxwellMongodbProducerFactory.java:
package com.abc.maxwell.producer;

import com.zendesk.maxwell.MaxwellContext;
import com.zendesk.maxwell.producer.AbstractProducer;
import com.zendesk.maxwell.producer.ProducerFactory;

/**
 * Factory Maxwell uses to instantiate the custom MongoDB producer.
 */
public class MaxwellMongodbProducerFactory implements ProducerFactory {

    @Override
    public AbstractProducer createProducer(MaxwellContext context) {
        return new MaxwellMongodbProducer(context);
    }
}
Debugging
Point the build's jar output path at the /opt/maxwell/lib directory (or copy the packaged jar there after each build). Since Maxwell's launch script builds its classpath from that lib directory, the MongoDB driver jars (mongodb-driver-sync together with its mongodb-driver-core and bson dependencies) most likely need to be copied there as well.
Then edit the Maxwell config file /opt/maxwell/config.properties:
custom_producer.factory takes the fully-qualified factory class name; the remaining entries are the custom options the producer reads (Maxwell strips the custom_producer. prefix before exposing them via customProducerProperties, which is why the Java code looks them up as target_db_uri, target_db_name, and so on):
custom_producer.factory=com.abc.maxwell.producer.MaxwellMongodbProducerFactory
custom_producer.target_db_uri=mongodb://localhost/
custom_producer.target_db_name=
custom_producer.assigned_dbs=
custom_producer.assigned_tables=
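After restarting Maxwell with this configuration, a quick way to confirm that rows are flowing is to query the target MongoDB directly. A minimal check follows; the URI, database, and collection names are assumptions to be replaced with your own:

import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoCollection;
import org.bson.Document;

public class SyncCheck {
    public static void main(String[] args) {
        // Assumed URI/database/collection; match them to target_db_uri and the source table.
        try (MongoClient client = MongoClients.create("mongodb://localhost/")) {
            MongoCollection<Document> users = client.getDatabase("test").getCollection("users");
            System.out.println("synced documents: " + users.countDocuments());
            System.out.println("sample document: " + users.find().first());
        }
    }
}

Insert or update a row in the corresponding MySQL table and re-run the check (or watch Maxwell's log at debug level) to see the change propagate.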