#!/usr/bin/env bash
#
# Compact the small files of one Hive partition into files of at most 128 MB.
#
# Before running: execute `show create table your_table;` in Hive and copy the
# table directory from LOCATION into TABLE_DIR below.
#
# Only suitable for uncompressed, newline-delimited text tables: the files are
# byte-concatenated and re-split on line boundaries. Do NOT use this on
# ORC / Parquet / SequenceFile or compressed data.
#
# The HDFS backup in BACKUP_DIR is intentionally left in place; remove it by
# hand once the result has been checked.

set -euo pipefail

readonly HIVE_TABLE='your_db.your_table'
readonly TABLE_DIR='/user/hive/warehouse/your_db.db/your_table'
readonly PARTITION='pt=2026-01-06'
readonly PART_DIR="${TABLE_DIR}/${PARTITION}"
# The backup lives OUTSIDE the table directory. A sibling directory named
# "pt=..._bak" inside the table directory looks like a partition directory and
# could be picked up (or rejected) by MSCK REPAIR TABLE.
readonly BACKUP_DIR="${TABLE_DIR}_bak/${PARTITION}"
readonly CHUNK_SIZE='128M'

# Print an error message to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print the total number of lines of all files directly under HDFS dir $1.
# The glob is quoted so that HDFS, not the local shell, expands it.
count_hdfs_lines() {
  hdfs dfs -cat "$1/*" | wc -l
}

# Local scratch space; honours TMPDIR, so point TMPDIR at a disk large enough
# to hold two copies of the partition. Removed on every exit path.
work_dir=$(mktemp -d) || die "mktemp failed"
cleanup() {
  rm -rf -- "${work_dir}"
}
trap cleanup EXIT

merged_file="${work_dir}/merged_file.txt"
chunk_dir="${work_dir}/chunks"
mkdir -- "${chunk_dir}"

# 1. Show the current content of the partition directory.
hdfs dfs -ls "${PART_DIR}"

# 2. Back up the original small files (protection against accidental loss).
#    Refuse to reuse an existing backup so stale files cannot skew the
#    row-count checks below.
if hdfs dfs -test -e "${BACKUP_DIR}"; then
  die "backup directory already exists: ${BACKUP_DIR}"
fi
hdfs dfs -mkdir -p "${BACKUP_DIR}"
hdfs dfs -cp "${PART_DIR}/*" "${BACKUP_DIR}/"

# 3. Merge the small files into one local file.
hdfs dfs -getmerge "${PART_DIR}/*" "${merged_file}"

# The merged copy must have exactly as many lines as the backup. A mismatch
# means the concatenation changed the data (e.g. a file without a trailing
# newline glued two rows together), so stop before anything is deleted.
old_rows=$(count_hdfs_lines "${BACKUP_DIR}")
merged_rows=$(wc -l <"${merged_file}")
if ((old_rows != merged_rows)); then
  die "line count changed while merging (${old_rows} -> ${merged_rows}); nothing was deleted"
fi

# 4. Split into chunks of at most 128 MB named merged_file_000, _001, ...
#    -C (line-bytes) keeps every line intact; -b would cut rows in half at the
#    chunk boundaries.
split -C "${CHUNK_SIZE}" -d -a 3 "${merged_file}" "${chunk_dir}/merged_file_"

shopt -s nullglob
chunks=("${chunk_dir}"/merged_file_*)
shopt -u nullglob
if ((${#chunks[@]} == 0)); then
  die "split produced no files (empty partition?); nothing was deleted"
fi

# 5. Remove the original small files from the partition directory.
hdfs dfs -rm "${PART_DIR}/*"

# 6. Upload the chunks into the original partition directory.
for chunk in "${chunks[@]}"; do
  hdfs dfs -put "${chunk}" "${PART_DIR}/"
done

# 7. Refresh the Hive metastore.
hive -e "MSCK REPAIR TABLE ${HIVE_TABLE};"

# 8. Verify data integrity: line counts before and after must be equal.
new_rows=$(count_hdfs_lines "${PART_DIR}")
printf '原数据行数:%s\n' "${old_rows}"
printf '新数据行数:%s\n' "${new_rows}"
if ((old_rows != new_rows)); then
  die "row count mismatch; restore the partition from ${BACKUP_DIR}"
fi

# 9. Local temporary files are removed by the EXIT trap.