Linux下Hadoop数据备份实用指南
一 备份策略与总体建议
二 常用备份方法与命令
# Enable snapshots on the directory (administrator operation, needed once per directory).
hdfs dfsadmin -allowSnapshot /path/to/dir
# Create a named point-in-time snapshot of the directory.
hdfs dfs -createSnapshot /path/to/dir snap_20251118
# List the directory's snapshots. dfsadmin has no -listSnapshots subcommand;
# snapshots are exposed under the reserved .snapshot path instead.
hdfs dfs -ls /path/to/dir/.snapshot
# Restore by copying the snapshot contents out of the .snapshot path.
hdfs dfs -cp /path/to/dir/.snapshot/snap_20251118 /path/to/restore
# Delete the snapshot once it is no longer needed.
hdfs dfs -deleteSnapshot /path/to/dir snap_20251118
# Copy the Hive warehouse to the backup cluster under a fixed YYYYMMDD suffix.
hadoop distcp hdfs://src-nn:8020/user/hive/warehouse hdfs://backup-nn:8020/backup/hive/warehouse_20251118
# Same copy with the suffix generated at run time. %Y%m%d produces YYYYMMDD,
# matching the names used by the other commands (%F would produce YYYY-MM-DD).
hadoop distcp hdfs://src-nn:8020/user/hive/warehouse hdfs://backup-nn:8020/backup/hive/warehouse_$(date +%Y%m%d)
# Restore: copy a dated backup from the backup cluster into a separate recovery path.
hadoop distcp hdfs://backup-nn:8020/backup/hive/warehouse_20251118 hdfs://src-nn:8020/user/hive/warehouse_recovered
# Compute the date stamp once so the download directory and the archive always
# share the same suffix, even if the job runs across midnight. YYYYMMDD matches
# the archive name used by the restore command below.
backup_date=$(date +%Y%m%d)
# Download the warehouse from HDFS to local disk, then compress it.
hdfs dfs -get /user/hive/warehouse "/backup/hive_warehouse_${backup_date}"
tar -czvf "/backup/hive_warehouse_${backup_date}.tar.gz" -C "/backup/hive_warehouse_${backup_date}" .
# Restore: tar -C requires the extraction directory to exist.
mkdir -p /restore
tar -xzvf /backup/hive_warehouse_20251118.tar.gz -C /restore
# Upload the extracted contents, not the /restore directory itself; putting the
# directory into an existing target would create /user/hive/warehouse/restore.
# NOTE: the glob does not match hidden (dot) entries at the top level.
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -put -f /restore/* /user/hive/warehouse/
# Archive the two listed Hadoop configuration directories into a gzip tarball
# under /backup, named with a date and time stamp (YYYY-MM-DD_HHMMSS).
# NOTE(review): both directories are passed to tar; confirm each exists on the
# target host, or keep only the one that applies to the installation.
tar -czvf /backup/hdfs-config-$(date +%F_%H%M%S).tar.gz /etc/hadoop/conf /usr/local/hadoop/etc/hadoop
# Crontab entry: run the DistCp backup script every day at 02:00 and append
# both stdout and stderr to /var/log/hdfs_backup.log.
0 2 * * * /opt/backup/backup_hdfs_distcp.sh >> /var/log/hdfs_backup.log 2>&1
三 恢复与校验
# Check the health of everything under /path, printing per-file, per-block and
# block-location detail.
hdfs fsck /path -files -blocks -locations
# Trigger lease recovery on a single file, retrying up to 10 times.
hdfs debug recoverLease -path /path/to/file -retries 10
四 备份策略示例
五 注意事项