在 Ubuntu 上简化 Hadoop 安装的可行方案
一、最快路径:单机伪分布式一键脚本
#!/usr/bin/env bash
# One-shot pseudo-distributed Hadoop installer for Ubuntu 20.04/22.04.
# Overridable via environment: HADOOP_VER, JAVA_PKG, HADOOP_HOME.
set -euo pipefail   # abort on errors, unset variables, and pipeline failures

echo "=== 简化安装 Hadoop 伪分布式(Ubuntu 20.04/22.04) ==="

# 0) Parameters (caller may override any of these via the environment).
HADOOP_VER=${HADOOP_VER:-"3.3.6"}
JAVA_PKG=${JAVA_PKG:-"openjdk-11-jdk"}
HADOOP_HOME=${HADOOP_HOME:-"$HOME/hadoop"}

# 1) Base dependencies: JDK, local SSH server, rsync (used by Hadoop's scripts).
sudo apt update -y
sudo apt install -y "$JAVA_PKG" openssh-server rsync
# 2) Passwordless SSH to localhost (required by start-dfs.sh / start-yarn.sh).
mkdir -p ~/.ssh
chmod 700 ~/.ssh   # sshd refuses keys when ~/.ssh is group/world accessible
# Generate a key only if none exists: ssh-keygen would otherwise prompt
# "Overwrite (y/n)?" and stall a non-interactive run (or clobber the key).
if [ ! -f ~/.ssh/id_rsa ]; then
  ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
fi
# Append the public key idempotently so re-runs don't accumulate duplicates.
grep -qxF "$(cat ~/.ssh/id_rsa.pub)" ~/.ssh/authorized_keys 2>/dev/null \
  || cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh-keyscan -H localhost >> ~/.ssh/known_hosts
ssh localhost true   # smoke test; set -e aborts here if passwordless SSH failed
# 3) Download and unpack Hadoop from the Apache archive (has every release).
cd ~
tarball="hadoop-${HADOOP_VER}.tar.gz"
wget -q "https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_VER}/${tarball}"
tar -xzf "$tarball"
rm -f "$tarball"
# Back up any previous installation instead of clobbering it.
[ -d "$HADOOP_HOME" ] && mv "$HADOOP_HOME" "${HADOOP_HOME}.bak_$(date +%F_%T)"
mv "hadoop-${HADOOP_VER}" "$HADOOP_HOME"
# 4) Environment variables — export for THIS process and persist to ~/.bashrc.
# NOTE: `source ~/.bashrc` from a script is unreliable: Ubuntu's default
# bashrc returns early for non-interactive shells, so the exports below
# would never take effect for the rest of this script. Export directly.
JAVA_HOME=$(readlink -f /usr/bin/java | sed 's:/bin/java::')
export HADOOP_HOME JAVA_HOME
export PATH="$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin"

# append_once <line> — add a line to ~/.bashrc only if it is not already there.
append_once() {
  grep -qxF "$1" ~/.bashrc || echo "$1" >> ~/.bashrc
}
append_once "export HADOOP_HOME=$HADOOP_HOME"
append_once 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin'
append_once "export JAVA_HOME=$JAVA_HOME"
# 5) Write a minimal pseudo-distributed configuration.
# Hadoop 3.x ships mapred-site.xml directly (the .template copy step from
# 2.x-era guides is obsolete), and every file below is (re)written in full,
# so no template copy is needed.
cat > "$HADOOP_HOME/etc/hadoop/core-site.xml" <<'EOF'
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
EOF
cat > "$HADOOP_HOME/etc/hadoop/hdfs-site.xml" <<'EOF'
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
EOF
cat > "$HADOOP_HOME/etc/hadoop/mapred-site.xml" <<'EOF'
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
EOF
cat > "$HADOOP_HOME/etc/hadoop/yarn-site.xml" <<'EOF'
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
EOF
# Hadoop's start scripts do not reliably inherit JAVA_HOME from the caller's
# environment — pin it in hadoop-env.sh to avoid "ERROR: JAVA_HOME is not set".
echo "export JAVA_HOME=$(readlink -f /usr/bin/java | sed 's:/bin/java::')" \
  >> "$HADOOP_HOME/etc/hadoop/hadoop-env.sh"
# 6) Format the NameNode (first run only) and start HDFS + YARN.
# `namenode -format -force` WIPES all HDFS metadata; guard it so re-running
# this script does not destroy an existing filesystem.
# NOTE(review): path below is Hadoop's default name dir
# (${hadoop.tmp.dir}/dfs/name with hadoop.tmp.dir=/tmp/hadoop-<user>) —
# adjust if dfs.namenode.name.dir is customized.
name_dir="/tmp/hadoop-${USER}/dfs/name"
if [ -d "$name_dir/current" ]; then
  echo "NameNode 已格式化($name_dir 已存在),跳过 format 以保留数据。"
else
  "$HADOOP_HOME/bin/hdfs" namenode -format -force
fi
"$HADOOP_HOME/sbin/start-dfs.sh"
"$HADOOP_HOME/sbin/start-yarn.sh"

echo "✅ 安装完成,进程:"
jps
echo "✅ Web UI:"
echo " HDFS NameNode: http://localhost:9870"
echo " YARN ResourceManager: http://localhost:8088"
echo "✅ 测试命令:"
echo " hdfs dfs -ls /"
echo " hdfs dfs -mkdir -p /test && echo 'hello hadoop' | hdfs dfs -put - /test/hello.txt"
二、常见坑与快速修复
JAVA_HOME 未生效:执行 echo $JAVA_HOME 检查;若为空,重新打开终端,或手动 export JAVA_HOME=$(readlink -f /usr/bin/java | sed 's:/bin/java::')
SSH 免密失败:确认 ~/.ssh 权限为 700、~/.ssh/authorized_keys 权限为 600,并用 ssh localhost 手动验证是否免密登录
端口被占用:用 sudo lsof -i:9000(或 9870、8088)找到占用进程并停止,再重新启动 Hadoop
三、进阶:使用现成脚本与自动化工具
四、验证与常用命令