Deploying Hadoop on Debian Virtual Machines
1. Environment Preparation and Planning
Plan three Debian virtual machines, one master and two workers:
192.168.1.101 master
192.168.1.102 slave1
192.168.1.103 slave2
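Assuming the addresses above, add the same name mappings to /etc/hosts on every node so the machines can reach each other by hostname (adjust to your actual network):
sudo tee -a /etc/hosts <<'EOF'
192.168.1.101 master
192.168.1.102 slave1
192.168.1.103 slave2
EOF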
2. Single-Node Pseudo-Distributed Deployment
# install OpenJDK 8, the Java runtime Hadoop needs
sudo apt update
sudo apt install -y openjdk-8-jdk
java -version
# install the SSH server and set up passwordless login to localhost
sudo apt install -y openssh-server
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh localhost
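The login should now succeed without a password prompt; type exit to drop back out before continuing.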
# download Hadoop 3.3.6 and unpack it to /usr/local
wget https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
sudo tar -xzvf hadoop-3.3.6.tar.gz -C /usr/local
sudo ln -sfn /usr/local/hadoop-3.3.6 /usr/local/hadoop
Append the environment variables to /etc/profile (or ~/.bashrc):
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
Apply the changes: source /etc/profile
Hadoop also needs JAVA_HOME set explicitly in $HADOOP_HOME/etc/hadoop/hadoop-env.sh:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
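To confirm the variables took effect, check that the hadoop binary is on the PATH:
hadoop version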
$HADOOP_HOME/etc/hadoop/core-site.xml:
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop/tmp</value>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/hdfs-site.xml (replication is 1 on a single node):
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/usr/local/hadoop/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/usr/local/hadoop/hdfs/data</value>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/mapred-site.xml:
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/yarn-site.xml:
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
# create the HDFS storage directories and give them to the current user
sudo mkdir -p /usr/local/hadoop/tmp /usr/local/hadoop/hdfs/{name,data}
sudo chown -R $USER:$USER /usr/local/hadoop
# format the NameNode (first run only)
hdfs namenode -format
# bring up HDFS and YARN
start-dfs.sh
start-yarn.sh
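Once both scripts return, jps (shipped with the JDK) should show all five daemons of a pseudo-distributed node:
jps
# expected: NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager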
# smoke test: run the bundled wordcount example on the stock config files
hdfs dfs -mkdir -p /user/$USER/input
hdfs dfs -put $HADOOP_HOME/etc/hadoop/*.xml /user/$USER/input
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.6.jar \
wordcount /user/$USER/input /user/$USER/output
hdfs dfs -cat /user/$USER/output/part-r-00000
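Hadoop refuses to write into an existing output directory, so clean up before re-running the example:
hdfs dfs -rm -r /user/$USER/output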
3. Multi-Node Cluster Deployment
Install Java, SSH, and Hadoop on every node as in section 2, then configure the cluster as follows.
# generate a key pair on every machine
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
# on master, push its public key to every node (including itself)
ssh-copy-id master
ssh-copy-id slave1
ssh-copy-id slave2
# verify passwordless logins
ssh slave1 true && ssh slave2 true
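Both checks should return without a password prompt; if hostname resolution fails, re-check the /etc/hosts entries from section 1 on every node.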
Set JAVA_HOME in $HADOOP_HOME/etc/hadoop/hadoop-env.sh on every node:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
List the workers in $HADOOP_HOME/etc/hadoop/workers on master, one hostname per line:
slave1
slave2
$HADOOP_HOME/etc/hadoop/core-site.xml (identical on all nodes; fs.defaultFS now points at master):
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop/tmp</value>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/hdfs-site.xml (replication raised to 2, one copy per DataNode):
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/usr/local/hadoop/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/usr/local/hadoop/hdfs/data</value>
  </property>
</configuration>
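mapred-site.xml and yarn-site.xml can be reused unchanged from section 2. One property often added for a multi-node cluster (an assumption here, not part of the original configs) tells the NodeManagers where the ResourceManager runs, in yarn-site.xml:
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>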
# on every node: create the storage directories (assumes a dedicated 'hadoop' user on each machine)
sudo mkdir -p /usr/local/hadoop/tmp /usr/local/hadoop/hdfs/{name,data}
sudo chown -R hadoop:hadoop /usr/local/hadoop
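With configuration complete on master, copy it to the workers (a sketch assuming Hadoop is installed at the same path on every node):
# push the config directory to each worker; $HADOOP_HOME must be identical everywhere
for node in slave1 slave2; do
  scp -r $HADOOP_HOME/etc/hadoop/* $node:$HADOOP_HOME/etc/hadoop/
done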
# format the NameNode on master only, and only once
hdfs namenode -format
# start HDFS and YARN from master
start-dfs.sh
start-yarn.sh
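Confirm the cluster came up (run on master):
# both DataNodes should be listed as live
hdfs dfsadmin -report
# both NodeManagers should have registered with YARN
yarn node -list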
4. Firewall and Common Issues
# simplest option for a lab environment: turn the firewall off entirely
sudo ufw disable
# or open only the required ports
sudo ufw allow 22,8088,9870,9000/tcp
sudo ufw reload
# confirm that the services are listening on the expected ports
ss -tulpen | grep -E '9870|8088|9000'
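The web UIs are the quickest end-to-end check once the ports are open (Hadoop 3.x defaults: NameNode on 9870, ResourceManager on 8088):
curl -sI http://master:9870
curl -sI http://master:8088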