在 Ubuntu 上运行 Hadoop 程序的完整步骤
一 环境准备与安装
sudo apt update && sudo apt install openjdk-11-jdk
java -version
wget https://downloads.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
tar -xzf hadoop-3.3.1.tar.gz && sudo mv hadoop-3.3.1 /usr/local/hadoop
配置环境变量（写入 ~/.bashrc 或 /etc/profile.d/hadoop.sh）：
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
source ~/.bashrc
二 配置 Hadoop 伪分布式
在 $HADOOP_CONF_DIR（即 /usr/local/hadoop/etc/hadoop）目录下编辑以下配置文件。
core-site.xml：
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
hdfs-site.xml：
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/data/namenode</value>
</property>
</configuration>
mapred-site.xml：
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
yarn-site.xml：
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>localhost</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
hdfs namenode -format
start-dfs.sh
start-yarn.sh
jps（应看到 NameNode、DataNode、ResourceManager、NodeManager）
三 运行第一个 MapReduce 程序 WordCount
hdfs dfs -mkdir -p /user/$USER/input
hdfs dfs -put $HADOOP_HOME/etc/hadoop/*.xml /user/$USER/input
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount /user/$USER/input /user/$USER/output
$HADOOP_HOME/bin/hadoop com.sun.tools.javac.Main WordCount.java
jar cf wc.jar WordCount*.class
hadoop jar wc.jar WordCount /user/$USER/input /user/$USER/output2
hdfs dfs -cat /user/$USER/output/part-r-00000
四 常用管理与排错命令
start-dfs.sh、start-yarn.sh
stop-dfs.sh、stop-yarn.sh
hdfs dfs -ls /
hdfs dfs -put localfile /user/$USER/
hdfs dfs -get /user/$USER/file ./
yarn application -list
yarn logs -applicationId <app_id>
jps 确认守护进程
五 常见问题与优化建议
内存不足导致作业失败时，可调大 NodeManager 的可用内存（yarn.nodemanager.resource.memory-mb），或减少作业并发。