Local machine IP: 192.168.1.190 (reference for the IP configuration below)
# This file describes the network interfaces available on your system
# and how to activate them. For more information, see interfaces(5).
source /etc/network/interfaces.d/*
# The loopback network interface
auto lo
iface lo inet loopback
# The primary network interface
auto enp0s3
iface enp0s3 inet static
address 192.168.1.190/24
network 192.168.1.0
broadcast 192.168.1.255
gateway 192.168.1.1
dns-nameservers 8.8.8.8 114.114.114.114
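To apply and verify the static address, a minimal sketch (assumes ifupdown manages the interface, as the interfaces file above implies; netplan-based Ubuntu releases work differently):
$ sudo systemctl restart networking   # or: sudo ifdown enp0s3 && sudo ifup enp0s3
$ ip addr show enp0s3                 # should show 192.168.1.190/24
$ ip route | grep default             # should show 192.168.1.1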
$ sudo useradd -m hdp -s /bin/bash
$ sudo passwd hdp
$ wget https://mirrors.huaweicloud.com/java/jdk/8u202-b08/jdk-8u202-linux-x64.tar.gz
$ wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.0/hadoop-3.3.0.tar.gz
$ sudo tar -zxvf jdk-8u202-linux-x64.tar.gz -C /usr/local/
$ sudo tar -zxvf hadoop-3.3.0.tar.gz -C /usr/local/
$ sudo chown -R hdp:hdp /usr/local/hadoop-3.3.0
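Optionally, give hdp sudo rights and confirm both trees were extracted (the "sudo" group name is the Debian/Ubuntu convention):
$ sudo usermod -aG sudo hdp
$ ls -ld /usr/local/jdk1.8.0_202 /usr/local/hadoop-3.3.0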
Add the JDK environment variables to /etc/profile:
$ sudo vi /etc/profile
# jdk configuration
export JAVA_HOME=/usr/local/jdk1.8.0_202
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
[hdp@master ~]$ source /etc/profile
[hdp@master ~]$ java -version
java version "1.8.0_202"
Java(TM) SE Runtime Environment (build 1.8.0_202-b08)
Java HotSpot(TM) 64-Bit Server VM (build 25.202-b08, mixed mode)
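If java reports a different version or is not found, a quick check of the variables just set:
[hdp@master ~]$ echo $JAVA_HOME   # expect /usr/local/jdk1.8.0_202
[hdp@master ~]$ which java        # expect $JAVA_HOME/bin/java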
Add the Hadoop environment variables to /etc/profile:
$ sudo vi /etc/profile
export HADOOP_HOME=/usr/local/hadoop-3.3.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
# export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
[hdp@master ~]$ source /etc/profile
[hdp@master ~]$ hadoop version
Hadoop 3.3.0
Source code repository https://gitbox.apache.org/repos/asf/hadoop.git -r aa96f1871bfd858f9bac59cf2a81ec470da649af
Compiled by brahma on 2020-07-06T18:44Z
Compiled with protoc 3.7.1
From source with checksum 5dc29b802d6ccd77b262ef9d04d19c4
This command was run using /usr/local/hadoop-3.3.0/share/hadoop/common/hadoop-common-3.3.0.jar
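To confirm that the native libraries pointed to by HADOOP_COMMON_LIB_NATIVE_DIR actually load, Hadoop ships a built-in check:
[hdp@master ~]$ hadoop checknative -a   # lists whether libhadoop, zlib, etc. were found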
Set the hostname and IP, and point the hosts file at them
[hdp@master ~]$ sudo vi /etc/hosts
127.0.0.1 localhost
192.168.1.190 master
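The hostname itself can be set with hostnamectl on systemd distributions; the ping then confirms the hosts entry:
$ sudo hostnamectl set-hostname master
$ ping -c 1 master   # should resolve to 192.168.1.190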
ssh localhost # creates the ~/.ssh directory on first use
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys # run on master
# Test passwordless login
ssh localhost
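A non-interactive check that key-based login really works (BatchMode makes ssh fail instead of falling back to a password prompt):
$ ssh -o BatchMode=yes localhost true && echo "passwordless OK"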
[hdp@master ~]$ cd /usr/local/hadoop-3.3.0/etc/hadoop
[hdp@master hadoop]$ vi hadoop-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_202 # add this variable
[hdp@master hadoop]$ vi yarn-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_202 # add this variable
[hdp@master hadoop]$ vi workers
master
# Create the working directories
$ mkdir -p /usr/local/hadoop-3.3.0/tmp
$ mkdir -p /usr/local/hadoop-3.3.0/tmp/dfs/name
$ mkdir -p /usr/local/hadoop-3.3.0/tmp/dfs/data
$ mkdir -p /usr/local/hadoop-3.3.0/tmp/dfs/namesecondary
$ chmod -R 777 /usr/local/hadoop-3.3.0/tmp # 777 is permissive; 755 suffices once hdp owns the tree
[hdp@master hadoop]$ vi core-site.xml
Add the following content:
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop-3.3.0/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
</configuration>
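Any key from the merged configuration can be read back with hdfs getconf, which makes a quick typo check:
[hdp@master hadoop]$ hdfs getconf -confKey fs.defaultFS
hdfs://master:9000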
[hdp@master hadoop]$ vi hdfs-site.xml
Add the following content:
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop-3.3.0/tmp/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop-3.3.0/tmp/dfs/data</value>
</property>
<!-- The following entries are optional; the values shown are the defaults -->
<!-- NameNode web UI address -->
<property>
<name>dfs.namenode.http-address</name>
<value>master:9870</value>
</property>
<!-- SecondaryNameNode (2NN) web UI address -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9868</value>
</property>
<!-- dfs.http.address is the deprecated alias of dfs.namenode.http-address -->
<property>
<name>dfs.http.address</name>
<value>master:9870</value>
</property>
<!-- Without the following 3 properties you may see: java.net.BindException: Cannot assign requested address -->
<!-- On a worker node, replace master with that worker's hostname -->
<property>
<name>dfs.datanode.address</name>
<value>master:9866</value>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>master:9867</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>master:9864</value>
</property>
</configuration>
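The same getconf check works for the HDFS keys just added:
[hdp@master hadoop]$ hdfs getconf -confKey dfs.replication           # expect 1
[hdp@master hadoop]$ hdfs getconf -confKey dfs.namenode.http-address # expect master:9870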
# mapreduce.framework.name must be set to run MapReduce on YARN (the built-in default is "local")
[hdp@master hadoop]$ vi mapred-site.xml
Add the following content:
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
# Most of the following match the defaults, but yarn.nodemanager.aux-services must be set for MapReduce shuffle
[hdp@master hadoop]$ vi yarn-site.xml
Add the following content:
<configuration>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8035</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<!-- Without the following 3 properties you may see: java.net.BindException: Cannot assign requested address -->
<!-- On a worker node, replace master with that worker's hostname -->
<!-- Localizer IPC port -->
<property>
<name>yarn.nodemanager.localizer.address</name>
<value>master:8040</value>
</property>
<!-- NodeManager HTTP port -->
<property>
<name>yarn.nodemanager.webapp.address</name>
<value>master:8042</value>
</property>
<!-- Container manager port inside the NodeManager -->
<property>
<name>yarn.nodemanager.address</name>
<value>master:8041</value>
</property>
</configuration>
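Before opening ports or starting daemons, it is worth confirming that all four files are well-formed XML (a sketch; xmllint ships in the libxml2-utils package and may need installing):
[hdp@master hadoop]$ xmllint --noout core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml && echo "XML OK"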
# Open the ports used by this configuration (use firewalld or ufw, whichever your distribution runs)
# firewalld:
sudo firewall-cmd --zone=public --add-port=9000/tcp --permanent
sudo firewall-cmd --zone=public --add-port=9864-9870/tcp --permanent
sudo firewall-cmd --zone=public --add-port=8030-8042/tcp --permanent
sudo firewall-cmd --zone=public --add-port=8088/tcp --permanent
sudo firewall-cmd --reload
# ufw:
sudo ufw allow 9000/tcp
sudo ufw allow 9864:9870/tcp
sudo ufw allow 8030:8042/tcp
sudo ufw allow 8088/tcp
# Format HDFS (run once, before the first start; only the NameNode is formatted, there is no datanode -format)
[hdp@master hadoop]$ hdfs namenode -format
# Start the Hadoop cluster
[hdp@master hadoop]$ start-all.sh
[hdp@master ~]$ jps # jps -l shows full package names
18114 DataNode
19044 Jps
18666 NodeManager
18555 ResourceManager
17996 NameNode
18302 SecondaryNameNode
# Seeing all five daemons (NameNode, SecondaryNameNode, DataNode, ResourceManager, NodeManager) means the configuration is correct.
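Beyond jps, the configured ports and registrations can be checked directly (these commands assume the single-node setup above):
[hdp@master ~]$ ss -ltn | grep -E ':9000|:9870|:8088'   # NameNode RPC, NameNode web UI, RM web UI
[hdp@master ~]$ hdfs dfsadmin -report | head             # should report one live DataNode
[hdp@master ~]$ yarn node -list                          # should list one RUNNING NodeManager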
If passwordless login still prompts for a password, check the sshd log on the target node (/var/log/auth.log on Debian/Ubuntu, /var/log/secure on RHEL):
worker1 sshd[1383]: Authentication refused: bad ownership or modes for file /home/hdp/.ssh/authorized_keys
Fix the permissions of the ssh files on every node:
$ chmod 700 ~/.ssh
$ chmod 600 ~/.ssh/*
192.168.1.190:9870 – HDFS NameNode web UI
192.168.1.190:8088 – YARN ResourceManager web UI (all applications)
You can also check the daemons running on each node with jps.
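Finally, a minimal end-to-end smoke test (a sketch; paths assume the Hadoop 3.3.0 install from this guide): write a file into HDFS and run the bundled example job.
[hdp@master ~]$ hdfs dfs -mkdir -p /user/hdp
[hdp@master ~]$ hdfs dfs -put /etc/hosts /user/hdp/
[hdp@master ~]$ hdfs dfs -cat /user/hdp/hosts   # should echo the file back
[hdp@master ~]$ hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar pi 2 5
If the pi job fails with class-not-found errors, Hadoop 3 sometimes also needs mapreduce.application.classpath (or HADOOP_MAPRED_HOME in the *.env properties) set in mapred-site.xml.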