Hadoop下载地址:https://archive.apache.org/dist/hadoop/common/
该部署以Red Hat 7为例
安装前提:需先配置好1.8的JAVA环境,可参考JDK的安装配置(Windows、Linux),Hadoop和Java版本对应关系可参考https://blog.csdn.net/m0_67393619/article/details/123933614
# Unpack Hadoop into /usr/local (creates /usr/local/hadoop-3.0.3).
tar -zxvf hadoop-3.0.3.tar.gz -C /usr/local/
# Append the two export lines below to /etc/profile.
vim /etc/profile
# NOTE: the exports must be on SEPARATE lines. Written as one command
# ("export HADOOP_HOME=... export PATH=..."), $HADOOP_HOME is still empty
# when PATH is expanded, so bin/sbin never make it onto PATH.
export HADOOP_HOME=/usr/local/hadoop-3.0.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# Reload so the current shell picks up HADOOP_HOME and the new PATH.
source /etc/profile
cd /usr/local/hadoop-3.0.3/etc/hadoop
# Set JAVA_HOME and HADOOP_HOME explicitly in hadoop-env.sh — Hadoop's
# daemons do not reliably inherit them from the login shell.
vim hadoop-env.sh
# One export per line (fused on a single line the second assignment is
# passed as an argument to the first export).
export JAVA_HOME=/usr/local/java/jdk1.8.0_60
export HADOOP_HOME=/usr/local/hadoop-3.0.3
vim core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9001</value>
    <description>hdfs内部通讯访问地址</description>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/usr/local/hadoop-3.0.3/tmp</value>
    <description>hadoop数据存放</description>
  </property>
</configuration>
vim hdfs-site.xml
<configuration>
  <!-- Allow non-root users to write to HDFS. The current property name is
       dfs.permissions.enabled; "dfs.permissions" is a deprecated alias. -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <!-- Where the NameNode stores its metadata. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop-3.0.3/hdfs/name</value>
  </property>
  <!-- Where the DataNode stores block data. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop-3.0.3/hdfs/data</value>
  </property>
  <!-- Replication factor; 1 for a single-node deployment (default is 3). -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <!-- NameNode web UI address, moved from the 3.x default 9870 to 50070.
       The current property name is dfs.namenode.http-address;
       "dfs.http.address" is a deprecated alias. -->
  <property>
    <name>dfs.namenode.http-address</name>
    <value>0.0.0.0:50070</value>
  </property>
</configuration>
# Create the NameNode, DataNode and temp directories referenced by the
# configuration above, in a single call.
mkdir -p \
  /usr/local/hadoop-3.0.3/hdfs/name \
  /usr/local/hadoop-3.0.3/hdfs/data \
  /usr/local/hadoop-3.0.3/tmp
# HDFS must be formatted before the very first start. Formatting wipes the
# NameNode metadata directory — run it ONCE, never on a live cluster.
cd /usr/local/hadoop-3.0.3/bin
./hdfs namenode -format
# Start the NameNode daemon.
./hdfs --daemon start namenode
# Verify with jps; expected output (PIDs will differ):
jps
# 6355 NameNode
# 6424 Jps
# Start the DataNode daemon.
cd /usr/local/hadoop-3.0.3/bin
./hdfs --daemon start datanode
# Verify with jps; expected output (PIDs will differ):
jps
# 6544 Jps
# 6355 NameNode
# 6477 DataNode
# Start the SecondaryNameNode daemon (checkpoints the NameNode metadata).
cd /usr/local/hadoop-3.0.3/bin
./hdfs --daemon start secondarynamenode
# Verify with jps; expected output (PIDs will differ):
jps
# 166978 Jps
# 166939 SecondaryNameNode
# 166684 NameNode
# 166812 DataNode
#因为hadoop的很多启停脚本实际上是通过ssh登录到节点来启停服务的,所以哪怕我们是单机版也要进行该免密配置
# Generate an RSA key pair with an empty comment (-C ''); accept the
# default file and an empty passphrase when prompted.
ssh-keygen -t rsa -C ''
# Install root's public key on this host (replace the placeholder with the
# machine's IP) so Hadoop's ssh-based start/stop scripts can log in
# without a password — required even on a single-node setup.
ssh-copy-id -i /root/.ssh/id_rsa.pub root@[本机IP地址]
NameNode和DataNode启动之后,即可通过http://ip:50070/访问
cd /usr/local/hadoop-3.0.3/etc/hadoop
vim mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <final>true</final>
    <description>通知MR使用YARN</description>
  </property>
  <!-- Point the MR ApplicationMaster and map/reduce task environments at
       the Hadoop install directory; adjust if installed elsewhere. -->
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop-3.0.3</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop-3.0.3</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop-3.0.3</value>
  </property>
</configuration>
vim yarn-site.xml
<configuration>
  <!-- Hostname of the ResourceManager; the address properties below are
       derived from it. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>192.168.111.129</value>
  </property>
  <!-- Client-facing address used to submit applications to the RM. -->
  <property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <value>${yarn.resourcemanager.hostname}:8032</value>
  </property>
  <!-- Scheduler address used by ApplicationMasters to request/release resources. -->
  <property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>${yarn.resourcemanager.hostname}:8030</value>
  </property>
  <!-- ResourceManager web UI address. -->
  <property>
    <description>The http address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>${yarn.resourcemanager.hostname}:8088</value>
  </property>
  <!-- Address NodeManagers use to report heartbeats and receive tasks. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>${yarn.resourcemanager.hostname}:8031</value>
  </property>
  <!-- RM admin command interface. -->
  <property>
    <description>The address of the RM admin interface.</description>
    <name>yarn.resourcemanager.admin.address</name>
    <value>${yarn.resourcemanager.hostname}:8033</value>
  </property>
  <!-- Auxiliary service on NodeManagers; must be mapreduce_shuffle for
       MapReduce jobs to run. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Largest single container allocation, in MB (default 8192). The
       original file used a misspelled <discription> tag, which Hadoop
       silently ignores, and its text actually described node memory. -->
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>1024</value>
    <description>Maximum memory allocation for a single container, in MB.</description>
  </property>
  <!-- Virtual-to-physical memory ratio allowed per container. -->
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
  </property>
  <!-- Physical memory available to containers on this node, in MB. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>1024</value>
  </property>
  <!-- Disable virtual-memory checking. Useful on virtual machines where
       vmem accounting is unreliable; on physical hosts with ample memory
       this property can be removed. -->
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
</configuration>
下列两种方式任选其一即可
(1)配置环境变量,/etc/profile文件
export HDFS_NAMENODE_USER=root export HDFS_DATANODE_USER=root export HDFS_SECONDARYNAMENODE_USER=root export YARN_RESOURCEMANAGER_USER=root export YARN_NODEMANAGER_USER=root
(2)在start-yarn.sh,stop-yarn.sh两个文件顶部添加以下参数(在hadoop安装目录的sbin里)
YARN_RESOURCEMANAGER_USER=root HADOOP_SECURE_DN_USER=yarn YARN_NODEMANAGER_USER=root
在start-dfs.sh,stop-dfs.sh两个文件顶部添加以下参数(在hadoop安装目录的sbin里),这两个命令是我们一键启停HDFS脚本命令
start-dfs.sh和stop-dfs.sh命令可以将我们NameNode,DataNode,SecondaryNameNode组件一键启动
HDFS_DATANODE_USER=root HADOOP_SECURE_DN_USER=hdfs HDFS_NAMENODE_USER=root HDFS_SECONDARYNAMENODE_USER=root
扩:除了以上命令,还有start-all.sh和stop-all.sh可以一键启停我们hadoop的所有组件(含YARN)
cd /usr/local/hadoop-3.0.3/sbin
# Start YARN (ResourceManager + NodeManager).
./start-yarn.sh
# Verify with jps; expected output (PIDs will differ):
jps
# 4177 ResourceManager
# 3673 SecondaryNameNode
# 3466 NameNode
# 3565 DataNode
# 4319 NodeManager
# 4639 Jps