Time: 2023-06-28 19:39:02 | Source: Website Operations
Building a Hadoop Cluster with Virtual Machines

1. Modify the hostname
vim /etc/hostname  # edit the hostname
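On systemd-based distributions the hostname can also be set without editing the file by hand; a minimal sketch, assuming this node should become hadoop01:

hostnamectl set-hostname hadoop01  # takes effect immediately and persists across reboots

Repeat on the other nodes with hadoop02 and hadoop03.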
2. Modify the hosts mapping
vim /etc/hosts
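Each node's IP should map to its hostname; a minimal sketch with hypothetical addresses (replace them with your VMs' actual IPs):

192.168.1.101 hadoop01  # hypothetical IP
192.168.1.102 hadoop02  # hypothetical IP
192.168.1.103 hadoop03  # hypothetical IP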
3. Disable the firewall
# CentOS
systemctl stop firewalld.service     # stop the firewall
systemctl disable firewalld.service  # keep it disabled on boot
# Ubuntu
ufw status   # check firewall status
ufw disable  # disable the firewall
ufw enable   # re-enable the firewall
4. Passwordless SSH login
ssh-keygen  # generate a public/private key pair
ssh-copy-id hadoop01
ssh-copy-id hadoop02
ssh-copy-id hadoop03
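A quick way to confirm key-based login works (run from hadoop01; no password prompt should appear):

ssh hadoop02 hostname  # should print hadoop02 without asking for a password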
5. Cluster time synchronization
systemctl start chronyd
systemctl status chronyd
systemctl enable chronyd
On Ubuntu, timedatectl can be used instead:
timedatectl               # check time sync status
timedatectl set-ntp true  # enable time synchronization
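On CentOS, chrony's tracking can be verified with chronyc, which ships alongside chronyd:

chronyc sources -v  # list NTP sources and their sync state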
6. Install JDK 8
mkdir -p /export/server    # software installation path
mkdir -p /export/data      # data storage path
mkdir -p /export/software  # tarball storage path
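A minimal sketch of the JDK extraction, assuming the 8u291 tarball (hypothetical filename) has been uploaded to /export/software:

cd /export/software
tar -xzf jdk-8u291-linux-x64.tar.gz -C /export/server  # hypothetical filename; yields /export/server/jdk1.8.0_291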
8. Upload the Hadoop tarball and extract it
HADOOP_HOME is the extracted directory, e.g. export HADOOP_HOME=/export/server/hadoop-3.3.1
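A minimal sketch of this step, assuming the Apache hadoop-3.3.1 release tarball is already in /export/software:

cd /export/software
tar -xzf hadoop-3.3.1.tar.gz -C /export/server  # yields /export/server/hadoop-3.3.1

Then edit the configuration files under $HADOOP_HOME/etc/hadoop, starting with hadoop-env.sh.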
1. hadoop-env.sh
# Set JAVA_HOME
export JAVA_HOME=/export/server/jdk1.8.0_291
# Set HADOOP_HOME
export HADOOP_HOME=/export/server/hadoop-3.3.1
# Set the user that runs each daemon's shell commands
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
=============== hadoop2 ==============
# Set JAVA_HOME
export JAVA_HOME=/export/server/jdk1.8.0_291
# Set HADOOP_HOME
export HADOOP_HOME=/export/server/hadoop-2.7.2
2. core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop01:9820</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/export/data/hadoop-3.3.1</value>
  </property>
  <property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
  </property>
</configuration>
=============== hadoop2 ==============
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop01:8020</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/export/data/hadoop-2.7.2</value>
  </property>
</configuration>
3. hdfs-site.xml
<configuration>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop01:9870</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop02:9868</value>
  </property>
</configuration>
=============== hadoop2 ==============
<configuration>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop01:50070</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop02:50090</value>
  </property>
</configuration>
4. mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
</configuration>
5. yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop01</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>512</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>4</value>
  </property>
</configuration>
6. workers (Hadoop 3) or slaves (Hadoop 2)
hadoop01
hadoop02
hadoop03
7. Update the environment variables (e.g. in /etc/profile.d/hadoop.sh)
export JAVA_HOME=/export/server/jdk1.8.0_291
export HADOOP_HOME=/export/server/hadoop-3.3.1
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile.d/hadoop.sh
All of the configuration above needs to be distributed to every node, as sketched below.
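A minimal sketch of the distribution, assuming the same /export layout exists on every node:

scp -r /export/server/hadoop-3.3.1/etc/hadoop root@hadoop02:/export/server/hadoop-3.3.1/etc/
scp -r /export/server/hadoop-3.3.1/etc/hadoop root@hadoop03:/export/server/hadoop-3.3.1/etc/
scp /etc/profile.d/hadoop.sh root@hadoop02:/etc/profile.d/
scp /etc/profile.d/hadoop.sh root@hadoop03:/etc/profile.d/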
Format the NameNode:
hdfs namenode -format  # run only on hadoop01
Formatting more than once leaves the master and worker roles with inconsistent data. Fix this by deleting the hadoop.tmp.dir directory on every node and formatting again, as sketched below.
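A minimal sketch of the recovery, assuming hadoop.tmp.dir is /export/data/hadoop-3.3.1 as set in core-site.xml above:

rm -rf /export/data/hadoop-3.3.1  # on every node: remove the stale data
hdfs namenode -format             # on hadoop01 only: format again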
Start DFS and YARN:
# Run only on hadoop01
./sbin/start-dfs.sh  # start the NameNode and DataNodes
./sbin/start-yarn.sh  # start the ResourceManager and NodeManagers
./sbin/mr-jobhistory-daemon.sh start historyserver  # start the HistoryServer
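jps (bundled with the JDK) can confirm that the daemons came up; with this layout hadoop01 should show roughly the following processes (PIDs will differ):

jps
# NameNode
# DataNode
# ResourceManager
# NodeManager
# JobHistoryServer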
Check the web UIs:
hadoop01:9870   # Hadoop 3 NameNode
hadoop01:50070  # Hadoop 2 NameNode
hadoop01:8088   # YARN ResourceManager
hadoop01:19888  # JobHistory Server
# Compute pi
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar pi 2 4
# File write test
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.3.1-tests.jar TestDFSIO -write -nrFiles 10 -fileSize 10MB
# File read test
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.3.1-tests.jar TestDFSIO -read -nrFiles 10 -fileSize 10MB