Hadoop install:修订间差异
跳到导航
跳到搜索
无编辑摘要 |
无编辑摘要 |
||
第45行: | 第45行: | ||
====Setup==== | ====Setup==== | ||
===== | =====core-site.xml (Common组件)===== | ||
<configuration> | <configuration> | ||
<property> | |||
<!-- 配置hdfs地址 --> | |||
<name>fs.defaultFS</name> | |||
<value>hdfs://g2-hdfs-01:9000</value> | |||
</property> | |||
<property> | |||
<name>io.file.buffer.size</name> | |||
<value>131072</value> | |||
</property> | |||
<property> | |||
<!-- 保存临时文件目录 --> | |||
<name>hadoop.tmp.dir</name> | |||
<value>/u01/hdfs/tmp</value> | |||
</property> | |||
</configuration> | |||
=====hdfs-site.xml (HDFS组件)===== | |||
<configuration> | |||
<property> | |||
<!-- 主节点地址 --> | |||
<name>dfs.namenode.http-address</name> | |||
<value>g2-hdfs-01:50070</value> | |||
</property> | |||
<property> | |||
<!-- 第二节点地址 --> | |||
<name>dfs.namenode.secondary.http-address</name> | |||
<value>g2-hdfs-02:50170</value> | |||
</property> | |||
<property> | |||
<name>dfs.namenode.name.dir</name> | |||
</configuration> | <value>file:/u01/hdfs/dfs/nn</value> | ||
</property> | |||
<property> | |||
===== | <name>dfs.datanode.data.dir</name> | ||
<configuration> | <value>file:/u01/hdfs/dfs/dn</value> | ||
</property> | |||
<property> | |||
<name>dfs.webhdfs.enabled</name> | |||
<value>true</value> | |||
</property> | |||
<property> | |||
<!-- 配置false后,无需权限即可生成dfs上的文件 --> | |||
<name>dfs.permissions</name> | |||
<value>false</value> | |||
</property> | |||
</configuration> | |||
-- del | |||
<property> | |||
<!-- 备份数为默认值3 --> | |||
<name>dfs.replication</name> | |||
<value>3</value> | |||
</property> | |||
<property> | |||
<name>dfs.blocksize</name> | |||
<value>268435456</value> | |||
</property> | |||
<property> | |||
<name>dfs.namenode.handler.count</name> | |||
<value>100</value> | |||
</property> | |||
=====mapred-site.xml===== | |||
<configuration> | |||
<property> | |||
<name>mapreduce.framework.name</name> | |||
<value>yarn</value> | |||
</property> | |||
</configuration> | |||
-- del | |||
<property> | |||
<name>mapreduce.jobhistory.address</name> | |||
<value>g2-hdfs-01:10020</value> | |||
</property> | |||
<property> | |||
<name>mapreduce.jobhistory.webapp.address</name> | |||
<value>g2-hdfs-01:19888</value> | |||
</property> | |||
<property> | |||
<name>mapreduce.application.classpath</name> | |||
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value> | |||
</property> | |||
=====yarn-site.xml===== | |||
<configuration> | |||
<property> | |||
<name>yarn.resourcemanager.hostname</name> | |||
<value>g2-hdfs-01</value> | |||
</property> | |||
<property> | |||
<name>yarn.nodemanager.aux-services</name> | |||
</configuration> | <value>mapreduce_shuffle</value> | ||
</property> | |||
<property> | |||
-- del | <name>yarn.resourcemanager.webapp.address</name> | ||
<value>g2-hdfs-01:8088</value> | |||
</property> | |||
<property> | |||
<name>yarn.scheduler.maximum-allocation-mb</name> | |||
<value>32768</value> | |||
</property> | |||
<property> | |||
<name>yarn.nodemanager.vmem-check-enabled</name> | |||
<value>false</value> | |||
</property> | |||
<property> | |||
<name>yarn.nodemanager.env-whitelist</name> | |||
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value> | |||
</property> | |||
</configuration> | |||
<property> | |||
<name>yarn.resourcemanager.webapp.address</name> | |||
<value>hadoop01/192.168.44.5:8088</value> | |||
<description>配置外网只需要替换外网ip为真实ip,否则默认为 localhost:8088</description> | |||
</property> | |||
yarn.resourcemanager.hostname | |||
指定yarn的ResourceManager管理界面的地址,不配的话,Active Node始终为0 | |||
yarn.scheduler.maximum-allocation-mb | |||
每个节点可用内存,单位MB,默认8192MB | |||
yarn.nodemanager.aux-services | |||
reducer获取数据的方式 | |||
yarn.nodemanager.vmem-check-enabled | |||
===== | false = 忽略虚拟内存的检查 | ||
<configuration> | |||
</configuration> | |||
-- del | |||
===== | |||
<configuration> | |||
</ | |||
yarn.resourcemanager.hostname | |||
指定yarn的ResourceManager管理界面的地址,不配的话,Active Node始终为0 | |||
yarn.scheduler.maximum-allocation-mb | |||
每个节点可用内存,单位MB,默认8192MB | |||
yarn.nodemanager.aux-services | |||
reducer获取数据的方式 | |||
yarn.nodemanager.vmem-check-enabled | |||
false = 忽略虚拟内存的检查 | |||
2023年2月12日 (日) 10:18的版本
ENV
USER
groupadd hadoop -g 1001
useradd hdfs -g hadoop -u 1001
Java
/usr/bin/java -> /etc/alternatives/java -> /usr/java/jdk1.8.0_221-amd64/jre/bin/java
# /opt/hadoop-3.3.0
ln -s /opt/hadoop-3.3.0 /opt/hadoop
# .bash_profile
# hadoop, 20201010, Adam
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
Hadoop 配置
配置 Hadoop 环境脚本文件中的 JAVA_HOME 参数
# hadoop是守护进程,读取不到 /etc/profile 里面配置的JAVA_HOME路径
# /opt/hadoop/etc/hadoop/
# hadoop-env.sh, mapred-env.sh, yarn-env.sh
cp hadoop-env.sh hadoop-env.sh.20210409
cp mapred-env.sh mapred-env.sh.20210409
cp yarn-env.sh yarn-env.sh.20210409
echo '
# hdfs, 20210409, Adam
export JAVA_HOME=/usr/java/jdk1.8.0_361' >>
Setup
core-site.xml (Common组件)
<configuration> <property> <name>fs.defaultFS</name> <value>hdfs://g2-hdfs-01:9000</value> </property> <property> <name>io.file.buffer.size</name> <value>131072</value> </property> <property> <name>hadoop.tmp.dir</name> <value>/u01/hdfs/tmp</value> </property> </configuration>
hdfs-site.xml (HDFS组件)
<configuration> <property> <name>dfs.namenode.http-address</name> <value>g2-hdfs-01:50070</value> </property> <property> <name>dfs.namenode.secondary.http-address</name> <value>g2-hdfs-02:50170</value> </property> <property> <name>dfs.namenode.name.dir</name> <value>file:/u01/hdfs/dfs/nn</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>file:/u01/hdfs/dfs/dn</value> </property> <property> <name>dfs.webhdfs.enabled</name> <value>true</value> </property> <property> <name>dfs.permissions</name> <value>false</value> </property> </configuration> -- del <property> <name>dfs.replication</name> <value>3</value> </property> <property> <name>dfs.blocksize</name> <value>268435456</value> </property> <property> <name>dfs.namenode.handler.count</name> <value>100</value> </property>
mapred-site.xml
<configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> </configuration> -- del <property> <name>mapreduce.jobhistory.address</name> <value>g2-hdfs-01:10020</value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>g2-hdfs-01:19888</value> </property> <property> <name>mapreduce.application.classpath</name> <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value> </property>
yarn-site.xml
<configuration> <property> <name>yarn.resourcemanager.hostname</name> <value>g2-hdfs-01</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.resourcemanager.webapp.address</name> <value>g2-hdfs-01:8088</value> </property> <property> <name>yarn.scheduler.maximum-allocation-mb</name> <value>32768</value> </property> <property> <name>yarn.nodemanager.vmem-check-enabled</name> <value>false</value> </property> <property> <name>yarn.nodemanager.env-whitelist</name> <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value> </property> </configuration> <property> <name>yarn.resourcemanager.webapp.address</name> <value>hadoop01/192.168.44.5:8088</value> <description>配置外网只需要替换外网ip为真实ip,否则默认为 localhost:8088</description> </property> yarn.resourcemanager.hostname 指定yarn的ResourceManager管理界面的地址,不配的话,Active Node始终为0 yarn.scheduler.maximum-allocation-mb 每个节点可用内存,单位MB,默认8192MB yarn.nodemanager.aux-services reducer获取数据的方式 yarn.nodemanager.vmem-check-enabled false = 忽略虚拟内存的检查
# workers