<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/home/hadoop/hadoop_dfs/hadoop2/name</value>
    <description>Path on the local filesystem where the NameNode stores the namespace and transactions logs persistently.</description>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/home/hadoop/hadoop_dfs/hadoop2/data</value>
    <description>Comma separated list of paths on the local filesystem of a DataNode where it should store its blocks.</description>
  </property>
  <!-- "dfs.permissions" is the pre-2.x key; Hadoop 2.x names this
       "dfs.permissions.enabled" (the old key is still accepted as a
       deprecated alias, so this change is backward-compatible). -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
    <description>Disable HDFS permission checking; suitable for a test/dev cluster only.</description>
  </property>
</configuration>
<configuration>
  <!-- ResourceManager endpoints -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>zhenlong-master:8031</value>
    <description>host is the hostname of the resource manager and port is the port on which the NodeManagers contact the Resource Manager. </description>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>zhenlong-master:8030</value>
    <description>host is the hostname of the resourcemanager and port is the port on which the Applications in the cluster talk to the Resource Manager. </description>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
    <description>In case you do not want to use the default scheduler</description>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>zhenlong-master:8032</value>
    <description>the host is the hostname of the ResourceManager and the port is the port on which the clients can talk to the Resource Manager. </description>
  </property>

  <!-- NodeManager settings -->
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>${hadoop.tmp.dir}/nodemanager/local</value>
    <description>the local directories used by the nodemanager</description>
  </property>
  <property>
    <name>yarn.nodemanager.address</name>
    <value>0.0.0.0:8034</value>
    <description>the nodemanagers bind to this port</description>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>${hadoop.tmp.dir}/nodemanager/remote</value>
    <description>directory on hdfs where the application logs are moved to </description>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>${hadoop.tmp.dir}/nodemanager/logs</value>
    <description>the directories used by Nodemanagers as log directories</description>
  </property>

  <!-- Use mapreduce_shuffle instead of mapreduce.suffle (YARN-1229)-->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
</configuration>
slaves
1
zhenlong-slave1
hadoop-env.sh
1
export JAVA_HOME=/home/hadoop/jdk1.6.0_45
此处 JAVA_HOME 可以根据每台 Server 情况设定。
格式化 NameNode
第一次启动,需要先格式化 NameNode。
1
hdfs namenode -format
Start Hadoop
1
~/hadoop-2.2.0/sbin/start-all.sh
In master
In slaves
Test
HDFS Web UI: http://zhenlong-master:50070
YARN Web UI: http://zhenlong-master:8088
YARN && MapReduce 测试:
1
~/hadoop-2.2.0/bin/hadoop jar ~/hadoop-2.2.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar wordcount file wordcount_out