Hadoop环境配置

环境配置

安装比较简单:从官网下载并解压,然后在 .bashrc 中配置好 JAVA_HOME,并把 Hadoop 的执行目录加入 PATH。以下是测试环境参数

1
2
# JDK used to run Hadoop.
JAVA_HOME=/usr/lib/jvm/java-1.8.0
export JAVA_HOME

# Append the Hadoop 2.8.0 bin directory so the `hadoop` command is on PATH.
PATH=$PATH:/data/servers/hadoop/hadoop-2.8.0/bin
export PATH

建一个目录作为Hadoop数据的主目录, 并添加几个目录,并确保目录可写可读

1
2
3
4
# Create the Hadoop data root with its tmp, NameNode and DataNode directories.
# -p creates missing parents and succeeds when a directory already exists,
# so the command can be re-run safely.
mkdir -p hadoop/tmp hadoop/hdfs/name hadoop/hdfs/data

问题解决

1
2
3
4
1. 启动如果有问题,查看hadoop解压目录下的logs文件,找出问题的原因。
2. 最开始要初始化namenode, 执行: hadoop namenode -format
3. 初始化完要新建目录: hadoop fs -mkdir /user /user/{USER_NAME}
4. 在hadoop/bin/下建一个可执行文件hf,用于快速执行hadoop fs 命令: hadoop fs -$@

配置文件

core-site.xml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
<!--
  core-site.xml
  Port 9000 (described in the original notes as the default) conflicts with
  php-fpm on this machine, so the NameNode URI uses port 9002 instead.
-->
<configuration>
  <!-- URI of the default filesystem. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhadoop.com:9002</value>
  </property>
  <!-- Deprecated alias of fs.defaultFS; kept with the same value. -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhadoop.com:9002</value>
  </property>
  <!--
    Base directory for temporary data. This is a plain local path rather
    than a file: URI, because other defaults are built as
    file://${hadoop.tmp.dir}/... and would otherwise be malformed.
  -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/servers/hadoop/tmp</value>
  </property>
  <!-- I/O buffer size in bytes: 128 KiB, a multiple of the 4096-byte page size. -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <!-- Allow the hduser account to act as a proxy user from any host, for any group. -->
  <property>
    <name>hadoop.proxyuser.hduser.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hduser.groups</name>
    <value>*</value>
  </property>
</configuration>

hdfs-site.xml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
<!--
  hdfs-site.xml
  The directory values must be exactly right. On a single machine,
  set dfs.replication to 1.
-->
<configuration>
  <!-- Local directory for NameNode metadata. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/data/servers/hadoop/hdfs/name</value>
  </property>
  <!-- Local directory for DataNode blocks. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/data/servers/hadoop/hdfs/data</value>
  </property>
  <!-- Single node: keep one copy of each block. -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>localhadoop.com:9001</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <!-- dfs.permissions.enabled is the current name of the deprecated dfs.permissions key. -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
    <description>
If "true", enable permission checking in HDFS.
If "false", permission checking is turned off,
but all other behavior is unchanged.
Switching from one parameter value to the other does not change the mode,
owner or group of files or directories.
    </description>
  </property>
</configuration>

mapred-site.xml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
<!--
  mapred-site.xml
  MapReduce settings: execution framework and JobHistory server endpoints.
-->
<configuration>
  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- JobHistory server address (host:port). -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>localhadoop.com:10020</value>
  </property>
  <!-- JobHistory server web UI address (host:port). -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>localhadoop.com:19888</value>
  </property>
</configuration>

yarn-site.xml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
<!-- yarn-site.xml -->
<configuration>
  <!-- Site specific YARN configuration properties -->

  <!--
    Shuffle service required by MapReduce jobs running on YARN. Without it
    the reduce-side shuffle fails because the mapreduce_shuffle aux service
    does not exist on the NodeManager.
  -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Memory, in MB, that the NodeManager may hand out to containers. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>1536</value>
  </property>
  <!-- Smallest container allocation, in MB, the scheduler will grant. -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>256</value>
  </property>
  <!-- Allowed ratio of virtual to physical memory per container. -->
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
  </property>
</configuration>