vim ~/.profile
export HADOOP_HOME=/home/mmc/hadoop
vim etc/hadoop/hadoop-env.sh
export JAVA_HOME=/opt/java/jdk1.8.0_151
vim etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://0.0.0.0:9000</value>
        <description>HDFS endpoint that clients connect to</description>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/home/mmc/hadoop/tmp</value>
        <description>Base directory for Hadoop data</description>
    </property>
</configuration>
vim etc/hadoop/hdfs-site.xml
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/home/mmc/hadoop/hdfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/home/mmc/hadoop/hdfs/data</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
Passwordless SSH to localhost
ssh-keygen
cd ~/.ssh
touch authorized_keys
chmod 600 authorized_keys
cat id_rsa.pub >> authorized_keys
Format the NameNode
./bin/hdfs namenode -format
Start the daemons with server-side logs printed to the console
export HADOOP_ROOT_LOGGER=DEBUG,console
./sbin/start-all.sh
Verify
./bin/hadoop fs -ls /
./bin/hadoop fs -mkdir -p /user/hadoop/input
./bin/hadoop fs -ls /user/hadoop/input
pom.xml
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.3.0</version>
    <scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-common -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-yarn-common</artifactId>
    <version>3.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-api -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-yarn-api</artifactId>
    <version>3.3.0</version>
</dependency>
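With these dependencies on the classpath, the /user/hadoop/input directory created above can also be seeded with test data straight from Java. A minimal sketch, assuming the same NameNode address that App.java uses below; the class name UploadInput and the file name words.txt are placeholders:

package org.example;

import java.net.URI;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class UploadInput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The NameNode address is an assumption; adjust it to your own host.
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.0.24:9000"), conf)) {
            // Hypothetical file name under the input directory created earlier.
            Path input = new Path("/user/hadoop/input/words.txt");
            try (FSDataOutputStream out = fs.create(input, true)) {
                // Tab-separated words, matching the "\t" delimiter the mapper splits on.
                out.write("hello\tworld\thello\thadoop\n".getBytes(StandardCharsets.UTF_8));
            }
        }
    }
}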
src/main/resources/log4j.properties
# Set root logger level to DEBUG and its only appender to A1.
log4j.rootLogger=DEBUG, A1

# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender

# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
WordCount.java
package org.example;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCount {

    static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Take one line of input and turn the serialized data into a String
            String line = value.toString();
            // Split the line on the delimiter
            String[] words = line.split("\t");
            // Emit <word, 1> for every word
            for (String word : words) {
                // Output must be written as serializable Writable types
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }

    static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        // reduce() is called once per key group: one key plus all of its values (k: v1, v2, v3)
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Counter for this key
            int count = 0;
            // Accumulate the number of occurrences of the key
            for (IntWritable value : values) {
                count += value.get();
            }
            context.write(key, new IntWritable(count));
        }
    }
}
App.java
package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class App {
    public static void main(String[] args) throws Exception {
        String jobName = "word count";
//        String inputPath = "hdfs://192.168.56.200:9000/user/hadoop/input/";
        String inputPath = "hdfs://192.168.0.24:9000/user/hadoop/input/";
//        String inputPath = "/user/hadoop/input/";
//        String outputPath = "hdfs://192.168.56.200:9000/user/hadoop/output/";
        String outputPath = "/home/mmc/downloads/hadoop/output";

        Configuration conf = new Configuration();
//        conf.set("fs.defaultFS", "hdfs://192.168.56.200:9000");
        // Bind the hdfs:// scheme explicitly to the DistributedFileSystem implementation
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // Never try to replace a failed DataNode in the write pipeline (there is only one DataNode here)
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        // Connect to DataNodes by hostname instead of the internal IP reported by the NameNode
        conf.set("dfs.client.use.datanode.hostname", "true");

        Job job = Job.getInstance(conf);
        job.setJobName(jobName);
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCount.WordCountMapper.class);
        job.setReducerClass(WordCount.WordCountReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
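Once the job succeeds, the result lands in part-r-* files under the output directory, one "word<TAB>count" pair per line. A minimal sketch for printing it back through the Hadoop FileSystem API, assuming the local output path used above; the class name PrintOutput is a placeholder:

package org.example;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PrintOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // No fs.defaultFS is set here, matching App.java, so this path resolves to the local file system.
        Path outputDir = new Path("/home/mmc/downloads/hadoop/output");
        try (FileSystem fs = outputDir.getFileSystem(conf)) {
            for (FileStatus status : fs.listStatus(outputDir)) {
                // Skip the _SUCCESS marker and read only the part-r-* files.
                if (!status.getPath().getName().startsWith("part-")) {
                    continue;
                }
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(status.getPath()), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        System.out.println(line);
                    }
                }
            }
        }
    }
}

Since WordCountReducer just sums integers, adding job.setCombinerClass(WordCount.WordCountReducer.class) to the driver is an optional optimization that pre-aggregates map output and reduces shuffle traffic.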