WordCount使用步骤
#创建一个测试文件,内容如下
root@localhost:~# cat myfile.txt
This is my file.
#首先查找hadoop-examples-1.2.1.jar这个文件所在的位置
root@localhost:~# find / -name "hadoop-examples-*.jar"
/opt/hadoop-1.2.1/hadoop-examples-1.2.1.jar
#创建输入文件夹;输出文件夹不用创建(注意:若/output已存在,作业会报错,需先用 hadoop fs -rmr /output 删除)
root@localhost:~# hadoop fs -mkdir /input
#将本地文件放在hadoop上
root@localhost:~# hadoop fs -put /root/myfile.txt /input
#进行文件统计
root@localhost:~# hadoop jar /opt/hadoop-1.2.1/hadoop-examples-1.2.1.jar wordcount /input /output
#展示文件以及文件内容
root@localhost:~# hadoop fs -ls /output
Found 3 items
-rw-r--r-- 3 root supergroup 0 2016-08-30 14:08 /output/_SUCCESS
drwxr-xr-x - root supergroup 0 2016-08-30 14:07 /output/_logs
-rw-r--r-- 3 root supergroup 25 2016-08-30 14:08 /output/part-r-00000
root@localhost:~# hadoop fs -cat /output/part-r-00000
This 1
file. 1
is 1
my 1
WordCount作业运行日志(执行上述统计命令时控制台打印的输出)
root@localhost:~# hadoop jar /opt/hadoop-1.2.1/hadoop-examples-1.2.1.jar wordcount /input /output
16/08/30 14:07:42 INFO input.FileInputFormat: Total input paths to process : 1
16/08/30 14:07:42 INFO util.NativeCodeLoader: Loaded the native-hadoop library
16/08/30 14:07:42 WARN snappy.LoadSnappy: Snappy native library not loaded
16/08/30 14:07:43 INFO mapred.JobClient: Running job: job_201608301153_0005
16/08/30 14:07:44 INFO mapred.JobClient: map 0% reduce 0%
16/08/30 14:07:58 INFO mapred.JobClient: map 100% reduce 0%
16/08/30 14:08:21 INFO mapred.JobClient: map 100% reduce 100%
16/08/30 14:08:22 INFO mapred.JobClient: Job complete: job_201608301153_0005
16/08/30 14:08:22 INFO mapred.JobClient: Counters: 29
16/08/30 14:08:22 INFO mapred.JobClient: Job Counters
16/08/30 14:08:22 INFO mapred.JobClient: Launched reduce tasks=1
16/08/30 14:08:22 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=13444
16/08/30 14:08:22 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
16/08/30 14:08:22 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
16/08/30 14:08:22 INFO mapred.JobClient: Launched map tasks=1
16/08/30 14:08:22 INFO mapred.JobClient: Data-local map tasks=1
16/08/30 14:08:22 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=22709
16/08/30 14:08:22 INFO mapred.JobClient: File Output Format Counters
16/08/30 14:08:22 INFO mapred.JobClient: Bytes Written=25
16/08/30 14:08:22 INFO mapred.JobClient: FileSystemCounters
16/08/30 14:08:22 INFO mapred.JobClient: FILE_BYTES_READ=47
16/08/30 14:08:22 INFO mapred.JobClient: HDFS_BYTES_READ=120
16/08/30 14:08:22 INFO mapred.JobClient: FILE_BYTES_WRITTEN=104166
16/08/30 14:08:22 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=25
16/08/30 14:08:22 INFO mapred.JobClient: File Input Format Counters
16/08/30 14:08:22 INFO mapred.JobClient: Bytes Read=17
16/08/30 14:08:22 INFO mapred.JobClient: Map-Reduce Framework
16/08/30 14:08:22 INFO mapred.JobClient: Map output materialized bytes=47
16/08/30 14:08:22 INFO mapred.JobClient: Map input records=1
16/08/30 14:08:22 INFO mapred.JobClient: Reduce shuffle bytes=47
16/08/30 14:08:22 INFO mapred.JobClient: Spilled Records=8
16/08/30 14:08:22 INFO mapred.JobClient: Map output bytes=33
16/08/30 14:08:22 INFO mapred.JobClient: Total committed heap usage (bytes)=175837184
16/08/30 14:08:22 INFO mapred.JobClient: CPU time spent (ms)=1400
16/08/30 14:08:22 INFO mapred.JobClient: Combine input records=4
16/08/30 14:08:22 INFO mapred.JobClient: SPLIT_RAW_BYTES=103
16/08/30 14:08:22 INFO mapred.JobClient: Reduce input records=4
16/08/30 14:08:22 INFO mapred.JobClient: Reduce input groups=4
16/08/30 14:08:22 INFO mapred.JobClient: Combine output records=4
16/08/30 14:08:22 INFO mapred.JobClient: Physical memory (bytes) snapshot=233431040
16/08/30 14:08:22 INFO mapred.JobClient: Reduce output records=4
16/08/30 14:08:22 INFO mapred.JobClient: Virtual memory (bytes) snapshot=1469337600
16/08/30 14:08:22 INFO mapred.JobClient: Map output records=4