# [root@bogon ~]# echo "foo foo quux labs foo bar zo zoo hying" | python3 mapper.py | sort -k 1,1 | python3 reducer.py ^C
# [root@bogon ~]# cat reducer.py
#! /usr/bin/python3
"""Word-count reducer for a map | sort | reduce pipeline.

Reads "word<TAB>count" records from stdin and prints one "word<TAB>total"
line per run of consecutive identical words. Identical words are only
merged when they are adjacent, so the input must already be grouped by
word (the pipeline above does this with `sort -k 1,1`).
"""
from operator import itemgetter  # not used below; kept from the original script
import sys


def reduce_counts(lines):
    """Sum the counts of consecutive records that share the same word.

    Args:
        lines: iterable of strings, each expected to be "word<TAB>count".

    Yields:
        (word, total) tuples, one per run of adjacent identical words, in
        input order.

    Records that do not split into exactly two tab-separated fields, or
    whose count is not an integer, are skipped.
    """
    current_word = None
    current_count = 0
    for line in lines:
        record = line.strip()
        try:
            # Both the unpacking and the int() conversion raise ValueError on
            # a malformed record, so both live inside the try.
            word, count = record.split('\t')
            count = int(count)
        except ValueError:
            continue
        if word == current_word:
            current_count += count
        else:
            # A new word starts: emit the finished group, if there is one.
            if current_word is not None:
                yield current_word, current_count
            current_word = word
            current_count = count
    # Flush the last group. Testing against None (rather than against the
    # last word read) emits nothing for empty input and does not lose the
    # group when the final record was malformed.
    if current_word is not None:
        yield current_word, current_count


def main():
    """Reduce stdin and print each result as "word<TAB>total"."""
    for word, total in reduce_counts(sys.stdin):
        print("%s\t%s" % (word, total))


if __name__ == "__main__":
    main()
# [root@bogon ~]# cat mapper.py
#! /usr/bin/python3
"""Word-count mapper: write "word<TAB>1" for every whitespace-separated
token read from stdin, one record per line, in input order."""
import sys

for raw_line in sys.stdin:
    # split() with no separator already ignores leading/trailing whitespace,
    # so no separate strip() step is needed.
    for token in raw_line.split():
        sys.stdout.write('%s\t%s\n' % (token, 1))
# [root@bogon ~]#
[root@bogon Python-3.7.5]# echo "foo foo quux labs foo bar zo zoo hying" | python3 mapper.py | more
foo 1
foo 1
quux 1
labs 1
foo 1
bar 1
zo 1
zoo 1
hying 1
[root@bogon ~]# echo "foo foo quux labs foo bar zo zoo hying" | python3 mapper.py | sort -k 1,1 | python3 reducer.py
bar 1
foo 3
hying 1
labs 1
quux 1
zo 1
zoo 1