未配置好hadoop,去~/.bashrc添加环境变量
export PATH=$PATH:$HADOOP_HOME/sbin
添加spark环境变量
export SPARK_HOME=/usr/local/spark export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip:PYTHONPATH export PYSPARK_PYTHON=python3 export PATH=$PATH:$SPARK_HOME/bin
path='/home/hadoop/wc/f1.txt' with open(path) as f: text=f.read() words = text.split() wc={} for word in words: wc[word]=wc.get(word,0)+1 wclist=list(wc.items()) wclist.sort(key=lambda x:x[1],reverse=True) print(wclist)