str.split(sep=None, maxsplit=-1) 第二个参数 maxsplit 代表最大分割次数。maxsplit 为 1 时最多分割 1 次,结果最多包含 2 个元素。
maxsplit 默认为 -1(即不指定 maxsplit 的值),表示不限制分割次数,尽可能多地分割。
# Convert a list of numeric strings into a list of integers.
x = ['1', '2', '3']
x = [int(item) for item in x]
import ast
import json

# A Python-literal style string: single-quoted keys/values are NOT valid JSON.
data = "{'field1': 0, 'field2': 'hehehehe', 'field3': 'hahaha'}"

try:
    json.loads(data)
except json.JSONDecodeError as exc:
    # Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
    print(exc)

# Parse the string as a Python literal and re-serialise it as real JSON.
# ast.literal_eval only accepts literals, so unlike eval() it cannot execute
# arbitrary code hidden in the input.
data = json.dumps(ast.literal_eval(data))  # no error
def sorted_dict_values(adict):
    """Return the values of ``adict`` ordered by their keys.

    Args:
        adict: A dictionary whose keys are mutually comparable.

    Returns:
        A list of the dictionary's values, sorted by the corresponding key.
    """
    # dict.items() returns a view without a .sort() method in Python 3;
    # sorted() builds the ordered list of (key, value) pairs instead.
    items = sorted(adict.items())
    return [value for key, value in items]
普通字典的取值用法是 dict[element],前提是 element 在字典里;如果不在字典里就会报错(KeyError)。
defaultdict 的作用在于:当字典里的 key 不存在但被查找时,不会抛出 KeyError,而是返回一个默认值。
默认值是什么?
# d = defaultdict(factory_function)
# defaultdict takes a factory function (list, set, str, int, ...). When a
# missing key is looked up, the factory is called and its result is returned:
# list -> [], str -> '' (empty string), set -> set(), int -> 0.
from collections import defaultdict

dict1 = defaultdict(int)
dict2 = defaultdict(set)
dict3 = defaultdict(str)
dict4 = defaultdict(list)

dict1[2] = 'two'

# Key 1 is absent from every mapping, so each lookup yields the factory default.
for demo in (dict1, dict2, dict3, dict4):
    print(demo[1])
out
0 set() []
和linux自带的md5sum命令计算结果一致
计算字符串md5值
import hashlib


def string_to_md5(string):
    """Return the hexadecimal MD5 digest of ``string`` encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(string.encode('utf8'))
    return digest.hexdigest()
计算文件md5值
#coding: utf-8
import hashlib
import os
import sys


def md5sum(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    Args:
        fname: Path of the file to hash.

    Returns:
        The hex digest string, or ``False`` when ``fname`` is not a regular
        file or cannot be opened.
    """
    if not os.path.isfile(fname):
        return False

    try:
        # open() replaces the Python 2-only file() builtin; under Python 3 the
        # old call raised NameError, which the bare except turned into False.
        f = open(fname, 'rb')
    except OSError:
        return False

    m = hashlib.md5()
    # The with-block closes the handle even if reading fails part-way.
    with f:
        # Read in fixed-size chunks so large files never have to fit in memory.
        for d in iter(lambda: f.read(8096), b''):
            m.update(d)
    return m.hexdigest()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(u"请输入文件路径")
        sys.exit(1)
    filepath = sys.argv[1]
    print(md5sum(filepath))
import csv

# Print every row of the CSV file.
# - Mode 'rU' was removed in Python 3.11 (ValueError); plain 'r' is used.
# - codecs.open() ignored errors="ignore" because no encoding was given;
#   the built-in open() applies it, so undecodable bytes are skipped.
# - newline='' lets the csv module do its own newline handling, as its
#   documentation recommends.
with open('data/journalName1.csv', 'r', newline='', errors="ignore") as csv_file:
    reader = csv.reader(csv_file)
    for line in reader:
        print(line)
# Approach 1: read all lines into a list, then slice off the first one.
with open('file_name', 'r') as f:
    lines = f.readlines()[1:]  # skip the first line.
    for line in lines:
        print(line)

# Approach 2: advance the file iterator past the first line, then stream the
# remaining lines without loading the whole file into memory.
with open('file_name', 'r') as f:
    # The None default keeps an empty file from raising StopIteration.
    next(f, None)  # skip the first line.
    for line in f:
        print(line)
import os

# NOTE: `path` is not defined in this snippet; it must be set beforehand.

# Recursive traversal: os.walk yields one (dirpath, dirnames, filenames)
# tuple per directory under `path`.
for root, dirs, files in os.walk(path):
    print((root, dirs, files))

# Non-recursive listing: names of the entries directly inside `path`.
filenames = os.listdir(path)
for filename in filenames:
    print(filename)
import xlrd


def read_excel():
    """Open ./test.xlsx with xlrd and print sheet names, sizes and sample cells."""
    # NOTE(review): xlrd 2.x only reads .xls files — confirm the installed
    # xlrd version can open this .xlsx workbook.
    workbook = xlrd.open_workbook('./test.xlsx')

    # Names of all sheets, e.g. [u'sheet1', u'sheet2'].
    print(workbook.sheet_names())

    # Fetch a sheet by index (0-based); lookup by name is also possible.
    first_sheet = workbook.sheet_by_index(0)

    # Sheet name, number of rows, number of columns.
    print(first_sheet.name, first_sheet.nrows, first_sheet.ncols)

    # Whole-row and whole-column values come back as lists.
    fourth_row = first_sheet.row_values(3)    # row index 3 -> fourth row
    third_column = first_sheet.col_values(2)  # column index 2 -> third column
    print(fourth_row)
    print(third_column)

    # Three equivalent ways to read the cell at row 1, column 0.
    # .encode() assumes the cell holds text — TODO confirm for this workbook.
    print(first_sheet.cell(1, 0).value.encode('utf-8'))
    print(first_sheet.cell_value(1, 0).encode('utf-8'))
    print(first_sheet.row(1)[0].value.encode('utf-8'))

    # Type code (ctype) of that same cell.
    print(first_sheet.cell(1, 0).ctype)


if __name__ == '__main__':
    read_excel()
xls 文件内容存在编码问题时,使用 xlrd 读取会报错 XLRDError,需要先做编码转换:
file = "Important_declaredate.xls"

# Read the file as raw bytes and decode each line from GB2312.
# The with-block closes the handle, which the original never did.
with open(file, 'rb') as f:
    for raw_line in f:
        # In Python 3 the decoded value is already a Unicode str; print()
        # encodes it for the output stream, so no manual .encode('utf8') is
        # needed (printing the encoded bytes would show a b'...' repr).
        line = raw_line.decode('gb2312')
        print(line)
字符串在Python内部的表示是unicode编码,因此,在做编码转换时,通常需要以unicode作为中间编码,即先将其他编码的字符串解码(decode)成unicode,再从unicode编码(encode)成另一种编码。
# POST a JSON prediction request to the model endpoint and print the raw reply.
# NOTE(review): the URL and "signature_name" look like a TensorFlow-Serving
# style REST predict call — confirm against the service.
url = 'http://serving.c7e6379018aad473385b99f37fbab2471.cn-shanghai.alicontainer.com/v1/models/push_rank:predict'
headers = {'Content-Type':'application/json'}

# One row of 816 ones for both the ids and the values.
features = {
    "feat_ids": [[1] * 816],
    "feat_vals": [[1] * 816],
}

payload = {"signature_name": "serving_default", "inputs": features}
d = json.dumps(payload)

response = requests.post(url, headers=headers, data=d)
print(response.text)