# Shallow-copy demo: the outer list is copied, the nested objects are shared.
list_1 = [1, [22, 33, 44], (5, 6, 7), {"name": "Sarah"}]
# list_3 = list_1  # Wrong: this only binds a second name to the same list.
list_2 = list_1.copy()  # list_1[:] or list(list_1) also make a shallow copy.
list_2[1].append(55)  # mutates the inner list that both outer lists reference
print("list_1: ", list_1)
print("list_2: ", list_2)
但是结果却不是只对list_2进行append操作…
list_1: [1, [22, 33, 44, 55], (5, 6, 7), {'name': 'Sarah'}]
list_2: [1, [22, 33, 44, 55], (5, 6, 7), {'name': 'Sarah'}]
引用数组的概念
列表内元素可以分散的存储在内存中
列表存储的,实际上是这些元素的地址——地址的存储在内存中是连续的
# Appending only changes the outer list it is called on.
list_1.append(100)
list_2.append("n")
print("list_1: ", list_1)
print("list_2: ", list_2)
list_1: [1, [22, 33, 44], (5, 6, 7), {'name': 'Sarah'}, 100]
list_2: [1, [22, 33, 44], (5, 6, 7), {'name': 'Sarah'}, 'n']
新增地址不同,因此结果不同
# Item assignment rebinds one slot of one outer list; the other list is untouched.
list_1[0] = 10
list_2[0] = 20
print("list_1: ", list_1)
print("list_2: ", list_2)
list_1: [10, [22, 33, 44], (5, 6, 7), {'name': 'Sarah'}, 100]
list_2: [20, [22, 33, 44], (5, 6, 7), {'name': 'Sarah'}, 'n']
list_1与list_2分别存储新元素的地址,与原先1的地址无关
# Both statements mutate the inner list in place.
list_1[1].remove(44)
list_2[1] += [55, 66]  # += on a list extends it in place
print("list_1: ", list_1)
print("list_2: ", list_2)
list_1: [10, [22, 33, 55, 66], (5, 6, 7), {'name': 'Sarah'}, 100]
list_2: [20, [22, 33, 55, 66], (5, 6, 7), {'name': 'Sarah'}, 'n']
对地址内元素进行修改,但都指向同一地址列表,故存储内容与操作对象都是一样的
# Tuples are immutable, so += builds a new tuple and rebinds list_2[2] to it.
list_2[2] += (8, 9)
print("list_1: ", list_1)
print("list_2: ", list_2)
list_1: [10, [22, 33, 55, 66], (5, 6, 7), {'name': 'Sarah'}, 100]
list_2: [20, [22, 33, 55, 66], (5, 6, 7, 8, 9), {'name': 'Sarah'}, 'n']
究其原因,元组是不可变的!
list_2指向的是新的地址元组,与原地址元组脱离关系
# Adds a key to the dict stored at index -2, mutating that dict in place.
list_1[-2]["age"] = 18
print("list_1: ", list_1)
print("list_2: ", list_2)
list_1: [10, [22, 33, 55, 66], (5, 6, 7), {'name': 'Sarah', 'age': 18}, 100]
list_2: [20, [22, 33, 55, 66], (5, 6, 7, 8, 9), {'name': 'Sarah', 'age': 18}, 'n']
原因与列表相似,地址不变,内容改变
引入深拷贝
import copy

# Deep-copy demo: nested containers are copied too, so edits stay independent.
list_1 = [1, [22, 33, 44], (5, 6, 7), {"name": "Sarah"}]
list_2 = copy.deepcopy(list_1)
list_1[-1]["age"] = 18  # changes only list_1's dict
list_2[1].append(55)  # changes only list_2's inner list
print("list_1: ", list_1)
print("list_2: ", list_2)
list_1: [1, [22, 33, 44], (5, 6, 7), {'name': 'Sarah', 'age': 18}]
list_2: [1, [22, 33, 44, 55], (5, 6, 7), {'name': 'Sarah'}]
让我们来比较一下列表查找与字典查找的速度
首先我们利用列表查找
import time

# Benchmark: membership tests against a list scan the list element by element.
ls_1 = list(range(1000000))
ls_2 = list(range(500)) + [-10] * 500  # 500 values present, 500 absent

if __name__ == "__main__":
    # Guarded so that importing this snippet does not run the slow scan.
    start = time.time()
    count = 0
    for n in ls_2:
        if n in ls_1:
            count += 1
    end = time.time()
    print("查找{}个元素,在ls_1列表的有{}个,共用时{}秒".format(len(ls_2), count, round((end-start), 2)))
查找1000个元素,在ls_1列表的有500个,共用时3.56秒
我们再利用字典查找
import time

# Benchmark: the same 1000 lookups against a dict, which finds keys by hash.
d = {i: i for i in range(1000000)}
ls_2 = list(range(500)) + [-10] * 500  # 500 keys present, 500 absent

start = time.time()
count = 0
for n in ls_2:
    try:
        d[n]
    except KeyError:
        # Only a missing key means "not found"; any other error should surface.
        pass
    else:
        count += 1
end = time.time()
print("查找{}个元素,在ls_1列表的有{}个,共用时{}秒".format(len(ls_2), count, round(end - start)))
查找1000个元素,在ls_1列表的有500个,共用时0秒
通过稀疏数组来实现值的存储与访问
字典的创建过程
d = {}
print(hash("python")) print(hash(1024)) print(hash((1, 2)))
-389441618299413159
1024
-3550055125485641917
# Adding a key-value pair starts by computing the key's hash, hash("age").
d["age"] = 18
print(hash("age"))
-8880918082949028501
第二步:根据计算的散列值确定其在散列表中的位置
极个别的时候,散列值会冲突,则内部有相应的解决冲突的办法
第三步:在该位置上存入值
键值对的访问过程
d["age"]
(1)字典数据类型,通过空间换时间,实现了快速的数据查找
(2)因为散列值对应位置的顺序与键的插入顺序可能不同,早期版本的字典表现为无序;自Python 3.7起,字典在语言层面保证按插入顺序保存键值对
通过紧凑数组实现字符串的存储
在生命周期中保持内容不变
x = 1 y = "Python" print("x id:", id(x)) print("y id:", id(y))
x id: 1448585029936
y id: 1448590350256
x += 2 y += "3.7" print("x id:", id(x)) print("y id:", id(y))
x id: 1448585030000
y id: 1448593238832
元组本身不可变,但它所包含的可变元素的内容仍然可以改变
比如元组元素有列表时
t = (1, [2]) t[1].append(3) print(t)
(1, [2, 3])
ls = [1, 2, 3] d = {"Name": "Sarah", "Age": 18} print("ls id:", id(ls)) print("d id:", id(d)) ls += [4, 5] d_2 = {"Sex": "female"} d.update(d_2) print("ls id:", id(ls)) print("d id:", id(d))
ls id: 2332419560576
d id: 2332415175616
ls id: 2332419560576
d id: 2332415175616
# Remove every occurrence of s by re-checking membership before each removal.
alist = ["d", "d", "d", "2", "2", "d", "d", "4"]
s = "d"
while s in alist:
    alist.remove(s)  # remove() drops the first occurrence only
print(alist)
['2', '2', '4']
# Counter-example: removing from a list while iterating over it skips elements,
# so some "d" entries survive.
alist = ["d", "d", "d", "2", "2", "d", "d", "4"]
for s in alist:
    if s == "d":
        alist.remove(s)  # remove(s) deletes the first occurrence of s
print(alist)
['2', '2', 'd', 'd', '4']
原因在于删去后列表内元素位置发生变化
解决方法:使用负向索引
# Walk the list with negative indices, which are measured from the end of the
# list, and remove "d" entries as they are met.
alist = ["d", "d", "d", "2", "2", "d", "d", "4"]
for i in range(-len(alist), 0):
    if alist[i] == "d":
        alist.remove(alist[i])  # remove() deletes the first occurrence
print(alist)
['2', '2', '4']
ls = [[0]*10]*5 print(ls)
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
ls[0][0] = 1 print(ls)
[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
原因在于*5是相同的地址,故一行变化,五行一起变化
回到上述问题,如何只改变ls[0][0]位置的元素呢?
ls = [[0]*10 for i in range(5)] print(ls)
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
ls[0][0] = 1 print(ls)
[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
我们发现这里解决了上述问题,因为五行为独立创建的
[expression for value in iterable if condition]
执行过程
(1) 从可迭代对象中拿出一个元素
(2) 通过if条件(如果有的话),对元素进行筛选
若通过筛选:则把元素传递给表达式
若未通过:则进入(1)步骤,进入下一次迭代
(3) 将传递给表达式的元素代入到表达式中进行处理,产生一个结果
(4) 将(3)步产生的结果作为列表的一个元素进行存储
(5) 重复(1)~(4)步,直至迭代对象迭代结束,返回新创建的列表
# 等价于如下代码 result = [] for value in iterable: if condition: result.append(expression)
# Long-hand version: squares of the odd numbers from 1 to 20.
squares = []
for i in range(1, 21):
    if i % 2 == 1:
        squares.append(i ** 2)
print(squares)
[1, 9, 25, 49, 81, 121, 169, 225, 289, 361]
利用解析语法可简写为
squares = [i**2 for i in range(1, 21) if i % 2 == 1] print(squares)
[1, 9, 25, 49, 81, 121, 169, 225, 289, 361]
支持多变量
x = [1, 2, 3] y = [1, 2, 3] result = [i*j for i, j in zip(x, y)] print(result)
[1, 4, 9]
支持循环嵌套
colors = ["black", "white"] sizes = ["S", "M", "L"] tshirts = {"{} {}".format(color, size) for color in colors for size in sizes} print(tshirts)
{'white M', 'black S', 'black M', 'black L', 'white S', 'white L'}
squares = {i: i**2 for i in range(10)} for k, v in squares.items(): print(k, ":", v)
0 : 0
1 : 1
2 : 4
3 : 9
4 : 16
5 : 25
6 : 36
7 : 49
8 : 64
9 : 81
squares = {i**2 for i in range(10)} print(squares)
{0, 1, 64, 4, 36, 9, 16, 49, 81, 25}
squares = (i**2 for i in range(10)) print(squares)
<generator object <genexpr> at 0x0000019CA4D8EB30>
可利用生成器进行迭代
colors = ["black", "white"] sizes = ["S", "M", "L"] tshirts = ("{} {}".format(color, size) for color in colors for size in sizes) for tshirt in tshirts: print(tshirt)
black S
black M
black L
white S
white M
white L
expr1 if condition else expr2
n = -10 if n >= 0: x = n else: x = -n print(x)
10
使用简洁语法后
n = -10 x = n if n >= 0 else -n print(x)
10
条件表达式和解析语法简单实用,运行速度相对更快一些
ls = [i**2 for i in range(1, 1000001)] for i in ls: pass
缺点:占用大量内存
生成器
(1) 采用惰性计算的方式
(2) 无需一次性存储海量数据
(3) 一边执行一边计算,只计算每次需要的值
(4) 实际上一直在执行next()操作,直到无值可取
squares = (i**2 for i in range(1, 1000001)) for i in squares: pass
print(sum((i for i in range(101))))
5050
def fib(max):
    """Return the first ``max`` Fibonacci numbers (starting 1, 1) as a list.

    Args:
        max: How many numbers to produce; values <= 0 give an empty list.

    Returns:
        A list holding the sequence in order.
    """
    ls = []
    n, a, b = 0, 1, 1
    while n < max:
        ls.append(a)
        a, b = b, a + b
        n = n + 1
    return ls


print(fib(10))
[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
使用yield,构造生成器函数
在每次调用next()的时候执行,遇到yield语句返回,再次调用时从上次返回的yield语句处继续执行
def fib(max):
    """Yield the first ``max`` Fibonacci numbers (starting 1, 1) one at a time.

    Args:
        max: How many numbers to yield; values <= 0 yield nothing.

    Yields:
        The next number of the sequence each time the generator is advanced.
    """
    n, a, b = 0, 1, 1
    while n < max:
        yield a
        a, b = b, a + b
        n = n + 1


print(fib(10))  # prints the generator object, not the numbers
for i in fib(10):
    print(i)
<generator object fib at 0x0000014AE130EB30>
1
1
2
3
5
8
13
21
34
55
可直接作用于for循环的对象统称为可迭代对象:Iterable
可以使用isinstance()判断一个对象是否是Iterable对象
from collections.abc import Iterable print(isinstance([1, 2, 3], Iterable))
True
print(isinstance({"name": "Sarah"}, Iterable))
True
print(isinstance("Python", Iterable))
True
squares = (i**2 for i in range(5)) print(isinstance(squares, Iterable))
True
生成器不但可以用于for循环,还可以被next()函数调用
print(next(squares)) print(next(squares)) print(next(squares)) print(next(squares)) print(next(squares))
0
1
4
9
16
直到没有数据可取,抛出StopIteration
print(next(squares))
StopIteration
可以被next()函数调用并不断返回下一个值,直至没有数据可取的对象称为迭代器:Iterator
可以使用isinstance()判断一个对象是否是Iterator对象
from collections.abc import Iterator squares = (i**2 for i in range(5)) print(isinstance(squares, Iterator))
True
print(isinstance([1, 2, 3], Iterator))
False
可以通过iter(Iterable)创建迭代器
print(isinstance(iter([1, 2, 3]), Iterator))
True
for item in Iterable 等价于:
先通过iter()函数获取可迭代对象的迭代器
然后对获取到的迭代器不断调用next()方法来获取下一个值并将其赋值给item
当遇到StopIteration的异常后循环结束
x = [1, 2] y = ["a", "b"] for i in zip(x, y): print(i) print(isinstance(zip(x, y), Iterator))
(1, 'a')
(2, 'b')
True
numbers = [1, 2, 3, 4, 5] for i in enumerate(numbers): print(i) print(isinstance(enumerate(numbers), Iterator))
(0, 1)
(1, 2)
(2, 3)
(3, 4)
(4, 5)
True
enumerate返回元素位置信息与元素的元组
with open("测试文件.txt", "r", encoding="utf-8") as f: print(isinstance(f, Iterator))
True
# A generator can be consumed only once.
squares = (i ** 2 for i in range(5))
for square in squares:
    print(square)
for square in squares:
    print(square)  # prints nothing: the first loop exhausted the generator
numbers = range(10) print(isinstance(numbers, Iterator))
False
print(len(numbers))  # has a length
print(numbers[0])  # supports indexing
print(9 in numbers)  # supports membership tests
next(numbers)  # cannot be passed to next(): raises TypeError
10
0
True
TypeError: 'range' object is not an iterator
for number in numbers: print(number)
0
1
2
3
4
5
6
7
8
9
可以称range()为懒序列
(1)需要对已开发上线的程序添加某些功能
(2)不能对程序中函数的源代码进行修改
(3)不能改变程序中函数的调用方式
比如,要统计每个函数的运行时间
def f1(): pass def f2(): pass def f3(): pass f1() f2() f3()
函数是Python中的第一类对象
(1)可以把函数赋值给变量
(2)对该变量进行调用,可实现原函数的功能
def square(x):
    """Return x raised to the power of two."""
    return x ** 2


print(type(square))
<class 'function'>
pow_2 = square # 可以理解成给这个函数起了个别名pow_2 print(pow_2(5)) print(square(5))
25
25
可以将函数作为参数进行传递
(1)接收函数作为参数
(2)或者返回一个函数
满足上述条件之一的函数称之为高阶函数
def square(x):
    """Return x raised to the power of two."""
    return x ** 2


def pow_2(fun):
    """Higher-order function: accept a function and return it unchanged."""
    return fun


f = pow_2(square)
print(f(8))
64
print(f == square)
True
在函数内部定义一个函数
def outer():
    """Print a message, then define and immediately call a nested function."""
    print("outer is running")

    def inner():
        print("inner is running")

    inner()


outer()
outer is running
inner is running
def outer():
    """Return a nested function that keeps using outer's local variables."""
    x = 1
    z = 10

    def inner():
        y = x + 100
        return y, z

    return inner


f = outer()  # f carries inner itself plus the variables it uses from outer
print(f)
<function outer.<locals>.inner at 0x000001F8E28FE040>
print(f.__closure__) # __closure__属性中包含了来自外部函数的信息 for i in f.__closure__: print(i.cell_contents)
(<cell at 0x000001BF227D7FD0: int object at 0x000001BF22116930>, <cell at 0x000001BF22726A00: int object at 0x000001BF22116A50>)
1
10
res = f() print(res)
(101, 10)
闭包:延伸了作用域的函数
如果一个函数定义在另一个函数的作用域内,并且引用了外层函数的变量,则称该函数为闭包
闭包是由函数及其相关的引用环境组合而成的实体(即:闭包=函数+引用环境)
def outer():
    """Return a nested function that fails when called (deliberate demo)."""
    x = 1

    def inner():
        # Assigning to x makes it local to inner, so reading it on the
        # right-hand side happens before it has a value.
        x = x + 100
        return x

    return inner


f = outer()
if __name__ == "__main__":
    # Guarded so that importing this snippet does not raise.
    print(f())  # raises UnboundLocalError
因为inner里对x进行了赋值,x被视为inner的局部变量,而读取时它尚未被赋值,故会报错(UnboundLocalError)
nonlocal允许内嵌的函数来修改闭包变量
def outer():
    """Return a nested function that adds 100 to outer's x on every call."""
    x = 1

    def inner():
        nonlocal x  # rebind outer's x instead of creating a local one
        x = x + 100
        return x

    return inner


f = outer()
print(f())
101
嵌套函数的实现
import time


def timer(func):
    """Wrap a zero-argument function so that each call reports its run time.

    Args:
        func: Callable invoked with no arguments.

    Returns:
        A wrapper that prints "inner run", calls func, then prints the
        elapsed seconds. The wrapper does not return func's result.
    """
    def inner():
        print("inner run")
        start = time.time()
        func()
        end = time.time()
        print("{}函数运行用时{:.2f}秒".format(func.__name__, end - start))

    return inner


def f1():
    print("f1 run")
    time.sleep(1)


f1 = timer(f1)  # manual decoration: rebind the name to the timed wrapper

if __name__ == "__main__":
    # Guarded so that importing this snippet does not sleep.
    f1()
inner run
f1 run
f1函数运行用时1.00秒
语法糖
import time


def timer(func):
    """Wrap a zero-argument function so that each call reports its run time.

    Args:
        func: Callable invoked with no arguments.

    Returns:
        A wrapper that prints "inner run", calls func, then prints the
        elapsed seconds. The wrapper does not return func's result.
    """
    def inner():
        print("inner run")
        start = time.time()
        func()
        end = time.time()
        print("{}函数运行用时{:.2f}秒".format(func.__name__, end - start))

    return inner


@timer  # syntactic sugar for f1 = timer(f1)
def f1():
    print("f1 run")
    time.sleep(1)


if __name__ == "__main__":
    # Guarded so that importing this snippet does not sleep.
    f1()
inner run
f1 run
f1函数运行用时1.00秒
import time


def timer(func):
    """Wrap a function of any signature so that each call reports its run time.

    Args:
        func: Callable to time; the wrapper forwards all positional and
            keyword arguments to it.

    Returns:
        A wrapper that prints "inner run", calls func, then prints the
        elapsed seconds. The wrapper does not return func's result.
    """
    def inner(*args, **kwargs):
        print("inner run")
        start = time.time()
        func(*args, **kwargs)
        end = time.time()
        print("{}函数运行用时{:.2f}秒".format(func.__name__, end - start))

    return inner


@timer  # syntactic sugar for f1 = timer(f1)
def f1(n):
    print("f1 run")
    time.sleep(n)


if __name__ == "__main__":
    # Guarded so that importing this snippet does not sleep.
    f1(2)
inner run
f1 run
f1函数运行用时2.00秒
被装饰函数有返回值的情况
import time


def timer(func):
    """Wrap a function so each call reports its run time and keeps its result.

    Args:
        func: Callable to time; the wrapper forwards all positional and
            keyword arguments to it.

    Returns:
        A wrapper that prints "inner run", calls func, prints the elapsed
        seconds, and returns whatever func returned.
    """
    def inner(*args, **kwargs):
        print("inner run")
        start = time.time()
        res = func(*args, **kwargs)
        end = time.time()
        print("{}函数运行用时{:.2f}秒".format(func.__name__, end - start))
        return res

    return inner


@timer  # syntactic sugar for f1 = timer(f1)
def f1(n):
    print("f1 run")
    time.sleep(n)
    return "wake up"


if __name__ == "__main__":
    # Guarded so that importing this snippet does not sleep.
    res = f1(2)
    print(res)
inner run
f1 run
f1函数运行用时2.00秒
wake up
装饰器本身要传递一些额外参数
import time


def timer(method):
    """Build a timing decorator whose reported time depends on ``method``.

    Args:
        method: "origin" reports the measured elapsed time; "double" reports
            twice the measured elapsed time.

    Returns:
        A decorator. The decorated function prints progress messages and the
        reported run time on every call, and returns the original result.

    Raises:
        ValueError: If ``method`` is neither "origin" nor "double".
    """
    factors = {"origin": 1, "double": 2}
    # Reject unknown methods up front. Previously neither branch ran for them,
    # `res` was never assigned and every call failed with UnboundLocalError.
    if method not in factors:
        raise ValueError("unknown method: {!r}".format(method))
    factor = factors[method]

    def outer(func):
        def inner(*args, **kwargs):
            print("inner run")
            print("{}_inner run".format(method))
            start = time.time()
            res = func(*args, **kwargs)
            end = time.time()
            print("{}函数运行用时{:.2f}秒".format(func.__name__, factor * (end - start)))
            return res

        return inner

    return outer


@timer(method="origin")  # equivalent to f1 = timer(method="origin")(f1)
def f1():
    print("f1 run")
    time.sleep(1)
    return "wake up"


@timer(method="double")
def f2():
    print("f2 run")
    time.sleep(1)
    return "wake up"


if __name__ == "__main__":
    # Guarded so that importing this snippet does not sleep.
    f1()
    print(" ")
    f2()
inner run
origin_inner run
f1 run
f1函数运行用时1.00秒
inner run
double_inner run
f2 run
f2函数运行用时2.00秒
理解闭包是关键
func_names = []


def find_function(func):
    """Registering decorator: record ``func`` in func_names, return it unchanged.

    The body runs when the decorator is applied, i.e. when the decorated
    function is defined, not when that function is later called.
    """
    print("run")
    func_names.append(func)
    return func


@find_function
def f1():
    print("f1 run")


@find_function
def f2():
    print("f2 run")


@find_function
def f3():
    print("f3 run")
run
run
run
我们发现,还未调用函数,装饰函数就已经开始运行
我们尝试对func_names进行遍历
for func in func_names: print(func.__name__) func() print()
f1
f1 run
f2
f2 run
f3
f3 run
import time


def timer(func):
    """Wrap a zero-argument function so that each call reports its run time.

    The wrapper is returned as-is, so the decorated name exposes the
    wrapper's own metadata (its __name__ is "inner").
    """
    def inner():
        print("inner run")
        start = time.time()
        func()
        end = time.time()
        print("{}函数运行用时{:.2f}秒".format(func.__name__, end - start))

    return inner


@timer  # syntactic sugar for f1 = timer(f1)
def f1():
    print("f1 run")
    time.sleep(1)


print(f1.__name__)  # shows the wrapper's name, not "f1"
inner
import time
from functools import wraps


def timer(func):
    """Wrap a function so each call reports its run time, keeping its metadata.

    Args:
        func: Callable to time; the wrapper forwards all positional and
            keyword arguments to it.

    Returns:
        A wrapper that prints "inner run", calls func, prints the elapsed
        seconds, and returns whatever func returned. functools.wraps copies
        func's metadata (such as __name__) onto the wrapper.
    """
    @wraps(func)  # restore the wrapped function's original attributes
    def inner(*args, **kwargs):
        print("inner run")
        start = time.time()
        res = func(*args, **kwargs)
        end = time.time()
        print("{}函数运行用时{:.2f}秒".format(func.__name__, end - start))
        return res

    return inner


@timer  # syntactic sugar for f1 = timer(f1)
def f1():
    print("f1 run")
    time.sleep(1)


print(f1.__name__)

if __name__ == "__main__":
    # Guarded so that importing this snippet does not sleep.
    f1()
f1
inner run
f1 run
f1函数运行用时1.00秒
以上,便是第八节深入探索内容,包含数据类型的底层故事,简洁语法与实用器具。
下一节将深入理解Python自带的标准库。