from pyquery import PyQuery as pq

# Initialisation, variant 1: build a PyQuery document from an HTML string.
# NOTE(review): the fragment never closes <ul> or <div>; the snippet relies
# on the HTML parser recovering from that -- confirm this is intentional.
html = '''
<div>
    <ul>
        <li class="item-0">frist item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth</a></li>
'''

doc = pq(html)

# Calling the document with a CSS selector returns the matching nodes;
# here every <li> element is selected and printed.
print(doc("li"))
from pyquery import PyQuery as pq

# Initialisation, variant 2: build the document from a URL.
# This performs a network request when the snippet runs.
doc = pq(url="http://www.baidu.com")

# Print the <head> element of the fetched page.
print(doc("head"))
from pyquery import PyQuery as pq

# Initialisation, variant 3: build the document from a local file.
# demo.html is an HTML file that has already been saved.
doc = pq(filename='demo.html')

# Print every <li> element found in the file.
print(doc("li"))

# --- CSS selectors ---
# In a CSS selector, "#" selects by id and "." selects by class.
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">frist item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth</a></li>
'''

doc = pq(html)

# Descendant selector: start at the node whose id is "container", go to
# the node with class "list" inside it, and select every <li> inside that.
print(doc('#container .list li'))
from pyquery import PyQuery as pq

# Finding elements: child / descendant nodes.
html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">frist item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth</a></li>
'''

doc = pq(html)

# Select the node with class "list" (the <ul>).
items = doc(".list")
print(type(items))  # <class 'pyquery.pyquery.PyQuery'>
print(items)

# find() searches inside the current selection: the <li> nodes under the <ul>.
lis = items.find('li')
print(type(lis))
print(lis)
from pyquery import PyQuery as pq

# Finding elements: parent and ancestor nodes.
html = '''
<div class="warp">
    <div id="container">
        <ul class="list">
            <li class="item-0">frist item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth</a></li>
'''

doc = pq(html)
items = doc(".list")

# parent() returns the direct parent node.
print(type(items.parent()))
print(items.parent())

# parents() returns all ancestor nodes, each printed separately.
print(items.parents())

# Passing a selector to parents() keeps only the matching ancestor, so the
# output is not the full list of ancestors that plain parents() prints.
print(items.parents(".warp"))

# --- Finding sibling nodes ---
from pyquery import PyQuery as pq

html = '''
<div class="warp">
    <div id="container">
        <ul class="list">
            <li class="item-0">frist item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth</a></li>
'''

doc = pq(html)

# ".item-0.active" (no space) matches a node carrying both classes.
li = doc(".list .item-0.active")

# siblings() returns the sibling nodes of the selection.
print(li.siblings())
print("**************")

# With a selector, only the siblings matching it are returned.
print(li.siblings(".active"))

# --- Traversal ---
from pyquery import PyQuery as pq

# Traversal, single-element case: the selection can be printed directly.
html = '''
<div class="warp">
    <div id="container">
        <ul class="list">
            <li class="item-0">frist item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth</a></li>
'''

doc = pq(html)
li = doc(".list .item-0.active")
print(li)

# Printed result:
# <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
from pyquery import PyQuery as pq

# Traversal, multi-element case: iterate over the matched nodes one by one.
html = '''
<div class="warp">
    <div id="container">
        <ul class="list">
            <li class="item-0">frist item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth</a></li>
'''

doc = pq(html)

# items() produces an iterator over the matches: <class 'generator'>.
li = doc("li").items()
print(type(li))

for i in li:
    print(i)

# --- Getting information ---
# Getting attributes.
# Uses `html` and `pq` as defined by the preceding snippet in this file.
doc = pq(html)

# Select the <a> inside the node that has both "item-0" and "active" classes.
a = doc(".item-0.active a")
print(a)

# Two equivalent ways of reading the href attribute.
print(a.attr("href"))
print(a.attr.href)
# Getting text.
# Uses `html` and `pq` as defined by the preceding snippet in this file.
doc = pq(html)

a = doc(".item-0.active a")
print(a)

# Without arguments, text() returns the text content of the selection.
print(a.text())

# With an argument, text() replaces the text content of the node (it does
# not add an attribute); the modified node is what gets printed here.
print(a.text('heelo'))
# Getting / setting inner HTML.
# Uses `html` and `pq` as defined by the preceding snippet in this file.
doc = pq(html)

li = doc(".item-0.active")
print(li)

# With an argument, html() replaces the inner HTML of the node; the
# modified node is what gets printed here.
print(li.html('<span>changed item</span>'))

# --- DOM operations ---
# DOM operations: removeClass / addClass.
# Uses `html` and `pq` as defined by the preceding snippet in this file.
doc = pq(html)

li = doc(".item-0.active")
print(li)

# Remove the "active" class from the node.
li.removeClass('active')
print(li)

# Add the "active" class back to the node.
li.addClass('active')
print(li)

# Printed result:
# <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
# <li class="item-0"><a href="link3.html"><span class="bold">third item</span></a></li>
# <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
# Reference documentation:
# Link: (the URL is missing from this copy of the text)
from pyquery import PyQuery as pq

# DOM operations: attr / css.
# Uses `html` as defined by an earlier snippet in this file.
doc = pq(html)
li = doc(".item-0.active")

# Set the "name" attribute; if the node already has a "name" attribute,
# its value is overwritten with "link".
li.attr('name', 'link')
print(li)

# Set a CSS property; it shows up in the node's style attribute.
li.css('font-size', '14px')
print(li)

# Printed result:
# <li class="item-0 active" name="link"><a href="link3.html"><span class="bold">third item</span></a></li>
# <li class="item-0 active" name="link" style="font-size: 14px"><a href="link3.html"><span class="bold">third item</span></a></li>
from pyquery import PyQuery as pq

# DOM operations: remove.
html = '''
<div class="wrap">
    Hello,World
    <p>This is a paragraph</p>
</div>
'''

doc = pq(html)
wrap = doc(".wrap")

# Text of the whole wrapper, including the paragraph.
print(wrap.text())

# Remove the <p> node, then print the text again: only the wrapper's own
# text is left.
wrap.find('p').remove()
print(wrap.text())

# Printed result:
# Hello,World
# This is a paragraph
# Hello,World
from pyquery import PyQuery as pq

# Pseudo-class selectors.
html = '''
<div>
    <ul>
        <li class="item-0">frist item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth</a></li>
'''

doc = pq(html)

# The first <li> child.
li = doc('li:first-child')
print(li)
print("---------1-------")

# The last <li> child.
li = doc('li:last-child')
print(li)
print("---------2-------")

# The second <li> child (nth-child counts from 1).
li = doc('li:nth-child(2)')
print(li)
print("----------3------")

# Elements whose index is greater than 2 (index counted from 0).
li = doc('li:gt(2)')
print(li)
print("----------4------")

# Elements whose text contains "second".
li = doc('li:contains(second)')
print(li)
print("----------------")

# Printed result:
# <li class="item-0">frist item</li>
# ---------1-------
# <li class="item-1 active"><a href="link4.html">fourth</a></li>
# ---------2-------
# <li class="item-1"><a href="link2.html">second item</a></li>
# ----------3------
# <li class="item-1 active"><a href="link4.html">fourth</a></li>
# ----------4------
# <li class="item-1"><a href="link2.html">second item</a></li>
# ----------------