Selenium 是一个可以用程序模拟人操作浏览器、实现网页自动化的工具,估计很多小伙伴在学爬虫的时候已经有所了解。安装的步骤也很简单:
pip install selenium
# 如果不行,就
sudo pip install selenium
# 如果还不行,就
pip3 install selenium
# 如果还是不行,就
sudo pip3 install selenium
除了安装库之外,还要有供selenium程序启动的浏览器,因此程序运行环境需要提前安装firefox浏览器并下载geckodriver:
wget https://github.com/mozilla/geckodriver/releases/download/v0.22.0/geckodriver-v0.22.0-linux64.tar.gz
# 各个版本的driver下载地址如下: https://github.com/mozilla/geckodriver/releases
tar zxvf geckodriver-v0.22.0-linux64.tar.gz
mv geckodriver /usr/local/bin
需要导入的模块如下:
import time from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC
完整代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Log in to a web page with Selenium/Firefox and click through a punch-in flow."""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains


class LoginUrl(object):
    """Fill in and submit a login form through a Selenium WebDriver.

    Elements are addressed by a locator keyword ("id", "name" or "xpath")
    plus the matching locator value.
    """

    # Locator keywords accepted by the public methods, mapped to the
    # Selenium strategy constants they stand for.
    _LOCATORS = {"id": By.ID, "name": By.NAME, "xpath": By.XPATH}

    def __init__(self, driver, url, username, password):
        """Store the driver, the page URL and the credentials to type in."""
        self.__driver = driver
        self.__url = url
        self.__username = username
        self.__password = password

    def _wait_for_element(self, find_element_method, element, timeout):
        """Wait until the element is present in the DOM and return it.

        Polls (every 500 ms, the WebDriverWait default) until the element
        appears or `timeout` seconds have passed.

        Returns None, after printing "find element error!", when
        `find_element_method` is not one of "id", "name" or "xpath".
        Raises selenium's TimeoutException when the element never appears.
        """
        by = self._LOCATORS.get(find_element_method)
        if by is None:
            print("find element error!")
            return None
        # until() hands back the located element, so no second lookup through
        # the find_element_by_* helpers (removed from recent Selenium 4
        # releases) is needed.
        return WebDriverWait(self.__driver, timeout).until(
            EC.presence_of_element_located((by, element)))

    def openwebsite(self):
        """Maximize the browser window and load the configured URL."""
        self.__driver.maximize_window()
        self.__driver.get(self.__url)

    def inputusername(self, find_element_method, element):
        """Type the user name into the field addressed by the locator.

        Waits up to 10 s for the field. An unknown locator keyword is
        reported and nothing is typed (previously this path crashed with
        UnboundLocalError).
        """
        usrName = self._wait_for_element(find_element_method, element, 10)
        if usrName is None:
            return
        usrName.send_keys(self.__username)

    def inputpassword(self, find_element_method, element):
        """Type the password into the field addressed by the locator.

        Waits up to 10 s for the field. An unknown locator keyword is
        reported and nothing is typed (previously this path crashed with
        UnboundLocalError).
        """
        passWrd = self._wait_for_element(find_element_method, element, 10)
        if passWrd is None:
            return
        passWrd.send_keys(self.__password)

    def clicksubmit(self, find_element_method, element):
        """Click the element addressed by the locator.

        Waits up to 5 s for the element. An unknown locator keyword is
        reported and nothing is clicked.
        """
        target = self._wait_for_element(find_element_method, element, 5)
        if target is None:
            return
        target.click()

    def test(self, find_element_method, element):
        """Move the mouse over an element and click it.

        `element` is always treated as an XPath expression;
        `find_element_method` is accepted only to mirror the other methods'
        signature and is ignored, exactly as before. No explicit wait is
        performed here.
        """
        target = self.__driver.find_element(By.XPATH, element)
        ActionChains(self.__driver).move_to_element(target).perform()
        target.click()


def main():
    """Open Firefox, log in, open the attendance widget and punch in."""
    # Site used for the punch-in.
    browser1 = webdriver.Firefox()
    giga = LoginUrl(browser1, "http://xxxxxxxxxx/index.html", u"你的账户名", u"你的密码")
    giga.openwebsite()

    # Enter the account name.
    giga.inputusername("id", "loginid")
    # Enter the password.
    giga.inputpassword("id", "userpassword")
    # Click the login button.
    giga.clicksubmit("xpath", "/html/body/div[@id='container']/div[@id='LoginContainer']/div[@class='e9login-content']/div[@class='e9login-element e9login-btn']/div[@class='e9login-form-submit e9login-submit']/button[@id='submit']")
    # Give the page time to finish.
    time.sleep(3)

    # Click the attendance widget.
    giga.clicksubmit("xpath", "/html/body/div[@id='container']/div/div[@class='e9theme-layout-container ']/div[@class='e9theme-layout-header']/div[@class='e9header-container wea-f12']/div[@class='e9header-right']/div[@class='e9header-right-toolbar']/div[@id='signPlugin']/div[@class='singBtn']/span")
    # Give the page time to finish.
    time.sleep(3)

    # Refresh the punch-in time. An earlier failure here was caused by the
    # XPath expression; simplifying it fixed the problem.
    giga.test("xpath", "//a[@class='resign']")
    # NOTE(review): the browser is left open, as before, so the final click
    # is not cut short; call browser1.quit() after confirming the result if
    # this runs unattended.
    print("succeess...")


if __name__ == "__main__":
    main()
代码中的 usrName.send_keys(self.__username)
和 passWrd.send_keys(self.__password) 分别用于向定位到的输入框中填入账号和密码。
定位元素所需的xpath可以借助浏览器插件 XPath Helper 获取,安装和使用步骤如下:
1.下载文件xpath-helper.crx,链接:https://pan.baidu.com/s/1dFgzBSd 密码:zwvb,感谢分享的网友。
2.在Google Chrome浏览器或者Edge浏览器的菜单里找到“扩展程序”选项并打开。
3.然后就会进入到扩展插件的界面了,把下载好的离线插件xpath-helper.crx拖动到这个扩展界面便可以了,它会有提示,松开鼠标即可。
4.添加成功后,可以用快捷键Ctrl+Shift+X来调出界面编写xpath,再按一次就会关掉。打开xpath插件后,按住Shift,用鼠标点击网页上的任意元素,就会自动生成对应的xpath表达式,拷贝粘贴即可使用。
对于复杂的xpath,直接复制粘贴可能导致程序定位元素失败,这时候可能需要简化 XPath Helper 生成的表达式。如何简化呢?例如:
/html/body/div[@id='container']/div[@id='LoginContainer']/div[@class='e9login-content']/div[@class='e9login-element e9login-btn']/div[@class='e9login-form-submit e9login-submit']/button[@id='submit']
直接简写成
//button[@id='submit']