在 pom.xml 中引入 jsoup 依赖包:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Spider image-downloader example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>Spider</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <!-- Pin the source/report encoding so the build does not depend on the
             platform default charset (the Java sources contain non-ASCII comments). -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    </properties>

    <dependencies>
        <!-- HTML parser used to fetch the page and select the <img> elements. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.14.3</version>
        </dependency>
    </dependencies>
</project>
import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.*; import java.net.URL; import java.net.URLConnection; public class Spider { public static void main(String[] args) throws IOException { //url连接/目标网站 String url = "http://www.dzs.so/Book/List"; //获取保存的位置 File saveFile = new File("D:\\IDEA_CODE_F\\com\\photo"); //由于这里获取到的url地址为相对地址,所以加上前缀 String headURL = "http://www.dzs.so/"; //指定url和超时时间 Document document = Jsoup.parse(new URL(url), 10000); //指定从那个元素中获取 Elements img = document.getElementsByTag("img"); for (Element element : img) { //从src属性中获取url连接 String src = element.attr("src"); //获取图片名 String title = element.attr("title").split("epub")[0]; //获得输入和输出流 URL url1 = new URL(headURL + src); URLConnection urlConnection = url1.openConnection(); InputStream in = urlConnection.getInputStream(); OutputStream out = new FileOutputStream(saveFile + File.separator + title + ".jpg"); int n; while ((n = in.read() )!= -1){ out.write(n); } in.close(); out.close(); } } }
运行结果图:
资料参考:
https://www.bilibili.com/video/BV1RU4y147eZ
https://www.bilibili.com/medialist/play/watchlater/BV19h41147AQ