前言:最近要做一个打印机的项目,用户可以上传文件,然后选择打印的页数,所以后端需要对上传的文件进行解析获取页数。
Maven项目直接先上pom
<dependencies> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.0.6</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>4.1.2</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>4.1.2</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>4.1.2</version> </dependency> </dependencies>
首先,现在的后端基本都是SpringBoot项目,用MultipartFile对象接收上传的文件。Spring Boot默认的单个文件上传大小上限是1MB,如果文件比较大,需要调大上传大小限制,否则会报错:org.apache.tomcat.util.http.fileupload.impl.SizeLimitExceededException: the request was rejected because its size (4543309) exceeds the configured maximum (1048576)
设置方法为在yml配置文件添加配置:
spring: servlet: multipart: max-file-size: 100MB max-request-size: 100MB enabled: true
第二步、因为word、pdf、ppt获取页数的方法都不一样,所以先通过后缀获取文件类型
// Original name of the uploaded file as reported by the client (may be null).
String fileName = file.getOriginalFilename();
// Extension including the leading dot, lower-cased (e.g. ".pdf").
// Falls back to an empty string when the name is missing or contains no dot,
// instead of letting substring(-1) throw StringIndexOutOfBoundsException.
int dotIndex = (fileName == null) ? -1 : fileName.lastIndexOf('.');
String type = (dotIndex < 0) ? "" : fileName.substring(dotIndex).toLowerCase(java.util.Locale.ROOT);
第三步,将MultipartFile转成InputStream流的形式进行解析
MultipartFile转成InputStream的方法: MultipartFile file; byte[] byteArr = file.getBytes(); InputStream inputStream = new ByteArrayInputStream(byteArr);
第四步:因为经常要用到获取页数的方法,所以我直接写了一个工具类,大家可以直接复制;
public class FilePagesUtils { /** * * @param fileInputStream 文件流 * @param fileType 文件后缀 * @return * @throws IOException */ public static int filesPage(InputStream fileInputStream, String fileType) throws IOException { int count = 0; if (".doc".equals(fileType)) { count = countWord2003Page(fileInputStream); } if (".docx".equals(fileType)) { count = countWord2007Page(fileInputStream); } if (".pdf".equals(fileType)) { count = countPdfPage(fileInputStream); } if (".pptx".equals(fileType)) { count = countPPTXPage(fileInputStream); } if (".ppt".equals(fileType)) { count = countPPTPage(fileInputStream); } return count; } /** * 计算PDF格式文档的页数 */ public static int countPdfPage(InputStream fileInputStream) { int pageCount = 0; PdfReader reader = null; try { reader = new PdfReader(fileInputStream); pageCount = reader.getNumberOfPages(); } catch (IOException e) { e.printStackTrace(); } finally { reader.close(); } return pageCount; } /** * 计算PPTX格式文档的页数 * @param fileInputStream * @return * @throws IOException */ public static int countPPTPage(InputStream fileInputStream) throws IOException { int pageCount = 0; ZipSecureFile.setMinInflateRatio(-1.0d); HSLFSlideShow hslfSlideShow = new HSLFSlideShow(fileInputStream); try { pageCount = hslfSlideShow.getSlides().size(); } catch (Exception e) { e.printStackTrace(); } finally { fileInputStream.close(); } return pageCount; } /** * 计算PPTX格式文档的页数 */ public static int countPPTXPage(InputStream fileInputStream) throws IOException { int pageCount = 0; ZipSecureFile.setMinInflateRatio(-1.0d); try { XMLSlideShow pptxFile = new XMLSlideShow(fileInputStream); pageCount = pptxFile.getSlides().size(); } catch (IOException e) { e.printStackTrace(); } finally { fileInputStream.close(); } return pageCount; } /** * 计算WORD2007(*.docx)格式文档的页数 */ public static int countWord2007Page(InputStream fileInputStream) throws IOException { int pageCount = 0; ZipSecureFile.setMinInflateRatio(-1.0d); XWPFDocument docx = null; try { docx = new 
XWPFDocument(fileInputStream); pageCount = docx.getProperties().getExtendedProperties().getUnderlyingProperties().getPages();//总页数 } catch (IOException e) { e.printStackTrace(); } finally { docx.close(); } return pageCount; } /** * 计算WORD2003(*.doc)格式文档的页数 */ public static int countWord2003Page(InputStream fileInputStream) throws IOException { int pageCount = 0; WordExtractor doc = null; ZipSecureFile.setMinInflateRatio(-1.0d); try { doc = new WordExtractor(fileInputStream);//.doc格式Word文件提取器 pageCount = doc.getSummaryInformation().getPageCount();//总页数 } catch (IOException e) { e.printStackTrace(); } finally { doc.close(); } return pageCount; } }