Java教程

记录-工具类-java读取pdf和word

本文主要是介绍记录-工具类-java读取pdf和word,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!

## 依赖
```xml
<!-- word 依赖 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.0.0</version>
</dependency>
```

## 依赖
```xml
<!-- word 依赖 -->
<dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>4.0.0</version>
        </dependency>
        
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.0.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
            <version>3.1.0</version>
        </dependency>
<!-- pdf 依赖 -->
 <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.24</version>
        </dependency>

```
## 代码
```java
// Reader abstraction
/**
 * Contract for tools that turn a document, supplied as a stream, into a {@code String}.
 */
public interface ReadTool {

    /**
     * Reads the document contained in {@code inputStream}.
     *
     * @param inputStream the document content
     * @param fileName    the document's file name
     * @return the text produced by the implementation
     */
    String read(InputStream inputStream, String fileName);
}
// PDF reading tool
/**
 * {@link ReadTool} implementation that extracts the plain text of a PDF document
 * using PDFBox.
 */
public class PDFReadTool implements ReadTool{
    /**
     * Extracts the text of the PDF supplied in {@code inputStream}.
     *
     * <p>The stream is closed before this method returns, whether or not reading
     * succeeded. A failure is not thrown: its stack trace is printed and
     * {@code null} is returned instead.
     *
     * @param inputStream the PDF content; closed by this method
     * @param fileName    the document's file name; not used by this implementation
     * @return the extracted text, or {@code null} if the document could not be read
     */
    @Override
    public String read(InputStream inputStream,String fileName) {
        String text = null;
        // PDDocument.load manages the parser and its intermediate buffer itself, so
        // there is no hand-built parser source left open when parsing fails.
        // Resources are closed in reverse order: the document first, then the stream.
        try (InputStream in = inputStream;
             PDDocument document = PDDocument.load(in)) {
            text = new PDFTextStripper().getText(document);
        } catch (Exception e) {
            // Keeps the existing contract: report the failure and return null.
            // If only close() failed, the text extracted above is still returned.
            e.printStackTrace();
        }
        return text;
    }
}
// Word reading tool
/**
 * {@link ReadTool} implementation that extracts the plain text of a Word document
 * using Apache POI. The format is chosen from the file name's extension:
 * {@code .doc} is read with {@code WordExtractor}, {@code .docx} with
 * {@code XWPFWordExtractor}.
 */
public class WordReadTool implements ReadTool{
    /**
     * Extracts the text of the Word document supplied in {@code inputStream}.
     *
     * <p>The stream is closed before this method returns, on success and on failure.
     *
     * @param inputStream the document content; closed by this method
     * @param fileName    the document's file name; its extension (matched
     *                    case-insensitively) selects the format
     * @return the extracted text
     * @throws IllegalArgumentException if {@code fileName} ends with neither
     *                                  {@code .doc} nor {@code .docx}
     * @throws RuntimeException         if reading the document fails; a checked
     *                                  exception is wrapped as the cause
     */
    @Override
    public String read(InputStream inputStream,String fileName) {
        try (InputStream in = inputStream) {
            if (hasExtension(fileName, ".doc")) {
                try (WordExtractor extractor = new WordExtractor(in)) {
                    return extractor.getText();
                }
            }
            if (hasExtension(fileName, ".docx")) {
                try (XWPFDocument document = new XWPFDocument(in);
                     POIXMLTextExtractor extractor = new XWPFWordExtractor(document)) {
                    return extractor.getText();
                }
            }
        } catch (RuntimeException e) {
            // Already unchecked: propagate as-is instead of wrapping it a second time.
            throw e;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        // Reached only for an unsupported extension; the stream is already closed here.
        throw new IllegalArgumentException("不是word文件!");
    }

    /** Returns whether {@code fileName} ends with {@code extension}, ignoring case. */
    private static boolean hasExtension(String fileName, String extension) {
        int offset = fileName.length() - extension.length();
        // regionMatches returns false for a negative offset, i.e. a name shorter than the extension.
        return fileName.regionMatches(true, offset, extension, 0, extension.length());
    }
}
// Entry point
/**
 * Demo: opens the file at {@code path}, reads its text with the reader that matches
 * the extension of {@code fileName}, and prints the result to standard output.
 *
 * @param args command-line arguments; not used
 * @throws FileNotFoundException if the file at {@code path} cannot be opened
 */
public static void main(String[] args) throws FileNotFoundException {
    String path = "路径";
    String fileName = "文件名";
    InputStream is = new FileInputStream(path);
    // Choose the reader from the extension (case-insensitive). Any name that does
    // not end in ".pdf" goes to the Word reader, which validates .doc/.docx itself.
    boolean isPdf = fileName.regionMatches(true, fileName.length() - 4, ".pdf", 0, 4);
    ReadTool readTool = isPdf ? new PDFReadTool() : new WordReadTool();
    String s = readTool.read(is, fileName);
    System.out.println(s);
}

```

 

这篇关于记录-工具类-java读取pdf和word的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!