合规国际互联网加速 OSASE为企业客户提供高速稳定SD-WAN国际加速解决方案。 广告
1. pom

~~~xml
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.0</version>
</dependency>
~~~

2. 读取两种文件

~~~java
if (file.getName().endsWith("doc") || file.getName().endsWith("docx")) {
    String[] paragraphText = new String[0];
    if (file.getName().endsWith("doc")) {
        is = new FileInputStream(file);
        WordExtractor ex = new WordExtractor(is);
        paragraphText = ex.getParagraphText();
    }
    if (file.getName().endsWith("docx")) {
        OPCPackage opcPackage = POIXMLDocument.openPackage(file.getPath());
        System.out.println(file.getPath());
        POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
        String[] split = extractor.getText().split("\n");
        // List<String[]> collect = Arrays.asList(split).stream().map(v -> v.split("\t")).collect(Collectors.toList());
        List<String> collect = Arrays.asList(split).stream().flatMap(v -> Arrays.stream(v.split("\t"))).collect(Collectors.toList());
        paragraphText = collect.toArray(new String[0]);
        extractor.close();
    }
~~~