适用场景:使用 Java(Apache POI)读取带图片或不带图片的 .doc 文件
1.带图片的doc文件格式

2.运行代码

生成的html格式文件:

生成到目录的图片:

双击打开csDoc.html:

代码:
package com.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
import org.apache. poi .hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
@RunWith(SpringRunner.class)
@SpringBootTest
public class TestDoc {
@Test
public void testReadImgDoc() {
System.out.println(analyzeDoc());
}
private String analyzeDoc(){
//对应桌面地址的doc格式文件
String path=”C:UsersAdministratorDesktopcs.doc”;
StringBuffer tempBuffer = new StringBuffer();
InputStream in = null;
try
{
in = new FileInputStream(path);
HWPFDocument doc=new HWPFDocument(in);
// 取得文档中字符的总数
int length = doc.characterLength();
// 创建图片容器
PicturesTable pTable = doc.getPicturesTable();
//存放图片路径
String fileName = “D:uploadcs”;
for (int i = 0; i < length – 1; i++) {
// 整篇文章的字符通过一个个字符的来判断,range为得到文档的范围
Range range = new Range(i, i + 1, doc);
CharacterRun cr = range.getCharacterRun(0);
if (pTable.hasPicture(cr)) {
String pic = readPicture(pTable, cr,i,fileName);
// 读写图片
tempBuffer. append (“<img src='”+pic+”‘ />”);
}else {
char c = cr.text().charAt(0);
// 判断为回车符
if (c == 13) {
tempBuffer.append(“<br/>”);
}else {
tempBuffer.append(cr.text());
}
}
}
}catch (FileNotFoundException e) {
//异常按业务场景处理
}catch (IOException e) {
//异常按业务场景处理
}
finally {
if(in != null) {
try {
in. close ();
} catch (IOException e) {
e.printStackTrace();
}
}
}
String docxContent = tempBuffer.toString();
PrintWriter pw = null;
try {
//对应桌面地址
pw = new PrintWriter(“C:UsersAdministratorDesktopcsDoc.html”);
//输出到html,然后桌面找到csDoc.html打开看效果
pw.write(docxContent);
} catch (FileNotFoundException e) {
}finally {
if(pw != null) {
pw.close();
}
}
return docxContent;
}
private String readPicture(PicturesTable pTable, CharacterRun cr,int i,String imgPath) {
// 提取图片
Picture pic = pTable.extractPicture(cr, false);
// 返回POI建议的图片文件名
String img = pic.suggestFullFileName();
img = createNumber() +i+ img;
String fileName = imgPath + img;
File file = new File(fileName);
//如果路径不存在,则创建
if (!file.getParentFile(). exists ()) {
file.getParentFile().mkdirs();
}
OutputStream out=null;
try {
out = new FileOutputStream(new File(fileName));
pic.writeImageContent(out);
}catch(Exception e) {
//异常按业务需求处理
return “”;
}finally {
if(out != null) {
try {
out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return fileName;
}
/**
* 生成18位编号,yyyyMMddHHmmss+4位随机数(共18位)
*/
private String createNumber(){
SimpleDateFormat sdf = new SimpleDateFormat(“yyyyMMddHHmmss”);
Random r = new Random();
return sdf.format(new Date())+(r.nextInt(9000)+1000);
}
}