Java實現Word


Java實現Word/Pdf/TXT轉html


引言:

最近公司在做一個教育培訓學習及在線考試的項目，本人主要負責網絡課程模塊，包括課程分類、課程、課件的創建，以及在線學習和統計功能。課件涉及多種類型，例如視頻、音頻、圖文、外部鏈接及文檔。其中就涉及到一個問題：文檔型課件如何在網頁上展示和學習。因為系統要在線統計學習的課程、學習的人員和學習的時長，所以不能像傳統做法那樣將文檔下載到本地學習，否則就不受系統控制了。最終的方案是：在上傳文檔型課件的時候，將文件相應地轉換成HTML文件，以便在網頁上瀏覽學習。

下邊主要針對word,pdf和txt文本文件進行轉換


一:Java實現將word轉換為html


1:引入依賴

<code>
<dependency>
  <groupId>fr.opensagres.xdocreport</groupId>
  <artifactId>fr.opensagres.xdocreport.document</artifactId>
  <version>1.0.5</version>
</dependency>
<dependency>
  <groupId>fr.opensagres.xdocreport</groupId>
  <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
  <version>1.0.5</version>
</dependency>
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi</artifactId>
  <version>3.12</version>
</dependency>
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi-scratchpad</artifactId>
  <version>3.12</version>
</dependency>
</code>


2:代碼demo

<code>
package com.svse.controller;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.core.IXWPFConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

/**
 * Converts Word documents (.doc and .docx) stored under {@link #STORAGEPATH} to HTML.
 *
 * <p>Generated files are prefixed with the timestamp returned by {@link #getRandomNum()},
 * and the URLs printed to standard output are built from {@link #IP} and {@link #PORT}.
 */
public class TestWordToHtml {

    /** Directory that holds the source documents and receives the generated files. */
    public static  final String STORAGEPATH="C://works//files//";
    /** Host used when building the URLs of the generated files. */
    public static  final String IP="192.168.30.222";
    /** Port used when building the URLs of the generated files. */
    public static  final String PORT="8010";

    public static void main(String[] args) throws IOException, TransformerException, ParserConfigurationException {
        TestWordToHtml wt = new TestWordToHtml();
        //wt.Word2003ToHtml("甲骨文考證.doc");
        wt.Word2007ToHtml("甲骨文考證.docx");
    }

    /**
     * Converts a Word 2003 (.doc) file to HTML.
     *
     * <p>The document is read from {@code STORAGEPATH + fileName}. Embedded pictures are
     * written to {@code STORAGEPATH + "fileImage/"}; the HTML file is written to
     * {@code STORAGEPATH + <timestamp> + <base name> + "2003.html"} and its URL is printed.
     *
     * @param fileName name of the .doc file, relative to {@link #STORAGEPATH}
     * @throws IOException if the document cannot be read or the HTML cannot be written
     * @throws TransformerException if serializing the HTML DOM fails
     * @throws ParserConfigurationException if no DOM builder can be created
     */
    public void Word2003ToHtml(String fileName) throws IOException, TransformerException, ParserConfigurationException {
        // Pictures found in the .doc file are saved under this directory.
        final String imagepath = STORAGEPATH + "fileImage/";
        final String strRanString = getRandomNum();
        String htmlName = baseName(fileName) + "2003.html";

        // try-with-resources: the input stream is closed even when parsing or conversion fails.
        try (InputStream input = new FileInputStream(new File(STORAGEPATH + fileName))) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

            // Decide where each embedded picture is stored and which URL the HTML refers to.
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                        float widthInches, float heightInches) {
                    File imgDir = new File(imagepath);
                    if (!imgDir.exists()) { // create the picture directory on first use
                        imgDir.mkdirs();
                    }

                    File imageFile = new File(imagepath + strRanString + suggestedName);
                    try (OutputStream os = new FileOutputStream(imageFile)) {
                        os.write(content);
                    } catch (IOException e) {
                        // Best effort, as before: a picture that cannot be saved must not abort
                        // the conversion of the whole document.
                        e.printStackTrace();
                    }

                    return "http://" + IP + ":" + PORT + "//uploadFile/fileImage/" + strRanString + suggestedName;
                }
            });

            // Parse the Word document into an HTML DOM.
            wordToHtmlConverter.processDocument(wordDocument);
            Document htmlDocument = wordToHtmlConverter.getDocument();

            File htmlFile = new File(STORAGEPATH + strRanString + htmlName);
            try (OutputStream outStream = new FileOutputStream(htmlFile)) {
                Transformer serializer = TransformerFactory.newInstance().newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
            }
        }

        System.out.println("生成html文件路徑:"+ "http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);
    }

    /**
     * Converts a Word 2007+ (.docx) file to HTML.
     *
     * <p>The document is read from {@code STORAGEPATH + fileName}. Extracted pictures go to
     * the directory {@code STORAGEPATH + <timestamp>}; the HTML file is written to
     * {@code STORAGEPATH + <timestamp> + <base name> + "2007.html"} and its URL is printed.
     * A missing file or a non-.docx extension is reported on standard output. Conversion
     * failures are printed as a stack trace and are not rethrown.
     *
     * @param fileName name of the .docx file, relative to {@link #STORAGEPATH}
     * @throws IOException declared for callers; failures inside the conversion are caught
     */
    public void Word2007ToHtml(String fileName) throws IOException {
        final String strRanString = getRandomNum();
        String filepath = STORAGEPATH + strRanString;
        String htmlName = baseName(fileName) + "2007.html";

        File f = new File(STORAGEPATH + fileName);
        if (!f.exists()) {
            System.out.println("Sorry File does not Exists!");
            return;
        }
        // Case-insensitive check so that ".Docx" and similar spellings are accepted as well.
        if (!f.getName().toLowerCase(Locale.ROOT).endsWith(".docx")) {
            System.out.println("Enter only MS Office 2007+ files");
            return;
        }

        // 1) Load the Word document into an XWPFDocument.
        try (InputStream in = new FileInputStream(f)) {
            XWPFDocument document = new XWPFDocument(in);

            // 2) Configure the XHTML conversion: where pictures are extracted to and which
            //    URL the generated HTML uses to reference them,
            //    e.g. http://192.168.30.222:8010//uploadFile/....
            File imageFolderFile = new File(filepath);
            XHTMLOptions options = XHTMLOptions.create().URIResolver(new IURIResolver() {
                @Override
                public String resolve(String uri) {
                    return "http://" + IP + ":" + PORT + "//uploadFile/" + strRanString + "/" + uri;
                }
            });
            options.setExtractor(new FileImageExtractor(imageFolderFile));
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);

            // 3) Convert the XWPFDocument to XHTML; the output stream is always closed.
            try (OutputStream out = new FileOutputStream(new File(filepath + htmlName))) {
                IXWPFConverter<XHTMLOptions> converter = XHTMLConverter.getInstance();
                converter.convert(document, out, options);
            }
            System.out.println("html路徑:"+"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);
        } catch (Exception e) {
            // Kept best-effort as in the original demo: report the failure without throwing.
            e.printStackTrace();
        }
    }

    /**
     * Generates a timestamp string in {@code yyyyMMddHHmmss} format from the current time.
     *
     * <p>Despite the name, the value is not random: two calls within the same second
     * return the same string.
     *
     * <p>Author: zsq. Created: 2019-12-07 14:37:09.
     *
     * @return the current time formatted as {@code yyyyMMddHHmmss}
     */
    public static String getRandomNum() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
        return sdf.format(new Date());
    }

    /**
     * Returns the file name without its extension.
     *
     * <p>Only the last dot is treated as the extension separator, and a name without
     * any dot is returned unchanged instead of raising an exception.
     */
    private static String baseName(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot < 0 ? fileName : fileName.substring(0, dot);
    }
}
</code>


二:Java實現將Pdf轉換為html
1: 引入依賴

<code>
<dependency>
  <groupId>net.sf.cssbox</groupId>
  <artifactId>pdf2dom</artifactId>
  <version>1.7</version>
</dependency>
<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox</artifactId>
  <version>2.0.12</version>
</dependency>
<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox-tools</artifactId>
  <version>2.0.12</version>
</dependency>
</code>


2:代碼Demo

<code>
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.fit.pdfdom.PDFDomTree;

/**
 * Converts a PDF file to an HTML file.
 */
public class PdfToHtml {

    /**
     * Converts the PDF at {@code inPdfPath} to a UTF-8 encoded HTML file at
     * {@code outputHtmlPath}.
     *
     * <p>Any failure is printed as a stack trace and is not rethrown.
     *
     * @param inPdfPath path of the PDF file to read
     * @param outputHtmlPath path of the HTML file to create
     */
    public void pdfToHtmlTest(String inPdfPath, String outputHtmlPath) {
        // Resources declared in try (...) are closed automatically. Closing the writer also
        // flushes it; without that the generated HTML can be truncated or empty.
        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(new File(outputHtmlPath)), StandardCharsets.UTF_8));
             // Load the PDF document (PDDocument.load can also take the content as a byte[]).
             PDDocument document = PDDocument.load(new File(inPdfPath))) {
            PDFDomTree pdfDomTree = new PDFDomTree();
            pdfDomTree.writeText(document, out);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException {
        PdfToHtml ph = new PdfToHtml();
        String pdfPath="C:\\\\works\\\\files\\\\武研中心行政考勤制度.pdf";
        String outputPath="C:\\\\works\\\\files\\\\武研中心行政考勤制度.html";
        ph.pdfToHtmlTest(pdfPath, outputPath);
    }
}
</code>


三:Java實現將TXT轉換為html

<code>
    /**
     * Converts a GBK-encoded plain-text file to an HTML file.
     *
     * <p>Each input line is HTML-escaped and written followed by {@code <br/>}, so the line
     * structure of the text survives in the browser and characters such as {@code <} or
     * {@code &} cannot break the markup. The output is written in GBK as well.
     *
     * <p>A missing file is reported on standard output. Any failure while reading or
     * writing is reported on standard output with a stack trace and is not rethrown.
     *
     * @param filePath path of the source txt file
     * @param htmlPosition path of the HTML file to generate
     */
    public static void txtToHtml(String filePath, String htmlPosition) {
        try {
            File file = new File(filePath);
            if (!file.isFile()) { // isFile() is also false when the path does not exist
                System.out.println("找不到指定的文件");
                return;
            }

            // Resolve the charset first so that a failure here cannot leak an open stream.
            Charset gbk = Charset.forName("GBK");
            try (BufferedReader reader = new BufferedReader(
                         new InputStreamReader(new FileInputStream(file), gbk));
                 BufferedWriter writer = new BufferedWriter(
                         new OutputStreamWriter(new FileOutputStream(new File(htmlPosition)), gbk))) {
                String lineTxt;
                while ((lineTxt = reader.readLine()) != null) {
                    writer.write(escapeHtml(lineTxt));
                    writer.write("<br/>");
                    writer.newLine();
                }
            }
        } catch (Exception e) {
            System.out.println("讀取文件內容出錯");
            e.printStackTrace();
        }
    }

    /** Escapes the characters that have a special meaning in HTML text content. */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }
</code>


原文網址:https://www.cnblogs.com/zhaosq/p/12069087.html


分享到:


相關文章: