<?php
/**
 * Create an MS Word file from an HTML file or URL
 * @author Filipp Yanukovich
 * @version 1.0
 * @name HtmlToDoc
 * https://github.com/votetake/HtmlToDoc
 */
/**
 * Converts HTML into a document that MS Word opens as a ".doc" file.
 *
 * The output is not a binary Word file: it is the source markup wrapped in an
 * HTML header that carries Microsoft Office XML/CSS hints (see getHeader()).
 * Entry points are createDoc() and createDocFromURL().
 */
class HtmlToDoc
{
    /** @var string Target file name; setDocFileName() makes it end in ".doc" */
    private $docFile = "";

    /** @var string Title written into the generated <title> element */
    private $title = "Untitled Document";

    /** @var string Title used when the source HTML has no <title> element */
    private $defaultTitle = "Untitled Document";

    /** @var string Content of the source <head>, without its <title> */
    private $htmlHead = "";

    /** @var string Source markup left after doctype, scripts, head and wrapper tags are removed */
    private $htmlBody = "";

    /**
     * Reset the title and the parsed head/body to their defaults.
     *
     * This is a plain method, not a constructor: PHP never calls it
     * automatically. The same defaults are also set on the property
     * declarations, so calling it is optional.
     *
     * @return void
     */
    public function init()
    {
        $this->defaultTitle = "Untitled Document";
        $this->title = $this->defaultTitle;
        $this->htmlHead = "";
        $this->htmlBody = "";
    }

    /**
     * Set the document file name, appending ".doc" when it is missing.
     *
     * @param string $docfile
     * @return void
     */
    protected function setDocFileName($docfile)
    {
        $this->docFile = $docfile;
        if (!preg_match("/\.doc$/i", $this->docFile))
        {
            $this->docFile .= ".doc";
        }
    }

    /**
     * Set the title used when the source HTML does not provide its own.
     *
     * A <title> found in the HTML passed to createDoc() still takes
     * precedence over the value given here.
     *
     * @param string $title
     * @return void
     */
    public function setTitle($title)
    {
        $this->title = $title;
        $this->defaultTitle = $title;
    }

    /**
     * Return the opening part of the document: the Office-flavoured <html>
     * and <head> (with the current title and the source head content) up to
     * and including the opening <body> tag.
     *
     * @return String
     */
    protected function getHeader()
    {
        // The closing marker stays at column 0 so the heredoc also parses on PHP < 7.3.
        $return = <<<EOH
<html xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:w="urn:schemas-microsoft-com:office:word"
xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Word.Document>
<meta name=Generator content="Microsoft Word 9">
<meta name=Originator content="Microsoft Word 9">
<!--[if !mso]>
<style>
v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style>
<![endif]-->
<title>{$this->title}</title>
<!--[if gte mso 9]><xml>
<w:WordDocument>
<w:View>Print</w:View>
<w:DoNotHyphenateCaps/>
<w:PunctuationKerning/>
<w:DrawingGridHorizontalSpacing>9.35 pt</w:DrawingGridHorizontalSpacing>
<w:DrawingGridVerticalSpacing>9.35 pt</w:DrawingGridVerticalSpacing>
</w:WordDocument>
</xml><![endif]-->
<style>
<!--
/* Font Definitions */
@font-face
{font-family:Verdana;
panose-1:2 11 6 4 3 5 4 4 2 4;
mso-font-charset:0;
mso-generic-font-family:swiss;
mso-font-pitch:variable;
mso-font-signature:536871559 0 0 0 415 0;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{mso-style-parent:"";
margin:0in;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:7.5pt;
mso-bidi-font-size:8.0pt;
font-family:"Verdana";
mso-fareast-font-family:"Verdana";}
p.small
{mso-style-parent:"";
margin:0in;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:1.0pt;
mso-bidi-font-size:1.0pt;
font-family:"Verdana";
mso-fareast-font-family:"Verdana";}
@page Section1
{size:8.5in 11.0in;
margin:1.0in 1.25in 1.0in 1.25in;
mso-header-margin:.5in;
mso-footer-margin:.5in;
mso-paper-source:0;}
div.Section1
{page:Section1;}
-->
</style>
<!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1032">
<o:colormenu v:ext="edit" strokecolor="none"/>
</o:shapedefaults></xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1"/>
</o:shapelayout></xml><![endif]-->
{$this->htmlHead}
</head>
<body>
EOH;
        return $return;
    }

    /**
     * Return the closing part of the document.
     *
     * The method name is misspelled ("Fotter"); it is kept as is because the
     * method is protected and subclasses may already override it.
     *
     * @return String
     */
    protected function getFotter()
    {
        return "</body></html>";
    }

    /**
     * Remove every match of $pattern from $subject.
     *
     * preg_replace() returns NULL on a PCRE error; in that case the subject
     * is returned untouched instead of silently becoming an empty document.
     *
     * @param String $pattern
     * @param String $subject
     * @return String
     */
    private function _stripPattern($pattern, $subject)
    {
        $result = preg_replace($pattern, "", $subject);
        return $result === null ? $subject : $result;
    }

    /**
     * Split the source HTML into head content, title and body markup.
     *
     * Removes the doctype and all <script> elements, extracts the first
     * <head> (attributes allowed) and its <title>, then strips the head and
     * the <html>/<body> wrapper tags from the remaining markup. Stores the
     * results in $htmlHead, $htmlBody and $title. When no <title> is found,
     * the default title (see setTitle()) is used.
     *
     * @param String $html
     * @return void
     */
    private function _parseHtml($html)
    {
        $html = (string) $html;

        // [^>]* and .*? under the "s" flag replace the original ((.|\n)*?)
        // groups, which backtrack heavily on large documents.
        $html = $this->_stripPattern('/<!DOCTYPE[^>]*>/i', $html);
        $html = $this->_stripPattern('/<script\b[^>]*>.*?<\/script\s*>/is', $html);

        $headPattern = '/<head\b[^>]*>(.*?)<\/head\s*>/is';
        $titlePattern = '/<title\b[^>]*>(.*?)<\/title\s*>/is';

        $head = "";
        if (preg_match($headPattern, $html, $matches) === 1)
        {
            $head = $matches[1];
        }

        $title = $this->defaultTitle;
        if (preg_match($titlePattern, $head, $matches) === 1)
        {
            $title = $matches[1];
        }

        $head = $this->_stripPattern($titlePattern, $head);
        $head = $this->_stripPattern('/<\/?head\b[^>]*>/i', $head);

        $html = $this->_stripPattern($headPattern, $html);
        // The generated header/footer supply their own <html> and <body>.
        $html = $this->_stripPattern('/<\/?html\b[^>]*>/i', $html);
        $html = $this->_stripPattern('/<\/?body\b[^>]*>/i', $html);

        $this->title = $title;
        $this->htmlHead = $head;
        $this->htmlBody = $html;
    }

    /**
     * Write the content into a file.
     *
     * @param String $file :: File name to save to
     * @param String $content :: Content to write
     * @param [Optional] String $mode :: fopen() write mode
     * @return boolean True when the whole content was written and the file closed, else false
     */
    private function _writeFile($file, $content, $mode = "w")
    {
        // Opening stays best-effort: a failure is reported through the return value.
        $fp = @fopen($file, $mode);
        if (!is_resource($fp))
        {
            return false;
        }
        $written = fwrite($fp, $content);
        $closed = fclose($fp);

        return $written !== false && $written === strlen($content) && $closed;
    }

    /**
     * File name to advertise in the Content-Disposition header: the base
     * name of the target file without control characters, double quotes or
     * backslashes, so it cannot break out of the quoted header value.
     *
     * @return String
     */
    private function _downloadName()
    {
        $name = basename($this->docFile);
        $clean = preg_replace('/[\x00-\x1F\x7F"\\\\]/', "", $name);
        return $clean === null ? "" : $clean;
    }

    /**
     * Build the document from HTML content and either send it as a download
     * or save it to disk.
     *
     * @param String $html :: HTML content (never treated as a file name here)
     * @param String $file :: Document File Name
     * @param Boolean $download :: Whether to download the file or save the file
     * @return boolean
     */
    private function _buildDoc($html, $file, $download)
    {
        $this->_parseHtml($html);
        $this->setDocFileName($file);
        $doc = $this->getHeader() . $this->htmlBody . $this->getFotter();

        if (!$download)
        {
            return $this->_writeFile($this->docFile, $doc);
        }

        // Headers can no longer be changed once output has started; the
        // document is still echoed in that case, as before.
        if (!headers_sent())
        {
            header("Cache-Control: ");// leave blank to avoid IE errors
            header("Pragma: ");// leave blank to avoid IE errors
            header("Content-type: application/octet-stream");
            header("Content-Disposition: attachment; filename=\"" . $this->_downloadName() . "\"");
        }
        echo $doc;
        return true;
    }

    /**
     * Create the MS Word document from the page at the given URL.
     *
     * @param String $url :: url like http://www.example.com ("http://" is added when no http(s) scheme is present)
     * @param String $file :: Document File Name
     * @param Boolean $download :: Whether to download the file or save the file
     * @return boolean False when the URL cannot be fetched or the file cannot be written
     */
    public function createDocFromURL($url, $file, $download = false)
    {
        // Accept both schemes; the original test for "http:" alone turned
        // "https://host" into "http://https://host".
        if (!preg_match('/^https?:\/\//i', $url))
        {
            $url = "http://" . $url;
        }
        $html = @file_get_contents($url);
        if ($html === false)
        {
            return false;
        }
        // Downloaded content goes straight to the builder so that a remote
        // body is never interpreted as a local file name.
        return $this->_buildDoc($html, $file, $download);
    }

    /**
     * Create the MS Word document from given HTML.
     *
     * If $html names an existing local file, that file is read and its
     * content is used; do not pass untrusted strings here unless reading a
     * local path is acceptable.
     *
     * @param String $html :: HTML Content or HTML File Name like path/to/html/file.html
     * @param String $file :: Document File Name
     * @param Boolean $download :: Whether to download the file or save the file
     * @return boolean False when the input is missing/unreadable or the file cannot be written
     */
    public function createDoc($html, $file, $download = false)
    {
        if ($html === false || $html === null)
        {
            return false;
        }
        $html = (string) $html;

        // A string containing a NUL byte can never be a path; skipping
        // is_file() for it avoids the warning/ValueError some PHP versions raise.
        if (strpos($html, "\0") === false && is_file($html))
        {
            $html = @file_get_contents($html);
            if ($html === false)
            {
                return false;
            }
        }

        return $this->_buildDoc($html, $file, $download);
    }
}
?>