HtmlToDoc.php 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. <?php
  /**
   * Create an MS Word (.doc) file from an HTML string, an HTML file or a URL.
   *
   * The generated file is plain HTML wrapped in the Office/Word XML namespaces
   * and "mso" conditional comments emitted by getHeader().
   *
   * @author Filipp Yanukovich
   * @version 1.0
   * @name HtmlToDoc
   * https://github.com/votetake/HtmlToDoc
   */
  class HtmlToDoc
  {
      /** @var string Target file name; setDocFileName() guarantees a ".doc" suffix. */
      private $docFile = "";

      /** @var string Document title written into the <title> element. */
      private $title = "Untitled Document";

      /** @var string Extra markup injected just before </head>. */
      private $htmlHead = "";

      /** @var string Markup placed between <body> and </body>. */
      private $htmlBody = "";

      /**
       * Reset title, head and body to their defaults.
       *
       * This is not a real constructor and is never called automatically;
       * the property defaults above provide the same initial state.
       *
       * @return void
       */
      public function init()
      {
          $this->title = "Untitled Document";
          $this->htmlHead = "";
          $this->htmlBody = "";
      }

      /**
       * Set the document file name, appending ".doc" when it is missing.
       *
       * @param string $docfile
       * @return void
       */
      protected function setDocFileName($docfile)
      {
          $this->docFile = $docfile;
          if (!preg_match("/\.doc$/i", $this->docFile))
          {
              $this->docFile .= ".doc";
          }
      }

      /**
       * Set the document title.
       *
       * The title is kept when the converted HTML has no non-empty <title>;
       * otherwise the title found in the HTML replaces it.
       *
       * @param string $title
       * @return void
       */
      public function setTitle($title)
      {
          $this->title = $title;
      }

      /**
       * Return the Word-flavoured HTML header, up to and including <body>.
       *
       * The current title and the extracted head markup are interpolated as-is
       * (no escaping is applied here).
       *
       * @return String
       */
      protected function getHeader()
      {
          $return = <<<EOH
<html xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:w="urn:schemas-microsoft-com:office:word"
xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Word.Document>
<meta name=Generator content="Microsoft Word 9">
<meta name=Originator content="Microsoft Word 9">
<!--[if !mso]>
<style>
v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style>
<![endif]-->
<title>$this->title</title>
<!--[if gte mso 9]><xml>
<w:WordDocument>
<w:View>Print</w:View>
<w:DoNotHyphenateCaps/>
<w:PunctuationKerning/>
<w:DrawingGridHorizontalSpacing>9.35 pt</w:DrawingGridHorizontalSpacing>
<w:DrawingGridVerticalSpacing>9.35 pt</w:DrawingGridVerticalSpacing>
</w:WordDocument>
</xml><![endif]-->
<style>
<!--
/* Font Definitions */
@font-face
{font-family:Verdana;
panose-1:2 11 6 4 3 5 4 4 2 4;
mso-font-charset:0;
mso-generic-font-family:swiss;
mso-font-pitch:variable;
mso-font-signature:536871559 0 0 0 415 0;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{mso-style-parent:"";
margin:0in;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:7.5pt;
mso-bidi-font-size:8.0pt;
font-family:"Verdana";
mso-fareast-font-family:"Verdana";}
p.small
{mso-style-parent:"";
margin:0in;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:1.0pt;
mso-bidi-font-size:1.0pt;
font-family:"Verdana";
mso-fareast-font-family:"Verdana";}
@page Section1
{size:8.5in 11.0in;
margin:1.0in 1.25in 1.0in 1.25in;
mso-header-margin:.5in;
mso-footer-margin:.5in;
mso-paper-source:0;}
div.Section1
{page:Section1;}
-->
</style>
<!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1032">
<o:colormenu v:ext="edit" strokecolor="none"/>
</o:shapedefaults></xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1"/>
</o:shapelayout></xml><![endif]-->
$this->htmlHead
</head>
<body>
EOH;
          return $return;
      }

      /**
       * Return the closing markup of the document.
       *
       * The misspelled name is kept so existing subclasses keep working.
       *
       * @return String
       */
      protected function getFotter()
      {
          return "</body></html>";
      }

      /**
       * Remove every match of $pattern from $subject.
       *
       * preg_replace() returns null on a PCRE error (e.g. backtrack limit);
       * in that case the subject is returned untouched instead of being lost.
       *
       * @param String $pattern
       * @param String $subject
       * @return String
       */
      private function _strip($pattern, $subject)
      {
          $result = preg_replace($pattern, "", $subject);
          return $result === null ? $subject : $result;
      }

      /**
       * Split the HTML into head and body parts and store them on the object.
       *
       * Removes the DOCTYPE, all <script> elements, the <head> element and the
       * <html>/<body> wrapper tags from the body. The content of <head>
       * (minus its <title>) becomes the extra head markup; a non-empty <title>
       * found inside <head> replaces the current title.
       *
       * @param String $html
       * @return void
       */
      private function _parseHtml($html)
      {
          $html = (string) $html;
          $headPattern = '/<head\b[^>]*>(.*?)<\/head>/is';
          $titlePattern = '/<title\b[^>]*>(.*?)<\/title>/is';

          $html = $this->_strip('/<!DOCTYPE[^>]*>/i', $html);
          $html = $this->_strip('/<script\b[^>]*>.*?<\/script>/is', $html);

          // Start from an empty head so nothing leaks over from a previous call.
          $head = "";
          if (preg_match($headPattern, $html, $matches))
          {
              $head = $matches[1];
              $html = $this->_strip($headPattern, $html);
          }

          if (preg_match($titlePattern, $head, $matches))
          {
              $title = trim($matches[1]);
              if ($title !== "")
              {
                  $this->title = $title;
              }
              $head = $this->_strip($titlePattern, $head);
          }

          // The wrapper tags are supplied by getHeader()/getFotter().
          $html = $this->_strip('/<\/?(?:html|body)\b[^>]*>/i', $html);

          $this->htmlHead = $head;
          $this->htmlBody = $html;
      }

      /**
       * Write the content into a file.
       *
       * @param String $file :: File name to be saved
       * @param String $content :: Content to be written
       * @param [Optional] String $mode :: fopen() write mode
       * @return boolean True when the whole content was written, else false
       */
      private function _writeFile($file, $content, $mode = "w")
      {
          // Warnings are suppressed on purpose: failure is reported via the return value.
          $fp = @fopen($file, $mode);
          if (!is_resource($fp))
          {
              return false;
          }
          $written = fwrite($fp, $content);
          fclose($fp);
          return $written !== false && $written === strlen($content);
      }

      /**
       * Create the MS Word document from the page at the given URL.
       *
       * @param String $url :: url like http://www.example.com; "http://" is
       *                       prepended when no http/https scheme is present
       * @param String $file :: Document File Name
       * @param Boolean $download :: Whether to download the file or save the file
       * @return boolean False when the page cannot be fetched or the file cannot be written
       */
      public function createDocFromURL($url, $file, $download = false)
      {
          if (!preg_match('/^https?:\/\//i', $url))
          {
              $url = "http://" . $url;
          }
          $html = @file_get_contents($url);
          if ($html === false)
          {
              return false;
          }
          return $this->createDoc($html, $file, $download);
      }

      /**
       * Create the MS Word document from the given HTML.
       *
       * @param String $html :: HTML Content or HTML File Name like path/to/html/file.html
       * @param String $file :: Document File Name
       * @param Boolean $download :: Whether to download the file or save the file
       * @return boolean False when the input is unusable or the file cannot be written
       */
      public function createDoc($html, $file, $download = false)
      {
          if ($html === false || $html === null)
          {
              return false;
          }
          $html = (string) $html;

          // Only short, NUL-free strings can be paths; skip is_file() for real markup.
          $mayBePath = strlen($html) < PHP_MAXPATHLEN && strpos($html, "\0") === false;
          if ($mayBePath && is_file($html))
          {
              $html = @file_get_contents($html);
              if ($html === false)
              {
                  return false;
              }
          }

          $this->_parseHtml($html);
          $this->setDocFileName($file);
          $doc = $this->getHeader() . $this->htmlBody . $this->getFotter();

          if (!$download)
          {
              return $this->_writeFile($this->docFile, $doc);
          }

          if (!headers_sent())
          {
              // Keep only the base name and drop characters that would break the quoted value.
              $name = str_replace(array('"', "\\", "\r", "\n"), "", basename($this->docFile));
              header("Cache-Control: ");// leave blank to avoid IE errors
              header("Pragma: ");// leave blank to avoid IE errors
              header("Content-type: application/octet-stream");
              header("Content-Disposition: attachment; filename=\"$name\"");
          }
          echo $doc;
          return true;
      }
  }
  234. ?>