2
0

simpleXLSX.class.php 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188
  1. <?php
  2. /**
  3. * SimpleXLSX php class
  4. * MS Excel 2007+ workbooks reader
  5. *
  6. * Copyright (c) 2012 - 2022 SimpleXLSX
  7. *
  8. * @category SimpleXLSX
  9. * @package SimpleXLSX
  10. * @copyright Copyright (c) 2012 - 2022 SimpleXLSX (https://github.com/shuchkin/simplexlsx/)
  11. * @license MIT
  12. */
  13. /** Examples
  14. *
  15. * use Shuchkin\SimpleXLSX;
  16. *
  17. * Example 1:
  18. * if ( $xlsx = SimpleXLSX::parse('book.xlsx') ) {
  19. * foreach ($xlsx->rows() as $r) {
  20. * print_r( $r );
  21. * }
  22. * } else {
  23. * echo SimpleXLSX::parseError();
  24. * }
  25. *
  26. * Example 2: html table
  27. * if ( $xlsx = SimpleXLSX::parse('book.xlsx') ) {
  28. * echo $xlsx->toHTML();
  29. * } else {
  30. * echo SimpleXLSX::parseError();
  31. * }
  32. *
  33. * Example 3: rowsEx
  34. * $xlsx = SimpleXLSX::parse('book.xlsx');
  35. * foreach ( $xlsx->rowsEx() as $r ) {
  36. * print_r( $r );
  37. * }
  38. *
  39. * Example 4: select worksheet
  40. * $xlsx = SimpleXLSX::parse('book.xlsx');
  41. * foreach( $xlsx->rows(1) as $r ) { // second worksheet
  42. * print_r( $r );
  43. * }
  44. *
  45. * Example 5: IDs and worksheet names
  46. * $xlsx = SimpleXLSX::parse('book.xlsx');
  47. * print_r( $xlsx->sheetNames() ); // array( 0 => 'Sheet 1', 1 => 'Catalog' );
  48. *
  49. * Example 6: get sheet name by index
  50. * $xlsx = SimpleXLSX::parse('book.xlsx');
  51. * echo 'Sheet Name 2 = '.$xlsx->sheetName(1);
  52. *
  53. * Example 7: getCell (very slow)
  54. * echo $xlsx->getCell(1,'D12'); // reads D12 cell from second sheet
  55. *
  56. * Example 8: read data
  57. * if ( $xlsx = SimpleXLSX::parseData( file_get_contents('http://www.example.com/example.xlsx') ) ) {
  58. * $dim = $xlsx->dimension(1);
  59. * $num_cols = $dim[0];
  60. * $num_rows = $dim[1];
  61. * echo $xlsx->sheetName(1).':'.$num_cols.'x'.$num_rows;
  62. * } else {
  63. * echo SimpleXLSX::parseError();
  64. * }
  65. *
  66. * Example 9: old style
  67. * $xlsx = new SimpleXLSX('book.xlsx');
  68. * if ( $xlsx->success() ) {
  69. * print_r( $xlsx->rows() );
  70. * } else {
  71. * echo 'xlsx error: '.$xlsx->error();
  72. * }
  73. */
  74. class simpleXLSX {
  75. public static $CF = [// Cell formats
  76. 0 => 'General',
  77. 1 => '0',
  78. 2 => '0.00',
  79. 3 => '#,##0',
  80. 4 => '#,##0.00',
  81. 9 => '0%',
  82. 10 => '0.00%',
  83. 11 => '0.00E+00',
  84. 12 => '# ?/?',
  85. 13 => '# ??/??',
  86. 14 => 'mm-dd-yy',
  87. 15 => 'd-mmm-yy',
  88. 16 => 'd-mmm',
  89. 17 => 'mmm-yy',
  90. 18 => 'h:mm AM/PM',
  91. 19 => 'h:mm:ss AM/PM',
  92. 20 => 'h:mm',
  93. 21 => 'h:mm:ss',
  94. 22 => 'm/d/yy h:mm',
  95. 37 => '#,##0 ;(#,##0)',
  96. 38 => '#,##0 ;[Red](#,##0)',
  97. 39 => '#,##0.00;(#,##0.00)',
  98. 40 => '#,##0.00;[Red](#,##0.00)',
  99. 44 => '_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)',
  100. 45 => 'mm:ss',
  101. 46 => '[h]:mm:ss',
  102. 47 => 'mmss.0',
  103. 48 => '##0.0E+0',
  104. 49 => '@',
  105. 27 => '[$-404]e/m/d',
  106. 30 => 'm/d/yy',
  107. 36 => '[$-404]e/m/d',
  108. 50 => '[$-404]e/m/d',
  109. 57 => '[$-404]e/m/d',
  110. 59 => 't0',
  111. 60 => 't0.00',
  112. 61 => 't#,##0',
  113. 62 => 't#,##0.00',
  114. 67 => 't0%',
  115. 68 => 't0.00%',
  116. 69 => 't# ?/?',
  117. 70 => 't# ??/??',
  118. ];
  119. public $nf = []; // number formats
  120. public $cellFormats = []; // cellXfs
  121. public $datetimeFormat = 'Y-m-d H:i:s';
  122. public $debug;
  123. public $activeSheet = 0;
  124. public $rowsExReader;
  125. /* @var SimpleXMLElement[] $sheets */
  126. protected $sheets;
  127. protected $sheetNames = [];
  128. protected $sheetFiles = [];
  129. // scheme
  130. public $styles;
  131. protected $hyperlinks;
  132. /* @var array[] $package */
  133. protected $package;
  134. protected $sharedstrings;
  135. protected $date1904 = 0;
  136. /*
  137. private $date_formats = array(
  138. 0xe => "d/m/Y",
  139. 0xf => "d-M-Y",
  140. 0x10 => "d-M",
  141. 0x11 => "M-Y",
  142. 0x12 => "h:i a",
  143. 0x13 => "h:i:s a",
  144. 0x14 => "H:i",
  145. 0x15 => "H:i:s",
  146. 0x16 => "d/m/Y H:i",
  147. 0x2d => "i:s",
  148. 0x2e => "H:i:s",
  149. 0x2f => "i:s.S"
  150. );
  151. private $number_formats = array(
  152. 0x1 => "%1.0f", // "0"
  153. 0x2 => "%1.2f", // "0.00",
  154. 0x3 => "%1.0f", //"#,##0",
  155. 0x4 => "%1.2f", //"#,##0.00",
  156. 0x5 => "%1.0f", //"$#,##0;($#,##0)",
  157. 0x6 => '$%1.0f', //"$#,##0;($#,##0)",
  158. 0x7 => '$%1.2f', //"$#,##0.00;($#,##0.00)",
  159. 0x8 => '$%1.2f', //"$#,##0.00;($#,##0.00)",
  160. 0x9 => '%1.0f%%', //"0%"
  161. 0xa => '%1.2f%%', //"0.00%"
  162. 0xb => '%1.2f', //"0.00E00",
  163. 0x25 => '%1.0f', //"#,##0;(#,##0)",
  164. 0x26 => '%1.0f', //"#,##0;(#,##0)",
  165. 0x27 => '%1.2f', //"#,##0.00;(#,##0.00)",
  166. 0x28 => '%1.2f', //"#,##0.00;(#,##0.00)",
  167. 0x29 => '%1.0f', //"#,##0;(#,##0)",
  168. 0x2a => '$%1.0f', //"$#,##0;($#,##0)",
  169. 0x2b => '%1.2f', //"#,##0.00;(#,##0.00)",
  170. 0x2c => '$%1.2f', //"$#,##0.00;($#,##0.00)",
  171. 0x30 => '%1.0f'); //"##0.0E0";
  172. // }}}
  173. */
  174. protected $errno = 0;
  175. protected $error = false;
  176. /**
  177. * @var false|SimpleXMLElement
  178. */
  179. public $theme;
  /**
   * Creates the reader and, when a source is given, loads it right away.
   *
   * @param string|null $filename Path to the workbook, or its raw bytes when $is_data is truthy
   * @param bool|null   $is_data  Truthy when $filename carries the file contents rather than a path
   * @param bool|null   $debug    Stored in $this->debug unless null
   */
  public function __construct($filename = null, $is_data = null, $debug = null) {
    // Empty package skeleton; _unzip() fills it in.
    $this->package = [
      'filename' => '',
      'mtime' => 0,
      'size' => 0,
      'comment' => '',
      'entries' => []
    ];
    if (null !== $debug) {
      $this->debug = $debug;
    }
    if (!$filename) {
      return;
    }
    if ($this->_unzip($filename, $is_data)) {
      $this->_parse();
    }
  }
  /**
   * Splits a ZIP container into its entries and stores them, still compressed,
   * in $this->package['entries']. Decompression happens later in getEntryData().
   *
   * The archive is cut at every local-file-header signature (PK\x03\x04); the
   * central directory and its end record are trimmed off the last chunk.
   *
   * @param string $filename Path to the file, or the raw bytes when $is_data is truthy
   * @param bool   $is_data  Whether $filename holds the archive contents
   *
   * @return bool true when the entries were stored; false after error 1
   *              (file not readable) or error 2 (not a recognisable archive)
   */
  protected function _unzip($filename, $is_data = false) {
    if ($is_data) {
      $this->package['filename'] = 'default.xlsx';
      $this->package['mtime'] = time();
      $this->package['size'] = $this->_strlen($filename);
      $raw = $filename;
    } else {
      if (!is_readable($filename)) {
        $this->error(1, 'File not found ' . $filename);
        return false;
      }
      // Package information
      $this->package['filename'] = $filename;
      $this->package['mtime'] = filemtime($filename);
      $this->package['size'] = filesize($filename);
      $raw = file_get_contents($filename);
      if ($raw === false) {
        // Read failed after the readability check; same outcome as an empty file.
        $this->error(2, 'Unknown archive format');
        return false;
      }
    }
    // Explode to each part; whatever precedes the first signature is dropped.
    $chunks = explode("\x50\x4b\x03\x04", $raw);
    array_shift($chunks);
    $lastIdx = count($chunks) - 1;
    if ($lastIdx < 0) {
      $this->error(2, 'Unknown archive format');
      return false;
    }
    // The last chunk must end with exactly one end-of-central-directory record...
    $tail = explode("\x50\x4b\x05\x06", $chunks[$lastIdx]);
    if (count($tail) !== 2) {
      $this->error(2, 'Unknown archive format');
      return false;
    }
    // ...preceded by at least one central directory file header.
    $tail = explode("\x50\x4b\x01\x02", $tail[0]);
    if (count($tail) < 2) {
      $this->error(2, 'Unknown archive format');
      return false;
    }
    $chunks[$lastIdx] = $tail[0];
    foreach ($chunks as $chunk) {
      // The fixed part of a local header (signature already removed) is 26 bytes.
      // unpack() would fail on anything shorter, so such a chunk is skipped.
      if ($this->_strlen($chunk) < 26) {
        continue;
      }
      $hdr = unpack('v1VN/v1GPF/v1CM/v1FT/v1FD/V1CRC/V1CS/V1UCS/v1FNL/v1EFL', $chunk);
      $nameLen = $hdr['FNL'];
      $extraLen = $hdr['EFL'];
      // Special case : value block after the compressed data (general purpose bit 3)
      if ($hdr['GPF'] & 0x0008) {
        $desc = unpack('V1CRC/V1CS/V1UCS', $this->_substr($chunk, -12));
        $hdr['CRC'] = $desc['CRC'];
        $hdr['CS'] = $desc['CS'];
        $hdr['UCS'] = $desc['UCS'];
        $chunk = $this->_substr($chunk, 0, -12);
        // The descriptor may carry its own signature in front.
        if ($this->_substr($chunk, -4) === "\x50\x4b\x07\x08") {
          $chunk = $this->_substr($chunk, 0, -4);
        }
      }
      // Stored filename, normalised to forward slashes
      $fullName = str_replace('\\', '/', $this->_substr($chunk, 26, $nameLen));
      if ($this->_substr($fullName, -1) === '/') {
        // is a directory entry - will be skipped
        continue;
      }
      // Split into path and filename; root-level entries get an empty path.
      $path = dirname($fullName);
      if ($path === '.') {
        $path = '';
      }
      $data = $this->_substr($chunk, 26 + $nameLen + $extraLen);
      $err = 0;
      $errMsg = '';
      if ($this->_strlen($data) !== (int) $hdr['CS']) {
        $err = 1;
        $errMsg = 'Compressed size is not equal with the value in header information.';
      }
      // DOS to UNIX timestamp
      $time = mktime(
        ($hdr['FT'] & 0xf800) >> 11,
        ($hdr['FT'] & 0x07e0) >> 5,
        ($hdr['FT'] & 0x001f) << 1,
        ($hdr['FD'] & 0x01e0) >> 5,
        $hdr['FD'] & 0x001f,
        (($hdr['FD'] & 0xfe00) >> 9) + 1980
      );
      $this->package['entries'][] = [
        'data' => $data,
        'ucs' => (int) $hdr['UCS'], // uncompressed size
        'cm' => $hdr['CM'], // compression method
        'cs' => isset($hdr['CS']) ? (int) $hdr['CS'] : 0, // compressed size
        'crc' => $hdr['CRC'],
        'error' => $err,
        'error_msg' => $errMsg,
        'name' => basename($fullName),
        'path' => $path,
        'time' => $time
      ];
    }
    return true;
  }
  /**
   * Byte length of a string, also when mbstring.func_overload bit 2 is set.
   *
   * @param string $str
   *
   * @return int
   */
  protected function _strlen($str) {
    if (ini_get('mbstring.func_overload') & 2) {
      return mb_strlen($str, '8bit');
    }
    return strlen($str);
  }
  /**
   * Reads the last error message or, when $num is truthy, records a new error.
   *
   * @param int|null    $num Error code; a falsy value makes this a plain getter
   * @param string|null $str Error message stored together with the code
   *
   * @return string|false Current error message (false in the initial state)
   */
  public function error($num = null, $str = null) {
    if (!$num) {
      return $this->error;
    }
    $this->errno = $num;
    $this->error = $str;
    if ($this->debug) {
      trigger_error(__CLASS__ . ': ' . $this->error, E_USER_WARNING);
    }
    return $this->error;
  }
  /**
   * Byte-wise substr(), also when mbstring.func_overload bit 2 is set.
   * A null $length is replaced by the full byte length of $str.
   *
   * @param string   $str
   * @param int      $start
   * @param int|null $length
   *
   * @return string|false Whatever substr()/mb_substr() returns
   */
  protected function _substr($str, $start, $length = null) {
    if (ini_get('mbstring.func_overload') & 2) {
      if ($length === null) {
        $length = mb_strlen($str, '8bit');
      }
      return mb_substr($str, $start, $length, '8bit');
    }
    if ($length === null) {
      $length = strlen($str);
    }
    return substr($str, $start, $length);
  }
  /**
   * Reads the package relations, locates the workbook and loads sheets,
   * shared strings, styles (number formats and cellXfs) and theme.
   *
   * Differences from the straightforward reading order:
   * - the active tab is read before the workbook relations are processed,
   *   because the loop over package relations is left as soon as those
   *   relations have been handled;
   * - a worksheet relation whose Id is not listed in <sheets> is ignored;
   *   array_search() returns false for it, which as an array key would
   *   address sheet 0.
   *
   * @return bool true when at least one worksheet was loaded
   */
  protected function _parse() {
    // Document data holders
    $this->sharedstrings = [];
    $this->sheets = [];

    // Read relations and search for officeDocument
    $relations = $this->getEntryXML('_rels/.rels');
    if ($relations) {
      foreach ($relations->Relationship as $rel) {
        $rel_type = basename(trim((string) $rel['Type'])); // officeDocument
        if ($rel_type !== 'officeDocument') {
          continue;
        }
        $rel_target = $this->_getTarget('', (string) $rel['Target']); // /xl/workbook.xml or xl/workbook.xml
        $workbook = $this->getEntryXML($rel_target);
        if (!$workbook) {
          continue;
        }

        $index_rId = []; // [0 => rId1]
        $index = 0;
        foreach ($workbook->sheets->sheet as $s) {
          $this->sheetNames[$index] = (string) $s['name'];
          $index_rId[$index] = (string) $s['id'];
          $index++;
        }
        if ((int) $workbook->workbookPr['date1904'] === 1) {
          $this->date1904 = 1;
        }
        // find active sheet from workbook.xml
        foreach ($workbook->bookViews->workbookView as $view) {
          if (!empty($view['activeTab'])) {
            $this->activeSheet = (int) $view['activeTab'];
          }
        }

        $workbookRelations = $this->getEntryXML(dirname($rel_target) . '/_rels/workbook.xml.rels');
        if (!$workbookRelations) {
          continue;
        }
        // Loop relations for workbook and extract sheets...
        foreach ($workbookRelations->Relationship as $workbookRelation) {
          $wrel_type = basename(trim((string) $workbookRelation['Type'])); // worksheet
          $wrel_path = $this->_getTarget(dirname($rel_target), (string) $workbookRelation['Target']);
          if (!$this->entryExists($wrel_path)) {
            continue;
          }
          if ($wrel_type === 'worksheet') {
            $sheet_index = array_search((string) $workbookRelation['Id'], $index_rId, true);
            if ($sheet_index === false) {
              // not referenced by <sheets>; there is no index to store it under
              continue;
            }
            if ($sheet = $this->getEntryXML($wrel_path)) {
              $this->sheets[$sheet_index] = $sheet;
              $this->sheetFiles[$sheet_index] = $wrel_path;
            }
          } elseif ($wrel_type === 'sharedStrings') {
            if ($sharedStrings = $this->getEntryXML($wrel_path)) {
              foreach ($sharedStrings->si as $val) {
                if (isset($val->t)) {
                  $this->sharedstrings[] = (string) $val->t;
                } elseif (isset($val->r)) {
                  $this->sharedstrings[] = $this->_parseRichText($val);
                }
              }
            }
          } elseif ($wrel_type === 'styles') {
            $this->styles = $this->getEntryXML($wrel_path);
            // custom number formats: numFmtId => formatCode
            $this->nf = [];
            if (isset($this->styles->numFmts->numFmt)) {
              foreach ($this->styles->numFmts->numFmt as $v) {
                $this->nf[(int) $v['numFmtId']] = (string) $v['formatCode'];
              }
            }
            // cellXfs: one record per style index, attributes cast to int
            $this->cellFormats = [];
            if (isset($this->styles->cellXfs->xf)) {
              foreach ($this->styles->cellXfs->xf as $v) {
                $x = [
                  'format' => null
                ];
                foreach ($v->attributes() as $k1 => $v1) {
                  $x[$k1] = (int) $v1;
                }
                if (isset($x['numFmtId'])) {
                  // custom formats take precedence over the built-in table
                  if (isset($this->nf[$x['numFmtId']])) {
                    $x['format'] = $this->nf[$x['numFmtId']];
                  } elseif (isset(self::$CF[$x['numFmtId']])) {
                    $x['format'] = self::$CF[$x['numFmtId']];
                  }
                }
                $this->cellFormats[] = $x;
              }
            }
          } elseif ($wrel_type === 'theme') {
            $this->theme = $this->getEntryXML($wrel_path);
          }
        }
        // workbook handled; remaining package relations are not needed
        break;
      }
    }
    if (count($this->sheets)) {
      // Sort sheets
      ksort($this->sheets);
      return true;
    }
    return false;
  }
  /**
   * Loads an archive entry, strips XML namespaces with regular expressions
   * and parses the result with SimpleXML. The entry is removed from the
   * package afterwards to save memory.
   *
   * Each regex clean-up is best effort: when preg_replace() reports a failure
   * by returning null, the text from before that step is kept instead of being
   * lost. The libxml error is read only when parsing really failed, and before
   * the previous libxml error mode is restored.
   *
   * @param string $name Entry name inside the archive
   *
   * @return SimpleXMLElement|false false when the entry is missing (error 4),
   *         cannot be parsed (error 3), or parses to a SimpleXMLElement that
   *         evaluates to false
   */
  public function getEntryXML($name) {
    $entry_xml = $this->getEntryData($name);
    if (!$entry_xml) {
      $this->error(4, 'XML-entry not found ' . $name);
      return false;
    }
    $this->deleteEntry($name); // economy memory

    // dirty remove namespace prefixes
    $cleanups = [
      ['/xmlns[^=]*="[^"]*"/i', ''], // remove namespaces
      ['/[a-zA-Z0-9]+:([a-zA-Z0-9]+="[^"]+")/', '$1'], // remove namespaced attrs
      ['/<[a-zA-Z0-9]+:([^>]+)>/', '<$1>'], // fix namespaced opening tags
      ['/<\/[a-zA-Z0-9]+:([^>]+)>/', '</$1>'], // fix namespaced closing tags
    ];
    foreach ($cleanups as $step) {
      $replaced = preg_replace($step[0], $step[1], $entry_xml);
      if ($replaced !== null) {
        $entry_xml = $replaced;
      }
    }

    if (strpos($name, '/sheet')) { // dirty skip empty rows
      $rowPatterns = [
        '/<row[^>]+>\s*(<c[^\/]+\/>\s*)+<\/row>/', // <row...> <c /><c /></row>
        '/<row[^\/>]*\/>/', // <row />
        '/<row[^>]*><\/row>/', // <row...></row>
      ];
      $removedRows = 0;
      foreach ($rowPatterns as $pattern) {
        $replaced = preg_replace($pattern, '', $entry_xml, -1, $cnt);
        if ($replaced !== null) {
          $entry_xml = $replaced;
          $removedRows += $cnt;
        }
      }
      if ($removedRows) {
        // the declared dimension no longer matches the remaining rows
        $replaced = preg_replace('/<dimension[^\/]+\/>/', '', $entry_xml);
        if ($replaced !== null) {
          $entry_xml = $replaced;
        }
      }
    }
    $entry_xml = trim($entry_xml);

    // XML External Entity (XXE) Prevention, libxml_disable_entity_loader deprecated in PHP 8
    $toggleLoader = LIBXML_VERSION < 20900 && function_exists('libxml_disable_entity_loader');
    $oldLoader = null;
    if ($toggleLoader) {
      $oldLoader = libxml_disable_entity_loader();
    }
    $oldUie = libxml_use_internal_errors(true);
    $entry_xmlobj = simplexml_load_string($entry_xml, 'SimpleXMLElement', LIBXML_COMPACT | LIBXML_PARSEHUGE);
    // Only a real parse failure has an error that belongs to this document.
    $e = ($entry_xmlobj === false) ? libxml_get_last_error() : false;
    libxml_use_internal_errors($oldUie);
    if ($toggleLoader) {
      libxml_disable_entity_loader($oldLoader);
    }

    if ($entry_xmlobj) {
      return $entry_xmlobj;
    }
    if ($e) {
      $this->error(3, 'XML-entry ' . $name . ' parser error ' . $e->message . ' line ' . $e->line);
    }
    return false;
  }
  513. // sheets numeration: 1,2,3....
  /**
   * Returns the (decompressed) content of an archive entry.
   *
   * The lookup is case-insensitive. The entry is decompressed on first access
   * and its method is then set to -1 so later calls reuse the plain data.
   *
   * Fixes compared with the earlier behaviour:
   * - root-level entries are found (dirname() yields '.', while entries are
   *   stored with an empty path);
   * - a missing bz2 extension is reported under 'error_msg' like every other
   *   entry error;
   * - an entry that cannot be decompressed yields false on the first call
   *   too, instead of its still-compressed bytes.
   *
   * @param string $name Entry name inside the archive
   *
   * @return string|false Entry content; false when the entry is flagged with
   *         an error, cannot be decompressed, or does not exist (error 5)
   */
  public function getEntryData($name) {
    $name = ltrim(str_replace('\\', '/', $name), '/');
    $dir = dirname($name);
    if ($dir === '.') {
      $dir = '';
    }
    $dir = $this->_strtoupper($dir);
    $name = $this->_strtoupper(basename($name));
    foreach ($this->package['entries'] as &$entry) {
      if ($this->_strtoupper($entry['path']) !== $dir || $this->_strtoupper($entry['name']) !== $name) {
        continue;
      }
      if ($entry['error']) {
        return false;
      }
      switch ($entry['cm']) {
        case -1: // already decompressed by an earlier call
        case 0: // Stored
          break;
        case 8: // Deflated
          $entry['data'] = gzinflate($entry['data']);
          break;
        case 12: // BZIP2
          if (extension_loaded('bz2')) {
            $entry['data'] = bzdecompress($entry['data']);
          } else {
            $entry['error'] = 7;
            $entry['error_msg'] = 'PHP BZIP2 extension not available.';
          }
          break;
        default:
          $entry['error'] = 6;
          $entry['error_msg'] = 'De-/Compression method ' . $entry['cm'] . ' is not supported.';
      }
      if ($entry['error']) {
        // nothing usable was produced; same answer a repeated call would give
        return false;
      }
      if ($entry['cm'] > -1) {
        $entry['cm'] = -1;
        // Integrity problems are recorded on the entry; the data is still
        // returned on this call.
        if ($entry['data'] === false) {
          $entry['error'] = 2;
          $entry['error_msg'] = 'Decompression of data failed.';
        } elseif ($this->_strlen($entry['data']) !== (int) $entry['ucs']) {
          $entry['error'] = 3;
          $entry['error_msg'] = 'Uncompressed size is not equal with the value in header information.';
        } elseif (crc32($entry['data']) !== $entry['crc']) {
          $entry['error'] = 4;
          $entry['error_msg'] = 'CRC32 checksum is not equal with the value in header information.';
        }
      }
      return $entry['data'];
    }
    unset($entry);
    $this->error(5, 'Entry not found ' . ($dir ? $dir . '/' : '') . $name);
    return false;
  }
  /**
   * Removes the first entry matching $name (case-insensitive) from the package.
   *
   * Root-level entries are matched as well: dirname() reports '.' for them,
   * whereas entries are stored with an empty path.
   *
   * @param string $name Entry name inside the archive
   *
   * @return bool true when an entry was removed
   */
  public function deleteEntry($name) {
    $name = ltrim(str_replace('\\', '/', $name), '/');
    $dir = dirname($name);
    if ($dir === '.') {
      $dir = '';
    }
    $dir = $this->_strtoupper($dir);
    $name = $this->_strtoupper(basename($name));
    foreach ($this->package['entries'] as $k => $entry) {
      if ($this->_strtoupper($entry['path']) === $dir && $this->_strtoupper($entry['name']) === $name) {
        unset($this->package['entries'][$k]);
        return true;
      }
    }
    return false;
  }
  /**
   * Upper-cases a string, routing through mb_strtoupper() with the '8bit'
   * encoding when mbstring.func_overload bit 2 is set.
   *
   * @param string $str
   *
   * @return string
   */
  protected function _strtoupper($str) {
    if (ini_get('mbstring.func_overload') & 2) {
      return mb_strtoupper($str, '8bit');
    }
    return strtoupper($str);
  }
  /**
   * Resolves a relationship target against a base directory.
   *
   * A target starting with '/' is returned without that first character;
   * otherwise it is appended to $base and '.' / '..' segments are collapsed
   * (a/b/../c -> a/c).
   *
   * @param string $base   Directory the target is relative to ('' for the package root)
   * @param string $target Target attribute of the relationship
   *
   * @return string
   */
  protected function _getTarget($base, $target) {
    $target = trim($target);
    if (strpos($target, '/') === 0) {
      return $this->_substr($target, 1);
    }
    if ($base) {
      $target = $base . '/' . $target;
    }
    $resolved = [];
    foreach (explode('/', $target) as $segment) {
      if ($segment === '..') {
        array_pop($resolved);
      } elseif ($segment !== '.') {
        $resolved[] = $segment;
      }
    }
    return implode('/', $resolved);
  }
  /**
   * Tells whether the package contains an entry with the given name.
   *
   * The name is normalised the same way getEntryData() and deleteEntry() do
   * it (backslashes to slashes, leading slashes dropped) and compared
   * case-insensitively. Root-level entries are matched against the empty
   * path they are stored with.
   *
   * @param string $name Filename in archive
   *
   * @return bool
   */
  public function entryExists($name) {
    $name = ltrim(str_replace('\\', '/', $name), '/');
    $dir = dirname($name);
    if ($dir === '.') {
      $dir = '';
    }
    $dir = $this->_strtoupper($dir);
    $name = $this->_strtoupper(basename($name));
    foreach ($this->package['entries'] as $entry) {
      if ($this->_strtoupper($entry['path']) === $dir && $this->_strtoupper($entry['name']) === $name) {
        return true;
      }
    }
    return false;
  }
  /**
   * Extracts the plain text of a string item: the <t> child when there is
   * one, otherwise the concatenated <t> of every <r> run.
   *
   * @param SimpleXMLElement|null $is String item element
   *
   * @return string '' when neither <t> nor <r> is present
   */
  protected function _parseRichText($is = null) {
    if (isset($is->t)) {
      return (string) $is->t;
    }
    $text = '';
    if (isset($is->r)) {
      foreach ($is->r as $run) {
        $text .= (string) $run->t;
      }
    }
    return $text;
  }
  /**
   * Parses a workbook stored on disk; shorthand for parse() with $is_data = false.
   *
   * @param string $filename Path to the .xlsx file
   * @param bool   $debug    Passed through to parse()
   *
   * @return self|false Result of parse()
   */
  public static function parseFile($filename, $debug = false) {
    $is_data = false;
    return self::parse($filename, $is_data, $debug);
  }
  /**
   * Builds a reader and loads the given workbook.
   *
   * When loading fails, the message and code are handed to parseError() and
   * parseErrno() so they can be read statically afterwards.
   *
   * @param string $filename Path to the workbook, or its raw bytes when $is_data is truthy
   * @param bool   $is_data  Whether $filename carries the file contents
   * @param bool   $debug    Stored in the reader's $debug property
   *
   * @return self|false The reader, or false when it reports an error
   */
  public static function parse($filename, $is_data = false, $debug = false) {
    $reader = new self();
    $reader->debug = $debug;
    if ($reader->_unzip($filename, $is_data)) {
      $reader->_parse();
    }
    if (!$reader->success()) {
      self::parseError($reader->error());
      self::parseErrno($reader->errno());
      return false;
    }
    return $reader;
  }
  /**
   * Tells whether no error has been recorded on this instance.
   *
   * @return bool
   */
  public function success() {
    return $this->error ? false : true;
  }
  644. // https://github.com/shuchkin/simplexlsx#gets-extend-cell-info-by--rowsex
  /**
   * Static holder for the message of the last failed parse().
   *
   * @param string|false $set New message; a falsy value only reads the stored one
   *
   * @return string|false Stored message (false until one has been set)
   */
  public static function parseError($set = false) {
    static $error = false;
    if ($set) {
      $error = $set;
    }
    return $error;
  }
  /**
   * Static holder for the error code of the last failed parse().
   *
   * @param int|false $set New code; a falsy value only reads the stored one
   *
   * @return int|false Stored code (false until one has been set)
   */
  public static function parseErrno($set = false) {
    static $errno = false;
    if ($set) {
      $errno = $set;
    }
    return $errno;
  }
  /**
   * Code of the last error recorded on this instance.
   *
   * @return int 0 in the initial state
   */
  public function errno() {
    $code = $this->errno;
    return $code;
  }
  /**
   * Parses a workbook held in memory; shorthand for parse() with $is_data = true.
   *
   * @param string $data  Raw bytes of the .xlsx file
   * @param bool   $debug Passed through to parse()
   *
   * @return self|false Result of parse()
   */
  public static function parseData($data, $debug = false) {
    $is_data = true;
    return self::parse($data, $is_data, $debug);
  }
  /**
   * Returns the parsed worksheet and, on first access, builds the
   * cell reference => href map for its hyperlinks.
   *
   * The sheet's relationship part is opened only when it exists in the
   * package: a sheet whose links are all internal has none, and asking
   * getEntryXML() for a missing entry would record an error on the reader.
   * A hyperlink whose relationship id cannot be resolved gets an empty
   * target instead of reading an undefined array offset.
   *
   * @param int $worksheetIndex Zero-based sheet index
   *
   * @return SimpleXMLElement|false false (error 6) when the sheet does not exist
   */
  public function worksheet($worksheetIndex = 0) {
    if (!isset($this->sheets[$worksheetIndex])) {
      $this->error(6, 'Worksheet not found ' . $worksheetIndex);
      return false;
    }
    $ws = $this->sheets[$worksheetIndex];
    if (!isset($this->hyperlinks[$worksheetIndex]) && isset($ws->hyperlinks)) {
      $this->hyperlinks[$worksheetIndex] = [];
      $sheet_rels = str_replace('worksheets', 'worksheets/_rels', $this->sheetFiles[$worksheetIndex]) . '.rels';
      // relationship id => external target
      $link_ids = [];
      if ($this->entryExists($sheet_rels) && ($rels = $this->getEntryXML($sheet_rels))) {
        foreach ($rels->Relationship as $rel) {
          $rel_type = basename(trim((string) $rel['Type']));
          if ($rel_type === 'hyperlink') {
            $link_ids[(string) $rel['Id']] = (string) $rel['Target'];
          }
        }
      }
      foreach ($ws->hyperlinks->hyperlink as $hyperlink) {
        $ref = (string) $hyperlink['ref'];
        if ($this->_strpos($ref, ':') > 0) { // A1:A8 -> A1
          $range = explode(':', $ref);
          $ref = $range[0];
        }
        $loc = (string) $hyperlink['location'];
        $id = (string) $hyperlink['id'];
        if ($id) {
          $target = isset($link_ids[$id]) ? $link_ids[$id] : '';
          $href = $target . ($loc ? '#' . $loc : '');
        } else {
          $href = $loc;
        }
        $this->hyperlinks[$worksheetIndex][$ref] = $href;
      }
    }
    return $ws;
  }
  /**
   * Byte-wise strpos(), also when mbstring.func_overload bit 2 is set.
   *
   * @param string $haystack
   * @param string $needle
   * @param int    $offset
   *
   * @return int|false
   */
  protected function _strpos($haystack, $needle, $offset = 0) {
    if (ini_get('mbstring.func_overload') & 2) {
      return mb_strpos($haystack, $needle, $offset, '8bit');
    }
    return strpos($haystack, $needle, $offset);
  }
  /**
   * Returns [numCols, numRows] of a worksheet.
   *
   * Uses the end of the <dimension ref="A1:C5"> range when the ref contains a
   * colon; otherwise every cell is visited and the largest column/row is
   * taken, falling back to positional counters for cells whose reference
   * cannot be decoded.
   *
   * @param int $worksheetIndex
   *
   * @return array [0, 0] when the worksheet does not exist
   */
  public function dimension($worksheetIndex = 0) {
    $ws = $this->worksheet($worksheetIndex);
    if ($ws === false) {
      return [0, 0];
    }
    /* @var SimpleXMLElement $ws */
    $ref = (string) $ws->dimension['ref'];
    if ($this->_strpos($ref, ':') !== false) {
      $corners = explode(':', $ref);
      $last = $this->getIndex($corners[1]);
      return [$last[0] + 1, $last[1] + 1];
    }
    // slow method
    $maxCol = 0;
    $maxRow = 0;
    $rowPos = 0;
    foreach ($ws->sheetData->row as $row) {
      $colPos = 0;
      foreach ($row->c as $c) {
        $pos = $this->getIndex((string) $c['r']);
        if ($pos[0] > -1) {
          // explicit cell reference
          $maxCol = max($maxCol, $pos[0]);
          $maxRow = max($maxRow, $pos[1]);
        } else {
          // no usable reference: rely on the position inside the XML
          $maxCol = max($maxCol, $colPos);
          $maxRow = max($maxRow, $rowPos);
        }
        $colPos++;
      }
      $rowPos++;
    }
    return [$maxCol + 1, $maxRow + 1];
  }
  /**
   * Converts a cell reference such as 'C5' into zero-based [column, row].
   *
   * The column letters are read as a base-26 number with A = 1.
   *
   * @param string $cell Reference containing upper-case letters followed by digits
   *
   * @return array [column, row], or [-1, -1] when no reference is found
   */
  public function getIndex($cell = 'A1') {
    if (!preg_match('/([A-Z]+)(\d+)/', $cell, $m)) {
      return [-1, -1];
    }
    $letters = str_split($m[1]);
    $power = count($letters) - 1;
    $column = 0;
    foreach ($letters as $letter) {
      // most significant letter first
      $column += (ord($letter) - 64) * pow(26, $power);
      $power--;
    }
    return [$column - 1, $m[2] - 1];
  }
  /**
   * Converts a <c> cell element into a PHP value according to its type.
   *
   * Untyped/numeric cells whose style format (quoted literals removed)
   * contains 'm', 'AM' or 'PM' are treated as dates and formatted with
   * $this->datetimeFormat.
   *
   * Changes from the earlier behaviour: a style without a resolved format
   * (null) is no longer passed to the regex functions, and a shared-string
   * index that is not present yields '' instead of reading an undefined
   * array offset.
   *
   * @param SimpleXMLElement $cell
   *
   * @return mixed string, int, float or bool depending on the cell type
   */
  public function value($cell) {
    // Determine data type
    $dataType = (string) $cell['t'];
    if ($dataType === '' || $dataType === 'n') { // number
      $s = (int) $cell['s'];
      if ($s > 0 && isset($this->cellFormats[$s])) {
        if (array_key_exists('format', $this->cellFormats[$s])) {
          // null (no known format code) becomes '' and can never match
          $format = (string) $this->cellFormats[$s]['format'];
          if ($format !== '' && preg_match('/(m|AM|PM)/', preg_replace('/\"[^"]+\"/', '', $format))) { // [mm]onth,AM|PM
            $dataType = 'D';
          }
        } else {
          $dataType = 'n';
        }
      }
    }
    $value = '';
    switch ($dataType) {
      case 's':
        // Value is a shared string
        if ((string) $cell->v !== '') {
          $stringIndex = (int) $cell->v;
          if (isset($this->sharedstrings[$stringIndex])) {
            $value = $this->sharedstrings[$stringIndex];
          }
        }
        break;
      case 'str': // formula?
        if ((string) $cell->v !== '') {
          $value = (string) $cell->v;
        }
        break;
      case 'b':
        // Value is boolean
        $value = (string) $cell->v;
        if ($value === '0') {
          $value = false;
        } elseif ($value === '1') {
          $value = true;
        } else {
          $value = (bool) $cell->v;
        }
        break;
      case 'inlineStr':
        // Value is rich text inline
        $value = $this->_parseRichText($cell->is);
        break;
      case 'e':
        // Value is an error message
        if ((string) $cell->v !== '') {
          $value = (string) $cell->v;
        }
        break;
      case 'D':
        // Date as float
        if (!empty($cell->v)) {
          $value = $this->datetimeFormat ? gmdate($this->datetimeFormat, $this->unixstamp((float) $cell->v)) : (float) $cell->v;
        }
        break;
      case 'd':
        // Date as ISO YYYY-MM-DD
        if ((string) $cell->v !== '') {
          $value = (string) $cell->v;
        }
        break;
      default:
        // Value is a string
        $value = (string) $cell->v;
        // Check for numeric values
        if (is_numeric($value)) {
          /** @noinspection TypeUnsafeComparisonInspection */
          if ($value == (int) $value) {
            $value = (int) $value;
          } /** @noinspection TypeUnsafeComparisonInspection */ elseif ($value == (float) $value) {
            $value = (float) $value;
          }
        }
    }
    return $value;
  }
  /**
   * Converts an Excel date/time serial into a Unix timestamp.
   *
   * The integer part of the serial is a day count, the fractional part the time
   * of day. Workbooks flagged as date1904 are shifted by 1462 days before the
   * 25569-day offset is subtracted.
   *
   * @param float $excelDateTime Serial date/time value
   *
   * @return int Seconds; only the time-of-day part when the day count is zero
   */
  public function unixstamp($excelDateTime) {
      $days = floor($excelDateTime); // days since 1900 or 1904
      $seconds = round(($excelDateTime - $days) * 86400);

      if ($this->date1904) {
          $days += 1462;
      }

      // A zero day count means a pure time value: keep just the seconds of the day.
      if (abs($days) > 0) {
          $seconds = ($days - 25569) * 86400 + $seconds;
      }

      return (int) $seconds;
  }
  /**
   * Looks up the hyperlink registered for a cell.
   *
   * @param int $worksheetIndex Key of the worksheet in the hyperlinks map
   * @param SimpleXMLElement $cell Cell node; its "r" attribute is the lookup key
   *
   * @return mixed The stored hyperlink, or '' when none is set for that cell
   */
  public function href($worksheetIndex, $cell) {
      $ref = (string) $cell['r'];

      if (!isset($this->hyperlinks[$worksheetIndex][$ref])) {
          return '';
      }

      return $this->hyperlinks[$worksheetIndex][$ref];
  }
  /**
   * Renders a worksheet as an HTML table.
   *
   * Emits one tr per row produced by readRows() and one "td nowrap" per cell.
   * Empty-string cells become a non-breaking space; every other value is cast
   * to string and escaped with htmlspecialchars(ENT_QUOTES).
   *
   * @param int $worksheetIndex
   *
   * @return string HTML markup, rows separated by CRLF
   */
  public function toHTML($worksheetIndex = 0) {
      $s = '<table class=excel>';
      foreach ($this->readRows($worksheetIndex) as $r) {
          $s .= '<tr>';
          foreach ($r as $c) {
              // Cell values may be int/float/bool/null, so cast before escaping;
              // the entity needs its terminating semicolon to be well-formed.
              $s .= '<td nowrap>' . ($c === '' ? '&nbsp;' : htmlspecialchars((string) $c, ENT_QUOTES)) . '</td>';
          }
          $s .= "</tr>\r\n";
      }
      $s .= '</table>';

      return $s;
  }
  /**
   * Renders a worksheet as an HTML table with inline styles.
   *
   * Uses the extended cell records from readRowsEx() (keys read here: "css",
   * "width", "height", "value"). Cells of the first row are emitted as th and
   * carry the column width; the first cell of each row carries the row height.
   *
   * @param int $worksheetIndex
   *
   * @return string HTML markup, rows separated by CRLF
   */
  public function toHTMLEx($worksheetIndex = 0) {
      $s = '<table class=excel>';
      $y = 0;
      foreach ($this->readRowsEx($worksheetIndex) as $r) {
          $s .= '<tr>';
          $x = 0;
          foreach ($r as $c) {
              $tag = 'td';
              $css = $c['css'];
              if ($y === 0) {
                  $tag = 'th';
                  // 0.47em per width unit - NOTE(review): scale factor taken as-is, unit not shown here.
                  $css .= $c['width'] ? 'width: ' . round($c['width'] * 0.47, 2) . 'em;' : '';
              }
              if ($x === 0 && $c['height']) {
                  // presumably points converted to pixels - TODO confirm
                  $css .= 'height: ' . round($c['height'] * 1.3333) . 'px;';
              }
              // Cell values may be int/float/bool/null, so cast before escaping;
              // the entity needs its terminating semicolon to be well-formed.
              $content = $c['value'] === '' ? '&nbsp;' : htmlspecialchars((string) $c['value'], ENT_QUOTES);
              $s .= '<' . $tag . ' style="' . $css . '" nowrap>' . $content . '</' . $tag . '>';
              $x++;
          }
          $s .= "</tr>\r\n";
          $y++;
      }
      $s .= '</table>';

      return $s;
  }
  /**
   * Collects every row yielded by readRows() into an array.
   *
   * @param int $worksheetIndex
   * @param int $limit Passed through to readRows()
   *
   * @return array Sequentially indexed list of rows
   */
  public function rows($worksheetIndex = 0, $limit = 0) {
      $rowGenerator = $this->readRows($worksheetIndex, $limit);

      // false: drop the generator's keys so the result is indexed 0..n-1.
      return iterator_to_array($rowGenerator, false);
  }
  // thx Gonzo
  /**
   * Lazily yields the rows of a worksheet as arrays of cell values.
   *
   * Every row starts as an array of '' sized by the sheet dimension. Rows that
   * are missing from the XML are yielded as blank rows, both before a row whose
   * cells reference a later row index and after the last row up to the
   * dimension's row count. Yields nothing when the worksheet cannot be loaded.
   *
   * @param int $worksheetIndex
   * @param int $limit Stop after this many rows; 0 means no limit
   *
   * @return \Generator
   */
  public function readRows($worksheetIndex = 0, $limit = 0) {
      $sheet = $this->worksheet($worksheetIndex);
      if ($sheet === false) {
          return;
      }

      $dim = $this->dimension($worksheetIndex);
      $numCols = $dim[0];
      $numRows = $dim[1];

      // Template for a blank row: one '' per column of the sheet dimension.
      $blankRow = $numCols > 0 ? array_fill(0, $numCols, '') : [];

      $rowsDone = 0;
      // Counts down on every yielded row; it only reaches 0 when $limit was positive.
      $remaining = $limit;

      /* @var SimpleXMLElement $sheet */
      foreach ($sheet->sheetData->row as $rowNode) {
          $values = $blankRow;
          $column = 0;

          foreach ($rowNode->c as $cellNode) {
              // detect skipped cols
              list($x, $y) = $this->getIndex((string) $cellNode['r']);

              if ($x > -1) {
                  $column = $x;
                  // The cell belongs to a later row than expected: emit the gap as blank rows.
                  while ($rowsDone < $y) {
                      yield $blankRow;
                      $rowsDone++;
                      if (--$remaining === 0) {
                          return;
                      }
                  }
              }

              $values[$column] = $this->value($cellNode);
              $column++;
          }

          yield $values;
          $rowsDone++;
          if (--$remaining === 0) {
              return;
          }
      }

      // Pad with blank rows up to the row count of the sheet dimension.
      while ($rowsDone < $numRows) {
          yield $blankRow;
          $rowsDone++;
          if (--$remaining === 0) {
              return;
          }
      }
  }
  /**
   * Collects every row yielded by readRowsEx() into an array.
   *
   * @param int $worksheetIndex
   * @param int $limit Passed through to readRowsEx()
   *
   * @return array Sequentially indexed list of rows
   */
  public function rowsEx($worksheetIndex = 0, $limit = 0) {
      $rowGenerator = $this->readRowsEx($worksheetIndex, $limit);

      // false: drop the generator's keys so the result is indexed 0..n-1.
      return iterator_to_array($rowGenerator, false);
  }
  // https://github.com/shuchkin/simplexlsx#gets-extend-cell-info-by--rowsex
  /**
   * Delegates extended row reading to a SimpleXLSXEx helper.
   *
   * The helper class file is loaded and the helper instantiated on first use,
   * then kept in $this->rowsExReader for later calls.
   *
   * @param $worksheetIndex
   * @param $limit
   * @return \Generator|null
   */
  public function readRowsEx($worksheetIndex = 0, $limit = 0) {
      $reader = $this->rowsExReader;

      if (!$reader) {
          require_once __DIR__ . '/SimpleXLSXEx.php';
          $reader = new SimpleXLSXEx($this);
          $this->rowsExReader = $reader;
      }

      return $reader->readRowsEx($worksheetIndex, $limit);
  }
  /**
   * Returns cell value
   * VERY SLOW! Use ->rows() or ->rowsEx()
   *
   * @param int $worksheetIndex
   * @param string|array $cell ref or [column, row] coords with a 1-based column, C12 or [3,12]
   *
   * @return mixed Returns NULL if not found, FALSE if the worksheet cannot be loaded
   */
  public function getCell($worksheetIndex = 0, $cell = 'A1') {
      $ws = $this->worksheet($worksheetIndex);
      if ($ws === false) {
          return false;
      }

      if (is_array($cell)) {
          if (!isset($cell[0], $cell[1])) {
              return null;
          }
          $cell = $this->_num2name($cell[0]) . $cell[1]; // [3,21] -> C21
      }

      // Only letters followed by digits can match an "r" attribute; rejecting anything
      // else keeps quotes and other XPath syntax out of the expression built below.
      if (!is_string($cell) || !preg_match('/\A[A-Za-z]+[0-9]+\z/', $cell)) {
          return null;
      }

      $result = $ws->sheetData->xpath("row/c[@r='" . $cell . "']");
      // xpath() may return a non-array on failure, which count() must not receive.
      if (is_array($result) && count($result) > 0) {
          return $this->value($result[0]);
      }

      return null;
  }
  /**
   * Converts a 1-based column number into its letter name (1 -> A, 27 -> AA).
   *
   * @param int $num Column number, starting at 1
   *
   * @return string Column letters
   */
  protected function _num2name($num) {
      $name = '';

      // Peel off the least significant letter each round and prepend it.
      do {
          $name = chr(65 + ($num - 1) % 26) . $name;
          $num = (int) (($num - 1) / 26);
      } while ($num > 0);

      return $name;
  }
  /**
   * Returns the sheets collection held by this reader.
   *
   * @return mixed Contents of $this->sheets, unmodified
   */
  public function getSheets() {
      return $this->sheets;
  }
  /**
   * Returns how many entries the sheets collection holds.
   *
   * @return int
   */
  public function sheetsCount() {
      $total = count($this->sheets);

      return $total;
  }
  /**
   * Returns the name stored for a worksheet index.
   *
   * @param int $worksheetIndex
   *
   * @return mixed The stored name, or false when no name is set for that index
   */
  public function sheetName($worksheetIndex) {
      return isset($this->sheetNames[$worksheetIndex])
          ? $this->sheetNames[$worksheetIndex]
          : false;
  }
  /**
   * Returns all stored sheet names.
   *
   * @return mixed Contents of $this->sheetNames, unmodified
   */
  public function sheetNames() {
      return $this->sheetNames;
  }
  /**
   * Returns the styles data held by this reader.
   *
   * @return mixed Contents of $this->styles, unmodified
   */
  public function getStyles() {
      return $this->styles;
  }
  /**
   * Returns the package data held by this reader.
   *
   * @return mixed Contents of $this->package, unmodified
   */
  public function getPackage() {
      return $this->package;
  }
  /**
   * Sets the format that value() passes to gmdate() for date cells.
   *
   * @param mixed $value Format string; any non-string value stores false,
   *                     which makes value() return date cells as floats
   */
  public function setDateTimeFormat($value) {
      if (is_string($value)) {
          $this->datetimeFormat = $value;
      } else {
          $this->datetimeFormat = false;
      }
  }
  1032. }