LuaXML_lib.c 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328
  1. /*
  2. LuaXML License
  3. LuaXML is licensed under the terms of the MIT license reproduced below,
  4. the same as Lua itself. This means that LuaXML is free software and can be
  5. used for both academic and commercial purposes at absolutely no cost.
  6. Copyright (C) 2007-2013 Gerald Franz, eludi.net
  7. Permission is hereby granted, free of charge, to any person obtaining a copy
  8. of this software and associated documentation files (the "Software"), to deal
  9. in the Software without restriction, including without limitation the rights
  10. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. copies of the Software, and to permit persons to whom the Software is
  12. furnished to do so, subject to the following conditions:
  13. The above copyright notice and this permission notice shall be included in
  14. all copies or substantial portions of the Software.
  15. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. THE SOFTWARE.
  22. */
  23. /// @module LuaXML
  24. #include "LuaXML_lib.h"
  25. #include <ctype.h>
  26. #include <stdbool.h>
  27. #include <stdio.h>
  28. #include <stdlib.h>
  29. #include <string.h>
  /*
   * Compatibility shims, so the rest of this file can use a single spelling of
   * the Lua API regardless of the Lua version it is compiled against.
   * Every shim is only defined if the name is not already a macro, because the
   * Lua headers may provide the very same names when built with LUA_COMPAT_*
   * options - redefining them with a different body would be diagnosed.
   */
  /* compatibility with older Lua versions (<5.2) */
  #if LUA_VERSION_NUM < 502
  // Substitute lua_objlen() for lua_rawlen()
  #ifndef lua_rawlen
  #define lua_rawlen(L, index) lua_objlen((L), (index))
  #endif
  // Make use of luaL_register() to achieve same result as luaL_newlib()
  #ifndef luaL_newlib
  #define luaL_newlib(L, funcs) \
      do { \
          lua_newtable(L); \
          luaL_register((L), NULL, (funcs)); \
      } while (0)
  #endif
  #endif
  /* API changes for 5.2+ */
  #if LUA_VERSION_NUM >= 502
  // lua_compare() has replaced lua_equal()
  #ifndef lua_equal
  #define lua_equal(L, index1, index2) \
      lua_compare((L), (index1), (index2), LUA_OPEQ)
  #endif
  #endif
  /* API changes for 5.3+ */
  #if LUA_VERSION_NUM >= 503
  // luaL_optinteger() has replaced luaL_optint()
  #ifndef luaL_optint
  #define luaL_optint(L, arg, d) luaL_optinteger((L), (arg), (d))
  #endif
  #endif
  #define LUAXML_META "LuaXML" // name to be used for metatable
  52. //--- auxiliary functions ------------------------------------------
  // Search the C string `s` for `pattern`, beginning at byte offset `start`.
  // Returns the offset of the first match (relative to the start of `s`); when
  // there is no match the result is strlen(s), i.e. the offset of the NUL.
  // `start` must not exceed the length of `s`.
  static size_t
  find(const char *s, const char *pattern, size_t start)
  {
      const char *hit = strstr(s + start, pattern);
      if (hit == NULL)
          return strlen(s);
      return (size_t)(hit - s);
  }
  59. // push (arbitrary Lua) value to be used as tag key, placing it on top of stack
  60. static inline void
  61. push_TAG_key(lua_State *L)
  62. {
  63. /* Note: Currently this is the number 0, which fits in nicely with using
  64. * string keys for attribute-value pairs and also 'stays clear' of the
  65. * array of sub-elements (starting at index 1).
  66. * Theoretically, this could be any kind of Lua value; but when using a
  67. * string key (e.g. "TAG"), extra care needs to be taken that it doesn't
  68. * get confused with an attribute - which means that the str() function
  69. * should be modified accordingly (to recognise and avoid the tag key).
  70. */
  71. lua_pushinteger(L, 0);
  72. }
  73. // convert Lua table at given index to an XML "object", by setting its metatable
  74. static void
  75. make_xml_object(lua_State *L, int index)
  76. {
  77. if (index < 0)
  78. index += lua_gettop(L) + 1; // relative to absolute index
  79. if (!lua_istable(L, index))
  80. luaL_error(L,
  81. "%s() error: invalid type at %d - expected table, got %s",
  82. __func__,
  83. index,
  84. luaL_typename(L, index));
  85. luaL_getmetatable(L, LUAXML_META);
  86. lua_setmetatable(L, index); // assign metatable
  87. }
  88. // push an indentation string for the given level to the Lua stack
  89. static void
  90. push_indentStr(lua_State *L, int level)
  91. {
  92. if (level <= 0) {
  93. lua_pushliteral(L, "");
  94. return;
  95. }
  96. luaL_Buffer b;
  97. luaL_buffinit(L, &b);
  98. // while (level-- > 0) luaL_addlstring(&b, " ", 2);
  99. while (level-- > 0)
  100. luaL_addchar(&b, '\t'); // one TAB char per level
  101. luaL_pushresult(&b);
  102. }
  // Tests if a string consists entirely of whitespace (as per isspace()).
  // Returns false for a NULL pointer and for the empty string.
  static bool
  is_whitespace(const char *s)
  {
      if (s == NULL || *s == '\0')
          return false;
      for (; *s != '\0'; s++) {
          // <ctype.h> functions require a value representable as unsigned
          // char; passing a plain (possibly negative) char is undefined
          if (!isspace((unsigned char)*s))
              return false;
      }
      return true;
  }
  // A token counts as "lead in" when it starts with a line break ('\n' or
  // '\r') and contains nothing but whitespace. That is the typical shape of
  // the line break plus indentation found between nested XML tags.
  static bool
  is_lead_token(const char *s)
  {
      if (!is_whitespace(s))
          return false; // also covers NULL and the empty string
      return s[0] == '\n' || s[0] == '\r';
  }
  123. /*
  124. * For the string at given stack index, substitute any occurrence (exact string
  125. * match) of pattern "p" with the replacement string "r".
  126. * When done, this function will replace the original string with the result.
  127. */
  128. // TODO / Caveat:
  129. // We return the luaL_gsub() pointer, but it's unclear (and untested) if that
  130. // persists after the lua_replace(). Currently the result isn't used anywhere.
  131. static const char *
  132. do_gsub(lua_State *L, int index, const char *p, const char *r)
  133. {
  134. if (index < 0)
  135. index += lua_gettop(L) + 1; // relative to absolute index
  136. const char *result = luaL_gsub(L, lua_tostring(L, index), p, r);
  137. lua_replace(L, index);
  138. return result;
  139. }
  140. /*
  141. * Lua C function to replace a gsub() match with the corresponding character.
  142. * Xml_pushDecode() will use this as a replacement function argument to undo
  143. * the XML encodings, passing one match (sequence of digits) at a time.
  144. *
  145. * Due to the pattern used, the matched string may also be 'x' followed by
  146. * a sequence of hexadecimal characters ("xE4"), which is supported too.
  147. */
  148. static int
  149. XMLencoding_replacement(lua_State *L)
  150. {
  151. const char *matched = lua_tostring(L, 1);
  152. if (matched) {
  153. // support both decimal and hexadecimal conversion
  154. char c = *matched == 'x' ? strtol(++matched, NULL, 16) : atoi(matched);
  155. if (c) {
  156. lua_pushlstring(L, &c, 1); // return character as Lua string
  157. return 1;
  158. } // c == 0 probably indicates conversion failure, return `nil`
  159. }
  160. return 0;
  161. }
  162. /* Lua C callback function for a `find()` match. Sets the upvalue (that will
  163. * later be the result) and stops the iteration.
  164. *
  165. * A small problem here is that the callback handling by iterate() means this
  166. * function cannot simply return the result on the Lua stack. Instead we need
  167. * a "shared" upvalue that can be retrieved 'externally' later. Therefore a
  168. * simple, 'flat' Lua value won't do (it can't be shared); so we'll use a table
  169. * instead and assign the match to t[1].
  170. */
  171. static int
  172. find_on_match(lua_State *L)
  173. {
  174. // Upon entry the Lua stack will have `var` and `depth`
  175. lua_settop(L, 1); // discard depth, leaving var on the stack
  176. lua_rawseti(L, lua_upvalueindex(1), 1); // store to upvalue table
  177. lua_pushboolean(L, false); // return false to stop iteration
  178. return 1;
  179. }
  /// strip all leading / trailing whitespace
  // @field WS_TRIM
  /// remove "lead in" whitespace before tags
  // @field WS_NORMALIZE
  /// preserve all whitespace, even between tags
  // @field WS_PRESERVE
  enum whitespace_mode {
      WHITESPACE_TRIM = 0,      // default mode for eval()
      WHITESPACE_NORMALIZE = 1, // drop "lead in" tokens only
      WHITESPACE_PRESERVE = 2   // keep whitespace as found
  };
  // Control characters the Tokenizer emits as single-character tokens to
  // signal structure rather than content
  #define ESC 0x1B /* (27) end of scope, closing tag */
  #define OPN 0x1C /* (28) "open", start of tag */
  #define CLS 0x1D /* (29) closes opening tag, actual content follows */
  195. //--- internal tokenizer -------------------------------------------
  196. typedef struct Tokenizer_s {
  197. /// stores string to be tokenized
  198. const char *s;
  199. /// stores size of string to be tokenized
  200. size_t s_size;
  201. /// stores current read position
  202. size_t i;
  203. /// stores current read context
  204. int tagMode;
  205. /// stores flag for "raw" byte sequence, *DON'T* decode any further
  206. int cdata;
  207. /// stores next token, if already determined
  208. const char *m_next;
  209. /// size of next token
  210. size_t m_next_size;
  211. /// pointer to current token
  212. char *m_token;
  213. /// size of current token
  214. size_t m_token_size;
  215. /// capacity of current token
  216. size_t m_token_capacity;
  217. /// whitespace handling
  218. enum whitespace_mode mode;
  219. } Tokenizer;
  220. Tokenizer *
  221. Tokenizer_new(const char *str, size_t str_size, enum whitespace_mode mode)
  222. {
  223. Tokenizer *tok = calloc(1, sizeof(Tokenizer));
  224. tok->s_size = str_size;
  225. tok->s = str;
  226. tok->mode = mode;
  227. return tok;
  228. }
  229. void
  230. Tokenizer_delete(Tokenizer *tok)
  231. {
  232. free(tok->m_token);
  233. free(tok);
  234. }
  #if LUAXML_DEBUG
  /*
   * Debugging aid: print the read position and the current token to stdout.
   * The special single-character tokens are shown by name.
   */
  void
  Tokenizer_print(Tokenizer *tok)
  {
      const char *text;
      if (!tok->m_token)
          text = "(null)";
      else if (tok->m_token[0] == ESC)
          text = "(esc)";
      else if (tok->m_token[0] == OPN)
          text = "(open)";
      else if (tok->m_token[0] == CLS)
          text = "(close)";
      else
          text = tok->m_token;
      printf(" @%zu %s\n", tok->i, text); // tok->i is a size_t, hence %zu
      fflush(stdout);
  }
  #else
  #define Tokenizer_print(tok) /* ignore */
  #endif
  253. static const char *
  254. Tokenizer_set(Tokenizer *tok, const char *s, size_t size)
  255. {
  256. if (!size || !s)
  257. return NULL;
  258. free(tok->m_token);
  259. tok->m_token = malloc(size + 1);
  260. strncpy(tok->m_token, s, size);
  261. tok->m_token[size] = 0;
  262. tok->m_token_size = tok->m_token_capacity = size;
  263. Tokenizer_print(tok);
  264. return tok->m_token;
  265. }
  266. static void
  267. Tokenizer_append(Tokenizer *tok, char ch)
  268. {
  269. if (tok->m_token_size + 1 >= tok->m_token_capacity) {
  270. tok->m_token_capacity =
  271. tok->m_token_capacity ? tok->m_token_capacity * 2 : 16;
  272. tok->m_token = realloc(tok->m_token, tok->m_token_capacity);
  273. }
  274. tok->m_token[tok->m_token_size] = ch;
  275. tok->m_token[++tok->m_token_size] = 0;
  276. }
  277. const char *
  278. Tokenizer_next(Tokenizer *tok)
  279. {
  280. // NUL-terminated strings for the special tokens
  281. static const char ESC_str[] = {ESC, 0};
  282. static const char OPEN_str[] = {OPN, 0};
  283. static const char CLOSE_str[] = {CLS, 0};
  284. if (tok->m_token) {
  285. free(tok->m_token);
  286. tok->m_token = NULL;
  287. tok->m_token_size = tok->m_token_capacity = 0;
  288. }
  289. char quotMode = 0;
  290. int tokenComplete = 0;
  291. while (tok->m_next_size || (tok->i < tok->s_size)) {
  292. tok->cdata = 0;
  293. if (tok->m_next_size) {
  294. Tokenizer_set(tok, tok->m_next, tok->m_next_size);
  295. tok->m_next = NULL;
  296. tok->m_next_size = 0;
  297. return tok->m_token;
  298. }
  299. switch (tok->s[tok->i]) {
  300. case '"':
  301. case '\'':
  302. if (tok->tagMode) {
  303. // toggle quotation mode
  304. if (!quotMode)
  305. quotMode = tok->s[tok->i];
  306. else if (quotMode == tok->s[tok->i])
  307. quotMode = 0;
  308. }
  309. Tokenizer_append(tok, tok->s[tok->i]);
  310. break;
  311. case '<':
  312. if (!quotMode && (tok->i + 4 < tok->s_size)
  313. && (strncmp(tok->s + tok->i, "<!--", 4) == 0))
  314. tok->i = find(tok->s, "-->", tok->i + 4) + 2; // strip comments
  315. else if (!quotMode && (tok->i + 9 < tok->s_size)
  316. && (strncmp(tok->s + tok->i, "<![CDATA[", 9) == 0)) {
  317. if (tok->m_token_size > 0)
  318. // finish current token first, after that reparse CDATA
  319. tokenComplete = 1;
  320. else {
  321. // interpret CDATA
  322. size_t b = tok->i + 9;
  323. tok->i = find(tok->s, "]]>", b) + 3;
  324. size_t cdata_len = tok->i - b - 3;
  325. if (cdata_len > 0) {
  326. tok->cdata = 1; // mark as "raw" byte sequence
  327. return Tokenizer_set(tok, tok->s + b, cdata_len);
  328. }
  329. }
  330. --tok->i;
  331. } else if (!quotMode && (tok->i + 1 < tok->s_size)
  332. && ((tok->s[tok->i + 1] == '?')
  333. || (tok->s[tok->i + 1] == '!')))
  334. tok->i =
  335. find(tok->s, ">", tok->i + 2); // strip meta information
  336. else if (!quotMode && !tok->tagMode) {
  337. if ((tok->i + 1 < tok->s_size) && (tok->s[tok->i + 1] == '/')) {
  338. // "</" sequence that starts a closing tag
  339. tok->m_next = ESC_str;
  340. tok->m_next_size = 1;
  341. tok->i = find(tok->s, ">", tok->i + 2);
  342. } else {
  343. // regular '<' opening a new tag
  344. tok->m_next = OPEN_str;
  345. tok->m_next_size = 1;
  346. tok->tagMode = 1;
  347. }
  348. tokenComplete = 1;
  349. } else
  350. Tokenizer_append(tok, tok->s[tok->i]);
  351. break;
  352. case '/':
  353. if (tok->tagMode && !quotMode) {
  354. tokenComplete = 1;
  355. if ((tok->i + 1 < tok->s_size) && (tok->s[tok->i + 1] == '>')) {
  356. // "/>" sequence = end of 'empty' tag
  357. tok->tagMode = 0;
  358. tok->m_next = ESC_str;
  359. tok->m_next_size = 1;
  360. ++tok->i;
  361. } else
  362. Tokenizer_append(tok, tok->s[tok->i]);
  363. } else
  364. Tokenizer_append(tok, tok->s[tok->i]);
  365. break;
  366. case '>':
  367. if (!quotMode && tok->tagMode) {
  368. // this '>' closes the current tag
  369. tok->tagMode = 0;
  370. tokenComplete = 1;
  371. tok->m_next = CLOSE_str;
  372. tok->m_next_size = 1;
  373. } else
  374. Tokenizer_append(tok, tok->s[tok->i]);
  375. break;
  376. case ' ':
  377. case '\r':
  378. case '\n':
  379. case '\t':
  380. if (tok->tagMode && !quotMode) {
  381. // within a tag, any unquoted whitespace ends the current token
  382. // (= attribute)
  383. if (tok->m_token_size)
  384. tokenComplete = 1;
  385. } else if (tok->m_token_size || tok->mode != WHITESPACE_TRIM)
  386. Tokenizer_append(tok, tok->s[tok->i]);
  387. break;
  388. default:
  389. Tokenizer_append(tok, tok->s[tok->i]);
  390. }
  391. ++tok->i;
  392. if (tok->i >= tok->s_size || (tokenComplete && tok->m_token_size)) {
  393. tokenComplete = 0;
  394. if (tok->mode == WHITESPACE_TRIM) // trim whitespace
  395. while (tok->m_token_size
  396. && isspace(tok->m_token[tok->m_token_size - 1]))
  397. tok->m_token[--tok->m_token_size] = 0;
  398. if (tok->m_token_size)
  399. break;
  400. }
  401. }
  402. Tokenizer_print(tok);
  403. return tok->m_token;
  404. }
  //--- local variables ----------------------------------------------
  // Lua registry reference to the 'private' table that maps special
  // characters to their XML substitutions. (The reference gets assigned
  // elsewhere; until then this is 0, same as any static int.)
  static int sv_code_ref = 0;
  408. //--- public methods -----------------------------------------------
  409. /** sets or returns tag of a LuaXML object.
  410. This method is just "syntactic sugar" (using a typical Lua term) that allows
  411. the writing of clearer code. LuaXML stores the tag value of an XML statement
  412. at table index 0, hence it can be simply accessed or altered by `var[0]`.
  413. However, writing `var:tag()` for access or `var:tag("newTag")` for altering
  414. may be more self explanatory (and future-proof in case LuaXML's tag handling
  415. should ever change).
  416. @function tag
  417. @param var the variable whose tag should be accessed, a LuaXML object
  418. @tparam ?string tag the new tag to be set
  419. @return If you have passed a new tag, the function will return `var` (with
  420. its tag changed); otherwise the result will be the current tag of `var`
  421. (normally a string).
  422. */
  423. int
  424. Xml_tag(lua_State *L)
  425. {
  426. // the function will only operate on tables
  427. if
  428. lua_istable(L, 1)
  429. {
  430. lua_settop(L, 2);
  431. push_TAG_key(L); // place tag key on top of stack (#3)
  432. if (lua_type(L, 2) == LUA_TSTRING) {
  433. lua_pushvalue(L, 2); // duplicate the value
  434. lua_rawset(L, 1);
  435. // we return the (modified) table
  436. lua_settop(L, 1);
  437. return 1;
  438. } else {
  439. // "tag" is empty or wrong type, retrieve the current tag
  440. lua_rawget(L, 1);
  441. return 1;
  442. }
  443. }
  444. return 0;
  445. }
  446. /** creates a LuaXML "object", and optionally sets its tag.
  447. The function either sets the metatable of an existing Lua table, or creates a
  448. new (empty) "object". If you pass an optional` tag` string, it will be assigned
  449. to the result.
  450. (It's also possible to call this as `new(tag)`, which creates a new XML object
  451. with the given tag and is equivalent to `new({}, tag)`.)
  452. Note that it's not mandatory to use this function in order to treat a Lua table
  453. as LuaXML object. Setting the metatable just allows the usage of a more
  454. object-oriented syntax (e.g. `xmlvar:str()` instead of `xml.str(xmlvar)`).
  455. XML objects created by `load` or `eval` automatically offer the
  456. object-oriented syntax.
  457. @function new
  458. @param arg (optional) _(1)_ a table to be converted to a LuaXML object,
  459. or _(2)_ the tag of the new LuaXML object
  460. @tparam ?string tag a tag value that will be assigned to the object
  461. @return LuaXML object, either newly created or the conversion of `arg`;
  462. optionally tagged as requested
  463. */
  464. int
  465. Xml_new(lua_State *L)
  466. {
  467. if (!lua_istable(L, 1)) {
  468. // create a new table and move it to the bottom of the stack (#1),
  469. // possibly shifting other elements "one up"
  470. lua_newtable(L);
  471. lua_insert(L, 1);
  472. }
  473. // element at #1 now is a table, convert to "object"
  474. make_xml_object(L, 1);
  475. if (lua_type(L, 2) == LUA_TSTRING) {
  476. lua_pushcfunction(L, Xml_tag);
  477. lua_pushvalue(L, 1); // duplicate the object table
  478. lua_pushvalue(L, 2); // duplicate the tag (string)
  479. lua_call(L, 2, 0); // call the "tag" function, discarding any result
  480. }
  481. lua_settop(L, 1);
  482. return 1;
  483. }
  484. /** appends a new subordinate LuaXML object to an existing one.
  485. optionally sets tag
  486. @function append
  487. @param var the parent LuaXML object
  488. @tparam ?string tag the tag of the appended LuaXML object
  489. @return appended LuaXML object, or `nil` in case of errors
  490. */
  491. int
  492. Xml_append(lua_State *L)
  493. {
  494. if (lua_type(L, 1) == LUA_TTABLE) {
  495. lua_settop(L, 2);
  496. lua_pushcfunction(L, Xml_new);
  497. lua_insert(L, 2);
  498. lua_call(L, 1, 1); // new(tag)
  499. lua_pushvalue(L, -1); // duplicate result
  500. lua_rawseti(L, 1, lua_rawlen(L, 1) + 1); // append to parent (elements)
  501. return 1;
  502. }
  503. return 0;
  504. }
  505. // Push XML-encoded string for the Lua value at given index.
  506. // Will automatically use a tostring() conversion first, if necessary.
  507. static void
  508. Xml_pushEncode(lua_State *L, int index)
  509. {
  510. if (index < 0)
  511. index += lua_gettop(L) + 1; // relative to absolute index
  512. if (lua_type(L, index) == LUA_TSTRING)
  513. lua_pushvalue(L, index); // already a string, just duplicate it
  514. else {
  515. lua_getglobal(L, "tostring");
  516. lua_pushvalue(L, index); // duplicate value
  517. lua_call(L, 1, 1); // tostring()
  518. }
  519. // always do "&amp;" first
  520. // (avoids later affecting other substitutions, which may contain '&')
  521. do_gsub(L, -1, "&", "&amp;");
  522. // encode other special entities
  523. lua_rawgeti(L, LUA_REGISTRYINDEX, sv_code_ref);
  524. lua_pushnil(L);
  525. while (lua_next(L, -2)) {
  526. // Lua stack has string to work on (-4), substitution table (-3),
  527. // table key (-2 = special char) and value (-1 = replacement)
  528. // (We want to replace the original char with the XML encoding.)
  529. do_gsub(L, -4, lua_tostring(L, -2), lua_tostring(L, -1));
  530. lua_pop(L, 1); // pop value, leaving key for the next iteration
  531. }
  532. lua_pop(L, 1); // pop substitution table to realign the stack
  533. // transfer string one character at a time, encoding any chars with MSB set
  534. char buf[8];
  535. const unsigned char *s = (unsigned char *)lua_tostring(L, -1);
  536. luaL_Buffer b;
  537. luaL_buffinit(L, &b);
  538. while (*s) {
  539. if (*s < 128)
  540. luaL_addchar(&b, *s); // copy character literally
  541. else {
  542. int len = snprintf(buf, sizeof(buf), "&#%d;", *s); // encode char
  543. luaL_addlstring(&b, buf, len);
  544. }
  545. s++;
  546. }
  547. luaL_pushresult(&b);
  548. lua_replace(L, -2); // (leaving the result on the stack)
  549. }
  550. /*
  551. // Push a string, then do XML conversion on it - result remains on top of stack.
  552. static void Xml_pushEncodeStr(lua_State *L, const char *s, int size) {
  553. if (size == 0) {
  554. lua_pushliteral(L, "");
  555. return;
  556. }
  557. if (size < 0) size = strlen(s);
  558. lua_pushlstring(L, s, size);
  559. Xml_pushEncode(L, -1);
  560. lua_replace(L, -2);
  561. }
  562. */
  563. // Push Lua representation of the given string, while decoding any special XML
  564. // encodings
  565. static void
  566. Xml_pushDecode(lua_State *L, const char *s, int size)
  567. {
  568. if (size == 0) {
  569. lua_pushliteral(L, "");
  570. return;
  571. }
  572. if (size < 0)
  573. size = strlen(s);
  574. // try a gsub() substition of decimal and hexadecimal character encodings
  575. lua_pushlstring(L, s, size); // initial string
  576. lua_pushliteral(L, "gsub");
  577. lua_gettable(L, -2); // using string as object, retrieve the "gsub" function
  578. lua_insert(L, -2); // swap with function, making string the arg #1
  579. lua_pushliteral(L, "&#(x?%x+);"); // pattern for XML encodings (arg #2)
  580. lua_pushcfunction(L, XMLencoding_replacement); // replacement func (arg #3)
  581. lua_call(L, 3, 1); // three parameters, one result (the substituted string)
  582. lua_rawgeti(L, LUA_REGISTRYINDEX, sv_code_ref);
  583. lua_pushnil(L);
  584. while (lua_next(L, -2)) {
  585. // Lua stack has string to work on (-4), substitution table (-3),
  586. // table key (-2 = special char) and value (-1 = replacement)
  587. // (We want to replace the XML encoding with the original char.)
  588. do_gsub(L, -4, lua_tostring(L, -1), lua_tostring(L, -2));
  589. lua_pop(L, 1); // pop value, leaving key for the next iteration
  590. }
  591. lua_pop(L, 1); // pop substitution table, leaving result string on stack
  592. do_gsub(L, -1, "&amp;", "&"); // this should always be done last
  593. }
  594. /** parses an XML string into a Lua table.
  595. The table will contain a representation of the XML tag, attributes (and their
  596. values), and element content / subelements (either as strings or nested LuaXML
  597. "objects").
  598. Note: Parsing "wide" strings or Unicode (UCS-2, UCS-4, UTF-16) currently is
  599. __not__ supported. If needed, convert such `xml` data to UTF-8 before passing it
  600. to `eval()`. UTF-8 should be safe to use, and this function will also recognize
  601. and ignore a UTF-8 BOM (byte order mark) at the start of `xml`.
  602. @function eval
  603. @tparam string|userdata xml
  604. the XML to be converted. When passing a userdata type `xml` value, it must
  605. point to a C-style (NUL-terminated) string.
  606. @tparam ?number mode
  607. whitespace handling mode, one of the `WS_*` constants - see [Fields](#Fields).
  608. defaults to `WS_TRIM` (compatible to previous LuaXML versions)
  609. @return a LuaXML object containing the XML data, or `nil` in case of errors
  610. */
  611. int
  612. Xml_eval(lua_State *L)
  613. {
  614. enum whitespace_mode mode = luaL_optint(L, 2, WHITESPACE_TRIM);
  615. const char *str;
  616. size_t str_size;
  617. if (lua_isuserdata(L, 1)) {
  618. str = lua_touserdata(L, 1);
  619. str_size = strlen(str);
  620. } else
  621. str = luaL_checklstring(L, 1, &str_size);
  622. if (str_size >= 3 && strncmp(str, "\xEF\xBB\xBF", 3) == 0) {
  623. // ignore / skip over UTF-8 BOM (byte order mark)
  624. str += 3;
  625. str_size -= 3;
  626. }
  627. Tokenizer *tok = Tokenizer_new(str, str_size, mode);
  628. lua_settop(L, 1);
  629. const char *token;
  630. int firstStatement = 1;
  631. while ((token = Tokenizer_next(tok)))
  632. if (*token == OPN) { // new tag found
  633. if (lua_gettop(L) > 1) {
  634. lua_newtable(L);
  635. lua_pushvalue(L,
  636. -1); // duplicate table (keep one copy on stack)
  637. lua_rawseti(L,
  638. -3,
  639. lua_rawlen(L, -3) + 1); // set parent subelement
  640. } else {
  641. if (firstStatement) {
  642. lua_newtable(L);
  643. firstStatement = 0;
  644. } else
  645. return 0;
  646. }
  647. make_xml_object(L, -1); // assign metatable
  648. // parse tag and content:
  649. push_TAG_key(L); // place tag key on top of stack
  650. lua_pushstring(L, Tokenizer_next(tok));
  651. lua_rawset(L, -3);
  652. while ((token = Tokenizer_next(tok)) && (*token != CLS)
  653. && (*token != ESC)) {
  654. // parse tag header
  655. size_t sepPos = find(token, "=", 0);
  656. if (token[sepPos]) { // regular attribute (key="value")
  657. const char *aVal = token + sepPos + 2;
  658. lua_pushlstring(L, token, sepPos);
  659. Xml_pushDecode(L, aVal, strlen(aVal) - 1);
  660. lua_rawset(L, -3);
  661. }
  662. }
  663. if (!token || (*token == ESC)) {
  664. // this tag has no content, only attributes
  665. if (lua_gettop(L) > 2)
  666. lua_pop(L, 1);
  667. else
  668. break;
  669. }
  670. } else if (*token == ESC) { // previous tag is over
  671. if (lua_gettop(L) > 2)
  672. lua_pop(L, 1); // pop current table
  673. else
  674. break;
  675. } else { // read elements
  676. if (lua_gettop(L) > 1) {
  677. // when normalizing, we ignore tokens considered "lead-in" type
  678. if (mode != WHITESPACE_NORMALIZE || !is_lead_token(token)) {
  679. if (tok->cdata) // "raw" mode, don't change token string!
  680. lua_pushstring(L, token);
  681. else
  682. Xml_pushDecode(L, token, -1);
  683. lua_rawseti(L, -2, lua_rawlen(L, -2) + 1);
  684. }
  685. } else // element stack is empty, i.e. we encountered a token
  686. // *before* any tag
  687. if (!is_whitespace(token))
  688. luaL_error(L,
  689. "Malformed XML: non-empty string '%s' before any "
  690. "tag (parser pos %d)",
  691. token,
  692. (int)tok->i);
  693. }
  694. Tokenizer_delete(tok);
  695. return lua_gettop(L) - 1;
  696. }
  697. /** loads XML data from a file and returns it as table.
  698. Basically, this is just calling `eval` on the given file's content.
  699. @function load
  700. @tparam string filename the name and path of the file to be loaded
  701. @tparam ?number mode whitespace handling mode, defaults to `WS_TRIM`
  702. @return a Lua table representing the XML data, or `nil` in case of errors
  703. */
  704. int
  705. Xml_load(lua_State *L)
  706. {
  707. const char *filename = luaL_checkstring(L, 1);
  708. FILE *file = fopen(filename, "r");
  709. if (!file)
  710. return luaL_error(L,
  711. "LuaXML ERROR: \"%s\" file error or file not found!",
  712. filename);
  713. fseek(file, 0, SEEK_END);
  714. size_t sz = ftell(file);
  715. rewind(file);
  716. char *buffer = malloc(sz + 1);
  717. sz = fread(buffer, 1, sz, file);
  718. fclose(file);
  719. buffer[sz] = 0;
  720. lua_pushlightuserdata(L, buffer);
  721. lua_replace(L, 1);
  722. int result = Xml_eval(L);
  723. free(buffer);
  724. return result;
  725. };
  726. /** registers a custom code for the conversion between non-standard characters
  727. and XML character entities.
  728. By default, only the most basic entities are known to LuaXML:
  729. " < > '
  730. On top (and independent) of that, the **ampersand** sign always gets encoded /
  731. decoded separately: `&amp;` &harr; `&amp;amp;`. Character codes above 127 are
  732. directly converted to an appropriate XML encoding, representing the character
  733. number (e.g. `&amp;#160;`). If other special encodings are needed, they can be
  734. registered using this function.
  735. Note: LuaXML now manages these encodings in a (private) standard Lua table.
  736. This allows you to replace entries by calling `registerCode()` again, using the
  737. same `decoded` and a different `encoded`. Encodings may even be removed later,
  738. by explictly registering a `nil` value: `registerCode(decoded, nil)`.
  739. @function registerCode
  740. @tparam string decoded the character (sequence) to be used within Lua
  741. @tparam string encoded the character entity to be used in XML
  742. @see encode, decode
  743. */
  744. int
  745. Xml_registerCode(lua_State *L)
  746. {
  747. // We require the "decoded" string, but allow `nil` as argument #2.
  748. // That way, users may remove entries from the table again.
  749. luaL_checkstring(L, 1);
  750. if (!lua_isnoneornil(L, 2))
  751. luaL_checkstring(L, 2);
  752. lua_settop(L, 2);
  753. lua_rawgeti(L, LUA_REGISTRYINDEX, sv_code_ref); // get translation table
  754. lua_insert(L, 1);
  755. lua_rawset(L, 1); // assign key-value pair (k "decoded" -> v "encoded")
  756. return 0;
  757. }
  758. /** converts a string to XML encoding.
  759. This function transforms` str` by replacing any special characters with
  760. suitable XML encodings.
  761. @usage
  762. print(xml.encode("<->")) -- "&lt;-&gt;"
  763. @function encode
  764. @tparam string str string to be transformed
  765. @treturn string the XML-encoded string
  766. @see decode, registerCode
  767. */
  768. int
  769. Xml_encode(lua_State *L)
  770. {
  771. luaL_checkstring(L, 1); // make sure arg #1 is a string
  772. Xml_pushEncode(L, 1); // and convert it
  773. return 1;
  774. }
  775. /** converts a string from XML encoding.
  776. This function transforms` str` by replacing any special XML encodings with
  777. their "plain text" counterparts.
  778. @usage
  779. print((xml.decode("&lt;-&gt;")) -- "<->"
  780. @function decode
  781. @tparam string str string to be transformed
  782. @treturn string the decoded string
  783. @see encode, registerCode
  784. */
  785. int
  786. Xml_decode(lua_State *L)
  787. {
  788. size_t size;
  789. luaL_checklstring(L, 1, &size); // make sure arg #1 is a string
  790. Xml_pushDecode(L, lua_tostring(L, 1), size); // and convert it
  791. return 1;
  792. }
  793. /** converts any Lua value to an XML string.
  794. @function str
  795. @param value
  796. the value to be converted, normally a table (LuaXML object). However this
  797. function will 'encapsulate' other Lua values (of arbitrary type) in a way that
  798. should make them valid XML.
  799. <br>Note: Passing no `value` will cause the function to return `nil`.
  800. @tparam ?number indent
  801. indentation level for 'pretty' output. Mainly for internal use, defaults to 0.
  802. @tparam ?string tag
  803. the tag to be used in case `value` doesn't already have an 'implicit' tag.
  804. Mainly for internal use.
  805. @treturn string
  806. an XML string, or `nil` in case of errors.
  807. */
  808. int
  809. Xml_str(lua_State *L)
  810. {
  811. // Note:
  812. // Be very careful about mixing Lua stack usage and buffer access here.
  813. // The stack *must* be (re)balanced before accessing "b", i.e. any output
  814. // should only occur at the same Lua stack level as the previous one!
  815. luaL_Buffer b;
  816. lua_settop(L, 3);
  817. int type = lua_type(L, 1); // type of "value"
  818. if (type == LUA_TNIL)
  819. return 0;
  820. if (type == LUA_TTABLE) {
  821. push_TAG_key(L);
  822. lua_rawget(L, 1); // retrieve tag entry from the table (may be `nil`)
  823. // order of precedence: value[0], explicit tag string, Lua type name
  824. const char *tag = lua_tostring(L, -1);
  825. if (!tag)
  826. tag = lua_tostring(L, 3);
  827. if (!tag)
  828. tag = lua_typename(L, type);
  829. // Four elements already on stack: value, indent, tag, value[0]
  830. // Use a string (#5) to manage (concatenate) simple attributes
  831. lua_pushliteral(L, "");
  832. // And a table (#6) to take care of (collect) 'extended' attributes
  833. lua_newtable(L);
  834. size_t table_attr = 0;
  835. luaL_buffinit(L, &b);
  836. push_indentStr(L, lua_tointeger(L, 2));
  837. luaL_addvalue(&b);
  838. luaL_addchar(&b, '<');
  839. luaL_addstring(&b, tag);
  840. // Iterate over string keys (= attributes)
  841. lua_pushnil(L);
  842. while (lua_next(L, 1)) {
  843. // (k, v) pair on the stack
  844. if (lua_type(L, -2) == LUA_TSTRING) {
  845. // (the "_M" test here is to avoid recursion on module tables)
  846. if (lua_istable(L, -1) && strcmp(lua_tostring(L, -2), "_M")) {
  847. lua_pushcfunction(L, Xml_str);
  848. lua_pushvalue(L, -2); // duplicate "v"
  849. lua_pushinteger(L, lua_tointeger(L, 2) + 1); // indent + 1
  850. lua_pushvalue(L, -4); // duplicate "k"
  851. lua_call(L, 3, 1); // xml.str(v, indent + 1, k)
  852. lua_rawseti(L, 6, ++table_attr); // append string to table
  853. } else {
  854. Xml_pushEncode(L, -1); // encode(tostring(v))
  855. lua_pushfstring(L,
  856. "%s %s=\"%s\"",
  857. lua_tostring(L, 5),
  858. lua_tostring(L, -3),
  859. lua_tostring(L, -1));
  860. lua_replace(L, 5); // new attribute string
  861. lua_pop(L, 1); // realign stack
  862. }
  863. }
  864. lua_pop(L, 1); // pop <v>alue, leaving <k>ey for next iteration
  865. }
  866. // append "simple" attribute string to the output
  867. if (lua_rawlen(L, 5) > 0)
  868. luaL_addstring(&b, lua_tostring(L, 5));
  869. size_t count = lua_rawlen(L, 1); // number of "array" (sub)elements
  870. if (count == 0 && table_attr == 0) {
  871. // no sub-elements and no extended attr -> close tag and we're done
  872. luaL_addlstring(&b, " />\n", 4);
  873. luaL_pushresult(&b);
  874. return 1;
  875. }
  876. luaL_addchar(&b, '>'); // close opening tag
  877. if (count == 1 && table_attr == 0) {
  878. // single subelement, no extended attributes
  879. lua_rawgeti(L, 1, 1); // value[1]
  880. if (!lua_istable(L, -1)) {
  881. // output as single string, then close tag
  882. Xml_pushEncode(L, -1); // encode(tostring(value[1]))
  883. lua_replace(L, -2);
  884. luaL_addvalue(&b); // add and pop
  885. luaL_addlstring(&b, "</", 2);
  886. luaL_addstring(&b, tag);
  887. luaL_addlstring(&b, ">\n", 2);
  888. luaL_pushresult(&b);
  889. return 1;
  890. }
  891. lua_pop(L, 1); // discard (table) value, to realign stack
  892. }
  893. luaL_addchar(&b, '\n');
  894. // Loop over all the sub-elements, placing each on a separate line
  895. size_t k;
  896. for (k = 1; k <= count; k++) {
  897. #if LUA_VERSION_NUM < 503
  898. lua_rawgeti(L, 1, k);
  899. type = lua_type(L, -1);
  900. #else
  901. type = lua_rawgeti(L, 1, k); // (Lua 5.3 returns type directly)
  902. #endif
  903. if (type == LUA_TSTRING) {
  904. push_indentStr(L, lua_tointeger(L, 2) + 1); // indentation
  905. Xml_pushEncode(L, -2);
  906. lua_remove(L, -3);
  907. lua_pushliteral(L, "\n");
  908. lua_concat(L, 3);
  909. } else {
  910. lua_pushcfunction(L, Xml_str);
  911. lua_insert(L, -2); // place function before value
  912. lua_pushinteger(L, lua_tointeger(L, 2) + 1); // indent + 1
  913. lua_call(L, 2, 1); // xml.str(v, indent + 1)
  914. }
  915. luaL_addvalue(&b); // add (string) to output, pop from stack
  916. }
  917. // Finally we'll take care of the "extended" (table-type) attributes.
  918. // The output is appended after the regular sub-elements, in order
  919. // not to affect their numbering.
  920. // Just process the corresponding table, concatenating all entries:
  921. for (k = 1; k <= table_attr; k++) {
  922. lua_rawgeti(L, 6, k);
  923. luaL_addvalue(&b);
  924. }
  925. // closing tag
  926. push_indentStr(L, lua_tointeger(L, 2));
  927. luaL_addvalue(&b);
  928. luaL_addlstring(&b, "</", 2);
  929. luaL_addstring(&b, tag);
  930. luaL_addlstring(&b, ">\n", 2);
  931. luaL_pushresult(&b);
  932. return 1;
  933. }
  934. // Getting here means a "flat" Lua value, format to XML as a single string
  935. const char *tag = lua_tostring(L, 3);
  936. if (!tag)
  937. tag = lua_typename(L, type); // use either tag or the type name
  938. luaL_buffinit(L, &b);
  939. push_indentStr(L, lua_tointeger(L, 2));
  940. luaL_addvalue(&b);
  941. luaL_addchar(&b, '<');
  942. luaL_addstring(&b, tag);
  943. luaL_addchar(&b, '>');
  944. Xml_pushEncode(L, 1); // encode(tostring(value))
  945. luaL_addvalue(&b);
  946. luaL_addlstring(&b, "</", 2);
  947. luaL_addstring(&b, tag);
  948. luaL_addlstring(&b, ">\n", 2);
  949. luaL_pushresult(&b);
  950. return 1;
  951. }
  952. /** match XML entity against given (optional) criteria.
  953. Passing `nil` for one of the` tag`, `key`, or `value` parameters means "don't
  954. care" (i.e. match anything for that particular aspect). So for example
  955. var:match(nil, "text", nil)
  956. -- or shorter, but identical: var:match(nil, "text")
  957. will look for an XML attribute (name) "text" to be present in `var`, but won't
  958. consider its value or the tag of `var`.
  959. Note: If you want to test for a specific attribute `value`, so also have to
  960. supply a `key` - otherwise `value` will be ignored.
  961. @usage
  962. -- each of these will either return `x`, or `nil` in case of no match
  963. x:match("foo") -- test for x:tag() == "foo"
  964. x:match(nil, "bar") -- test if x has a "bar" attribute
  965. x:match(nil, "foo", "bar") -- test if x has a "foo" attribute that equals "bar"
  966. x:match("foobar", "foo", "bar") -- test for "foobar" tag, and attr "foo" ==
  967. "bar"
  968. @function match
  969. @param var
  970. the variable to test, normally a Lua table or LuaXML object. (If `var` is not
  971. a table type, the test always fails.)
  972. @tparam ?string tag
  973. If set, has to match the XML `tag` (i.e. must be equal to the `tag(var, nil)`
  974. result)
  975. @tparam ?string key
  976. If set, a corresponding **attribute key** needs to be present (exact name
  977. match).
  978. @param value (optional)
  979. arbitrary Lua value. If set, the **attribute value** for `key` has to match it.
  980. @return
  981. either `nil` for no match; or the `var` argument properly converted to a
  982. LuaXML object, equivalent to `xml.new(var)`.
  983. This allows you to either make direct use of the matched LuaXML object, or to
  984. use the return value in a boolean test (`if xml.match(...)`), which is a common
  985. Lua idiom.
  986. */
  987. int
  988. Xml_match(lua_State *L)
  989. {
  990. if (lua_type(L, 1) == LUA_TTABLE) {
  991. if (!lua_isnoneornil(L, 2)) {
  992. push_TAG_key(L);
  993. lua_rawget(L, 1); // get the tag value from var
  994. if (!lua_equal(L, -1, 2))
  995. return 0; // tag mismatch, return `nil`
  996. lua_pop(L, 1); // realign stack
  997. }
  998. if (lua_type(L, 3) == LUA_TSTRING) {
  999. lua_pushvalue(L, 3); // duplicate attribute key
  1000. lua_rawget(L, 1); // try to get value from var
  1001. if (lua_isnil(L, -1))
  1002. return 0; // no such attribute
  1003. if (!lua_isnoneornil(L, 4)) {
  1004. if (!lua_equal(L, -1, 4))
  1005. return 0; // attribute value mismatch
  1006. }
  1007. }
  1008. lua_settop(L, 1);
  1009. make_xml_object(L, 1);
  1010. return 1;
  1011. }
  1012. return 0;
  1013. }
  1014. /** iterates a LuaXML object,
  1015. invoking a callback function for all matching (sub)elements.
  1016. The iteration starts with the variable `var` itself (= default depth 0).
  1017. A callback function `cb` gets invoked for each `match`, depending on the
  1018. specified criteria. If the `r` flag is set, the process will
  1019. repeat **recursively** for the subelements of `var` (at depth + 1). You can
  1020. limit the scope by setting a maximum depth, or have the callback function
  1021. explicitly request to stop the iteration (by returning `false`).
  1022. @function iterate
  1023. @param var the table (LuaXML object) to iterate
  1024. @tparam function cb
  1025. callback function. `callback(var, depth)` will be called for each matching
  1026. element.<br>
  1027. The function may return `false` to request a stop; if its result is
  1028. any other value (including `nil`), the iteration will continue.
  1029. @tparam ?string tag XML tag to be matched
  1030. @tparam ?string key attribute key to be matched
  1031. @param value (optional) attribute value to be matched
  1032. @tparam ?boolean r
  1033. recursive operation. If `true`, also iterate over the subelements of `var`
  1034. @tparam ?number max maximum depth allowed
  1035. @tparam ?number d initial depth value, defaults to 0
  1036. @return
  1037. The function returns two values: a counter representing the number of elements
  1038. that were successfully matched (and processed), and a boolean completion flag.
  1039. The latter is `true` for an exhaustive iteration, and `false` if was stopped
  1040. from the callback.
  1041. @see match
  1042. */
  1043. int
  1044. Xml_iterate(lua_State *L)
  1045. {
  1046. lua_settop(L, 8);
  1047. luaL_checktype(L, 2, LUA_TFUNCTION); // callback must be a function
  1048. int maxdepth = luaL_optint(L, 7, -1); // default (< 0) indicates "no limit"
  1049. int depth = lua_tointeger(L, 8);
  1050. int count = 0;
  1051. bool cont = true;
  1052. // examine "var" element first
  1053. lua_pushcfunction(L, Xml_match);
  1054. lua_pushvalue(L, 1); // var
  1055. lua_pushvalue(L, 3); // tag
  1056. lua_pushvalue(L, 4); // key
  1057. lua_pushvalue(L, 5); // value
  1058. lua_call(L, 4, 1);
  1059. if (!lua_isnil(L, -1)) { // "var" matches, invoke callback
  1060. count = 1;
  1061. lua_pushvalue(L, 2); // duplicate function
  1062. lua_insert(L, -2);
  1063. lua_pushinteger(L, depth);
  1064. lua_call(L, 2, 1);
  1065. lua_pushboolean(L, false);
  1066. cont = !lua_equal(L, -1, -2);
  1067. lua_pop(L, 2);
  1068. } else
  1069. lua_pop(L, 1);
  1070. if (cont && lua_toboolean(L, 6) && lua_type(L, 1) == LUA_TTABLE) {
  1071. // process "children" / sub-elements recursively
  1072. depth += 1;
  1073. if (maxdepth < 0 || depth <= maxdepth) {
  1074. int k = 0;
  1075. while (true) {
  1076. lua_pushcfunction(L, Xml_iterate);
  1077. lua_rawgeti(L, 1, ++k);
  1078. if (lua_isnil(L, -1))
  1079. break; // no element var[k], exit loop
  1080. lua_pushvalue(L, 2);
  1081. lua_pushvalue(L, 3);
  1082. lua_pushvalue(L, 4);
  1083. lua_pushvalue(L, 5);
  1084. lua_pushboolean(L, true);
  1085. lua_pushvalue(L, 7);
  1086. lua_pushinteger(L, depth);
  1087. lua_call(L, 8, 2); // done, continue = iterate(var[k], ...)
  1088. count += lua_tointeger(L, -2);
  1089. if (!lua_toboolean(L, -1)) {
  1090. lua_pushinteger(L, count);
  1091. lua_pushboolean(L, false);
  1092. return 2;
  1093. }
  1094. lua_pop(L, 2);
  1095. }
  1096. }
  1097. }
  1098. lua_pushinteger(L, count);
  1099. lua_pushboolean(L, true);
  1100. return 2;
  1101. }
  1102. /** recursively searches a Lua table for a subelement
  1103. matching the provided tag and attribute. See the description of `match` for
  1104. the logic involved with testing for` tag`, `key` and `value`.
  1105. @function find
  1106. @param var the table to be searched in
  1107. @tparam ?string tag the XML tag to be found
  1108. @tparam ?string key the attribute key (= exact name) to be found
  1109. @param value (optional) the attribute value to be found
  1110. @return the first (sub-)table that satisfies the search condition,
  1111. or `nil` for no match
  1112. */
  1113. int
  1114. Xml_find(lua_State *L)
  1115. {
  1116. lua_settop(L, 4); // accept at most four parameters for this function
  1117. lua_newtable(L); // upon a match, this table will receive our result as t[1]
  1118. lua_insert(L, 1); // (move it before anything else)
  1119. lua_pushcfunction(L, Xml_iterate);
  1120. lua_insert(L, 2); // iterate is now stack arg #2, `var` at #3
  1121. lua_pushvalue(L, 1); // duplicate the table (for use as upvalue)
  1122. lua_pushcclosure(L, find_on_match, 1); // create a C closure
  1123. lua_insert(L, 4); // place the callback function (closure) at #4
  1124. // (`tag`, `key` and `value` have moved to #5, #6 and #7 respectively)
  1125. lua_pushboolean(L, true); // set "recursive" flag (#8)
  1126. // iterate(var, find_on_match, tag, key, value, true), discarding results
  1127. // (but if something matches, we expect that `find_on_match` sets t[1])
  1128. lua_call(L, 6, 0);
  1129. lua_rawgeti(L, 1, 1);
  1130. return 1; // returns result[1], which may be `nil` (if no match)
  1131. }
  1132. #ifdef __cplusplus
  1133. extern "C" {
  1134. #endif
  1135. int _EXPORT
  1136. luaopen_LuaXML_lib(lua_State *L)
  1137. {
  1138. static const struct luaL_Reg funcs[] = {{"append", Xml_append},
  1139. {"decode", Xml_decode},
  1140. {"encode", Xml_encode},
  1141. {"eval", Xml_eval},
  1142. {"find", Xml_find},
  1143. {"iterate", Xml_iterate},
  1144. {"load", Xml_load},
  1145. {"match", Xml_match},
  1146. {"new", Xml_new},
  1147. {"registerCode", Xml_registerCode},
  1148. {"str", Xml_str},
  1149. {"tag", Xml_tag},
  1150. {NULL, NULL}};
  1151. luaL_newlib(L, funcs);
  1152. // create a metatable for LuaXML "objects"
  1153. luaL_newmetatable(L, LUAXML_META);
  1154. lua_pushliteral(L, "__index");
  1155. lua_pushvalue(L, -3); // duplicate the module table
  1156. lua_rawset(L, -3); // and set it as metaindex
  1157. lua_pushliteral(L, "__tostring");
  1158. lua_pushcfunction(L, Xml_str);
  1159. lua_rawset(L, -3); // set metamethod
  1160. lua_pop(L, 1); // drop value (metatable)
  1161. // expose API constants (via the module table)
  1162. lua_pushinteger(L, WHITESPACE_TRIM);
  1163. lua_setfield(L, -2, "WS_TRIM");
  1164. lua_pushinteger(L, WHITESPACE_NORMALIZE);
  1165. lua_setfield(L, -2, "WS_NORMALIZE");
  1166. lua_pushinteger(L, WHITESPACE_PRESERVE);
  1167. lua_setfield(L, -2, "WS_PRESERVE");
  1168. // register default codes
  1169. // Note: We'll always handle "&amp;" separately!
  1170. lua_newtable(L);
  1171. lua_pushliteral(L, "&lt;");
  1172. lua_setfield(L, -2, "<");
  1173. lua_pushliteral(L, "&gt;");
  1174. lua_setfield(L, -2, ">");
  1175. lua_pushliteral(L, "&quot;");
  1176. lua_setfield(L, -2, "\"");
  1177. lua_pushliteral(L, "&apos;");
  1178. lua_setfield(L, -2, "'");
  1179. sv_code_ref = luaL_ref(L, LUA_REGISTRYINDEX); // reference (and pop table)
  1180. return 1; // return module (table)
  1181. }
  1182. #ifdef __cplusplus
  1183. } // extern "C"
  1184. #endif