/**
LuaXML License
LuaXml is licensed under the terms of the MIT license reproduced below,
the same as Lua itself. This means that LuaXml is free software and can be
used for both academic and commercial purposes at absolutely no cost.
Copyright (C) 2007-2013 Gerald Franz, eludi.net
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
- #if defined __WIN32__ || defined WIN32
- # include <windows.h>
- #endif
- #ifdef __cplusplus
- extern "C" {
- #endif
- #include <lua.h>
- #include <lauxlib.h>
- #include <lualib.h>
- #ifdef __cplusplus
- }
- #endif
- #include <stdio.h>
- #include <string.h>
- #include <ctype.h>
- #include <stdlib.h>
/*
 * One-byte marker tokens. Tokenizer_next() returns them as single-character
 * tokens and Xml_eval() recognises them by comparing the first byte of a token.
 */
static const char ESC = '\033'; /* element finished: emitted for "</...>" and for "/>" */
static const char OPN = '\034'; /* '<' of an opening tag was read */
static const char CLS = '\035'; /* '>' ending a tag header was read */
/*--- auxiliary functions ------------------------------------------*/
/*
 * Formats a byte as a decimal XML character reference ("&#N;").
 *
 * ch   byte value to format (0..255)
 * buf  caller-supplied buffer; the longest output, "&#255;" plus the
 *      terminating NUL, occupies 7 bytes
 *
 * Returns buf. No leading zeros are written.
 */
static const char* char2code(unsigned char ch, char buf[8]) {
    char *out = buf;
    const unsigned hundreds = ch / 100u;
    const unsigned tens = (ch % 100u) / 10u;
    const unsigned ones = ch % 10u;

    *out++ = '&';
    *out++ = '#';
    if (ch > 99)
        *out++ = (char)('0' + hundreds);
    if (ch > 9)
        *out++ = (char)('0' + tens);
    *out++ = (char)('0' + ones);
    *out++ = ';';
    *out = '\0';
    return buf;
}
/*
 * Locates the first occurrence of pattern in s at or after offset start.
 *
 * Returns the offset of the match measured from the beginning of s, or
 * strlen(s) when there is no match. start must not exceed strlen(s).
 */
static size_t find(const char* s, const char* pattern, size_t start) {
    const char *hit = strstr(s + start, pattern);

    if (hit == NULL)
        return strlen(s);
    return (size_t)(hit - s);
}
- /*--- internal tokenizer -------------------------------------------*/
/*
 * State of the XML tokenizer. Created by Tokenizer_new(), advanced by
 * Tokenizer_next() and released by Tokenizer_delete().
 */
typedef struct Tokenizer_s {
    /* input */
    const char* s;            /* text being tokenized; not freed by Tokenizer_delete() */
    size_t s_size;            /* number of bytes of s to scan */
    size_t i;                 /* offset of the next byte to read from s */
    int tagMode;              /* non-zero while reading inside a tag header */
    /* token queued for the following Tokenizer_next() call */
    const char* m_next;       /* start of the queued token, if any */
    size_t m_next_size;       /* length of the queued token; 0 when nothing is queued */
    /* token currently being built / last returned */
    char* m_token;            /* heap buffer holding the token, NUL-terminated */
    size_t m_token_size;      /* length of the token, terminator excluded */
    size_t m_token_capacity;  /* capacity bookkeeping for m_token */
} Tokenizer;
- Tokenizer* Tokenizer_new(const char* str, size_t str_size) {
- Tokenizer *tok = (Tokenizer*)malloc(sizeof(Tokenizer));
- memset(tok, 0, sizeof(Tokenizer));
- tok->s_size = str_size;
- tok->s = str;
- return tok;
- }
- void Tokenizer_delete(Tokenizer* tok) {
- free(tok->m_token);
- free(tok);
- }
- /*
- void Tokenizer_print(Tokenizer* tok) { printf(" @%u %s\n", tok->i, !tok->m_token ? "(null)" : (tok->m_token[0]==ESC)?"(esc)" : (tok->m_token[0]==OPN)?"(open)": (tok->m_token[0]==CLS)?"(close)" : tok->m_token); fflush(stdout); }
- */
- static const char* Tokenizer_set(Tokenizer* tok, const char* s, size_t size) {
- if(!size||!s) return 0;
- free(tok->m_token);
- tok->m_token = (char*)malloc(size+1);
- strncpy(tok->m_token,s, size);
- tok->m_token[size] = 0;
- tok->m_token_size = tok->m_token_capacity = size;
- /*Tokenizer_print(tok);*/
- return tok->m_token;
- }
- static void Tokenizer_append(Tokenizer* tok, char ch) {
- if(tok->m_token_size+1>=tok->m_token_capacity) {
- tok->m_token_capacity = (tok->m_token_capacity==0) ? 16 : tok->m_token_capacity*2;
- tok->m_token = (char*)realloc(tok->m_token, tok->m_token_capacity);
- }
- tok->m_token[tok->m_token_size]=ch;
- tok->m_token[++tok->m_token_size]=0;
- }
- const char* Tokenizer_next(Tokenizer* tok) {
- const char* ESC_str = "\033";
- const char* OPEN_str = "\034";
- const char* CLOSE_str = "\035";
- int quotMode=0;
- int tokenComplete = 0;
- if(tok->m_token) {
- free(tok->m_token);
- tok->m_token = 0;
- tok->m_token_size=tok->m_token_capacity = 0;
- }
- while(tok->m_next_size || (tok->i < tok->s_size)) {
- if(tok->m_next_size) {
- Tokenizer_set(tok, tok->m_next, tok->m_next_size);
- tok->m_next=0;
- tok->m_next_size=0;
- return tok->m_token;
- }
- switch(tok->s[tok->i]) {
- case '"':
- case '\'':
- if(tok->tagMode) {
- if(!quotMode) quotMode=tok->s[tok->i];
- else if(quotMode==tok->s[tok->i]) quotMode=0;
- }
- Tokenizer_append(tok, tok->s[tok->i]);
- break;
- case '<':
- if(!quotMode&&(tok->i+4<tok->s_size)&&(strncmp(tok->s+tok->i,"<!--",4)==0)) /* strip comments */
- tok->i=find(tok->s, "-->", tok->i+4)+2;
- else if(!quotMode&&(tok->i+9<tok->s_size)&&(strncmp(tok->s+tok->i,"<![CDATA[",9)==0)) { /* interpet CDATA */
- size_t b=tok->i+9;
- tok->i=find(tok->s, "]]>",b)+3;
- if(!tok->m_token_size) return Tokenizer_set(tok, tok->s+b, tok->i-b-3);
- tokenComplete = 1;
- tok->m_next = tok->s+b;
- tok->m_next_size = tok->i-b-3;
- --tok->i;
- }
- else if(!quotMode&&(tok->i+1<tok->s_size)&&((tok->s[tok->i+1]=='?')||(tok->s[tok->i+1]=='!'))) /* strip meta information */
- tok->i=find(tok->s, ">", tok->i+2);
- else if(!quotMode&&!tok->tagMode) {
- if((tok->i+1<tok->s_size)&&(tok->s[tok->i+1]=='/')) {
- tok->m_next=ESC_str;
- tok->m_next_size = 1;
- tok->i=find(tok->s, ">", tok->i+2);
- }
- else {
- tok->m_next = OPEN_str;
- tok->m_next_size = 1;
- tok->tagMode=1;
- }
- tokenComplete = 1;
- }
- else Tokenizer_append(tok, tok->s[tok->i]);
- break;
- case '/':
- if(tok->tagMode&&!quotMode) {
- tokenComplete = 1;
- if((tok->i+1 < tok->s_size) && (tok->s[tok->i+1]=='>')) {
- tok->tagMode=0;
- tok->m_next=ESC_str;
- tok->m_next_size = 1;
- ++tok->i;
- }
- else Tokenizer_append(tok, tok->s[tok->i]);
- }
- else Tokenizer_append(tok, tok->s[tok->i]);
- break;
- case '>':
- if(!quotMode&&tok->tagMode) {
- tok->tagMode=0;
- tokenComplete = 1;
- tok->m_next = CLOSE_str;
- tok->m_next_size = 1;
- }
- else Tokenizer_append(tok, tok->s[tok->i]);
- break;
- case ' ':
- case '\r':
- case '\n':
- case '\t':
- if(tok->tagMode&&!quotMode) {
- if(tok->m_token_size) tokenComplete=1;
- }
- else if(tok->m_token_size) Tokenizer_append(tok, tok->s[tok->i]);
- break;
- default: Tokenizer_append(tok, tok->s[tok->i]);
- }
- ++tok->i;
- if((tok->i>=tok->s_size)||(tokenComplete&&tok->m_token_size)) {
- tokenComplete=0;
- while(tok->m_token_size&&isspace(tok->m_token[tok->m_token_size-1])) /* trim whitespace */
- tok->m_token[--tok->m_token_size]=0;
- if(tok->m_token_size) break;
- }
- }
- /*Tokenizer_print(tok);*/
- return tok->m_token;
- }
- /*--- local variables ----------------------------------------------*/
/*
 * Table of special-character codes shared by the encode/decode functions.
 * Entries come in pairs: sv_code[2k] is the plain text and sv_code[2k+1] the
 * encoded replacement that Xml_encode() substitutes for it.
 */
static size_t sv_code_size = 0;       /* number of strings currently stored in sv_code */
static size_t sv_code_capacity = 16;  /* number of string slots sv_code is sized for */
static char** sv_code = NULL;         /* the table itself; NULL until it is first allocated */
- /*--- public methods -----------------------------------------------*/
- static void Xml_pushDecode(lua_State* L, const char* s, size_t s_size) {
- luaL_Buffer b;
- const char* found;
- size_t start=0, pos;
- size_t i;
- if(!s_size)
- s_size=strlen(s);
- luaL_buffinit(L, &b);
- found = strstr(s, "&#");
- pos = found ? found-s : s_size;
- while(found) {
- char ch = 0;
- size_t i=0;
- for(found += 2; i<3; ++i, ++found)
- if(isdigit(*found))
- ch = ch * 10 + (*found - 48);
- else break;
- if(*found == ';') {
- if(pos>start)
- luaL_addlstring(&b, s+start, pos-start);
- luaL_addchar(&b, ch);
- start = pos + 3 + i;
- }
- found = strstr(found+1, "&#");
- pos = found ? found-s : s_size;
- }
- if(pos>start)
- luaL_addlstring(&b,s+start, pos-start);
- luaL_pushresult(&b);
- for(i=sv_code_size-1; i<sv_code_size; i-=2) {
- luaL_gsub(L, lua_tostring(L,-1), sv_code[i], sv_code[i-1]);
- lua_remove(L,-2);
- }
- }
- int Xml_eval(lua_State *L) {
- char* str = 0;
- size_t str_size=0;
- Tokenizer* tok;
- const char* token=0;
- int firstStatement = 1;
- if(lua_isuserdata(L,1))
- str = (char*)lua_touserdata(L,1);
- else {
- const char * sTmp = luaL_checklstring(L,1,&str_size);
- str = (char*)malloc(str_size+1);
- memcpy(str, sTmp, str_size);
- str[str_size]=0;
- }
- tok = Tokenizer_new(str, str_size ? str_size : strlen(str));
- lua_settop(L,0);
- while((token=Tokenizer_next(tok))!=0) if(token[0]==OPN) { /* new tag found */
- if(lua_gettop(L)) {
- int newIndex=lua_rawlen(L,-1)+1;
- lua_pushnumber(L,newIndex);
- lua_newtable(L);
- lua_settable(L, -3);
- lua_pushnumber(L,newIndex);
- lua_gettable(L,-2);
- }
- else {
- if (firstStatement) {
- lua_newtable(L);
- firstStatement = 0;
- }
- else return lua_gettop(L);
- }
- /* set metatable: */
- lua_newtable(L);
- lua_pushliteral(L, "__index");
- lua_getglobal(L, "xml");
- lua_settable(L, -3);
- lua_pushliteral(L, "__tostring"); /* set __tostring metamethod */
- lua_getglobal(L, "xml");
- lua_pushliteral(L,"str");
- lua_gettable(L, -2);
- lua_remove(L, -2);
- lua_settable(L, -3);
- lua_setmetatable(L, -2);
- /* parse tag and content: */
- lua_pushnumber(L,0); /* use index 0 for storing the tag */
- lua_pushstring(L, Tokenizer_next(tok));
- lua_settable(L, -3);
- while(((token = Tokenizer_next(tok))!=0)&&(token[0]!=CLS)&&(token[0]!=ESC)) { /* parse tag header */
- size_t sepPos=find(token, "=", 0);
- if(token[sepPos]) { /* regular attribute */
- const char* aVal =token+sepPos+2;
- size_t lenVal;
- lua_pushlstring(L, token, sepPos);
- lenVal = strlen(aVal)-1;
- if(!lenVal) Xml_pushDecode(L, "", 0);
- else Xml_pushDecode(L, aVal, lenVal);
- lua_settable(L, -3);
- }
- }
- if(!token||(token[0]==ESC)) {
- if(lua_gettop(L)>1) lua_settop(L,-2); /* this tag has no content, only attributes */
- else break;
- }
- }
- else if(token[0]==ESC) { /* previous tag is over */
- if(lua_gettop(L)>1) lua_settop(L,-2); /* pop current table */
- else break;
- }
- else { /* read elements */
- lua_pushnumber(L,lua_rawlen(L,-1)+1);
- Xml_pushDecode(L, token, 0);
- lua_settable(L, -3);
- }
- Tokenizer_delete(tok);
- free(str);
- return lua_gettop(L);
- }
- int Xml_load (lua_State *L) {
- const char * filename = luaL_checkstring(L,1);
- size_t sz;
- FILE * file=fopen(filename,"r");
- char* buffer;
- if(!file)
- return luaL_error(L,"LuaXml ERROR: \"%s\" file error or file not found!",filename);
- fseek (file , 0 , SEEK_END);
- sz = ftell (file);
- rewind (file);
- buffer = (char*)malloc(sz+1);
- sz = fread (buffer,1,sz,file);
- fclose(file);
- buffer[sz]=0;
- lua_pushlightuserdata(L,buffer);
- lua_replace(L,1);
- return Xml_eval(L);
- };
- int Xml_registerCode(lua_State *L) {
- const char * decoded = luaL_checkstring(L,1);
- const char * encoded = luaL_checkstring(L,2);
- size_t i;
- for(i=0; i<sv_code_size; i+=2)
- if(strcmp(sv_code[i],decoded)==0)
- return luaL_error(L,"LuaXml ERROR: code already exists.");
- if(sv_code_size+2>sv_code_capacity) {
- sv_code_capacity*=2;
- sv_code = (char**)realloc(sv_code, sv_code_capacity*sizeof(char*));
- }
- sv_code[sv_code_size]=(char*)malloc(strlen(decoded)+1);
- strcpy(sv_code[sv_code_size++], decoded);
- sv_code[sv_code_size]=(char*)malloc(strlen(encoded)+1);
- strcpy(sv_code[sv_code_size++],encoded);
- return 0;
- }
- int Xml_encode(lua_State *L) {
- char buf[8];
- size_t i, start, pos;
- luaL_Buffer b;
- const char* s;
- if(lua_gettop(L)!=1)
- return 0;
- luaL_checkstring(L,-1);
- for(i=0; i<sv_code_size; i+=2) {
- luaL_gsub(L, lua_tostring(L,-1), sv_code[i], sv_code[i+1]);
- lua_remove(L,-2);
- }
- s=lua_tostring(L,1);
- luaL_buffinit(L, &b);
- for(start=pos=0; s[pos]!=0; ++pos) if(s[pos]<0) {
- if(pos>start) luaL_addlstring(&b,s+start, pos-start);
- luaL_addstring(&b,char2code((unsigned char)(s[pos]),buf));
- start=pos+1;
- }
- if(pos>start)
- luaL_addlstring(&b,s+start, pos-start);
- luaL_pushresult(&b);
- lua_remove(L,-2);
- return 1;
- }
- #ifdef __cplusplus
- extern "C" {
- #endif
- int luaopen_LuaXML(lua_State* L) {
- static const struct luaL_Reg funcs[] = {
- {"load", Xml_load},
- {"eval", Xml_eval},
- {"encode", Xml_encode},
- /* {"registerCode", Xml_registerCode}, */
- {NULL, NULL}
- };
- luaL_newlibtable(L, funcs);
- luaL_setfuncs(L, funcs, 0);
- lua_setglobal(L, "xml");
- /* register default codes: */
- if(!sv_code) {
- sv_code=(char**)malloc(sv_code_capacity*sizeof(char*));
- sv_code[sv_code_size++]="&";
- sv_code[sv_code_size++]="&";
- sv_code[sv_code_size++]="<";
- sv_code[sv_code_size++]="<";
- sv_code[sv_code_size++]=">";
- sv_code[sv_code_size++]=">";
- sv_code[sv_code_size++]="\"";
- sv_code[sv_code_size++]=""";
- sv_code[sv_code_size++]="'";
- sv_code[sv_code_size++]="'";
- }
- return 1;
- }
- #ifdef __cplusplus
- }
- #endif
|