9 years ago · c88d045888
--- a/cJSON.c
+++ b/cJSON.c
@@ -443,76 +443,175 @@ static const unsigned char firstByteMark[7] =
 
				 };
			
 
				 
			
 
				 /* Parse the input text into an unescaped cstring, and populate item. */
			
 
				-static const char *parse_string(cJSON *item,const char *str,const char **ep)
			
 
				+static const char *parse_string(cJSON *item, const char *str, const char **ep)
			
 
				 {
			
 
				-	const char *ptr=str+1,*end_ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
			
 
				-	if (*str!='\"') {*ep=str;return 0;}	/* not a string! */
			
 
				+    const char *ptr = str + 1;
			
 
				+    const char *end_ptr =str + 1;
			
 
				+    char *ptr2;
			
 
				+    char *out;
			
 
				+    int len = 0;
			
 
				+    unsigned uc;
			
 
				+    unsigned uc2;
			
 
				+
			
 
				+    /* not a string! */
			
 
				+    if (*str != '\"')
			
 
				+    {
			
 
				+        *ep = str;
			
 
				+        return 0;
			
 
				+    }
			
 
				 
			
 
				-	while (*end_ptr!='\"' && *end_ptr && ++len)
			
 
				-	{
			
 
				-	    if (*end_ptr++ == '\\')
			
 
				-	    {
			
 
				-		if (*end_ptr == '\0')
			
 
				-		{
			
 
				-		    /* prevent buffer overflow when last input character is a backslash */
			
 
				-		    return 0;
			
 
				-		}
			
 
				-		end_ptr++;	/* Skip escaped quotes. */
			
 
				-	    }
			
 
				-	}
			
 
				+    while ((*end_ptr != '\"') && *end_ptr && ++len)
			
 
				+    {
			
 
				+        if (*end_ptr++ == '\\')
			
 
				+        {
			
 
				+            if (*end_ptr == '\0')
			
 
				+            {
			
 
				+                /* prevent buffer overflow when last input character is a backslash */
			
 
				+                return 0;
			
 
				+            }
			
 
				+            /* Skip escaped quotes. */
			
 
				+            end_ptr++;
			
 
				+        }
			
 
				+    }
			
 
				 
			
 
				-	out=(char*)cJSON_malloc(len+1);	/* This is how long we need for the string, roughly. */
			
 
				-	if (!out) return 0;
			
 
				-	item->valuestring=out; /* assign here so out will be deleted during cJSON_Delete() later */
			
 
				-	item->type=cJSON_String;
			
 
				-	
			
 
				-	ptr=str+1;ptr2=out;
			
 
				-	while (ptr < end_ptr)
			
 
				-	{
			
 
				-		if (*ptr!='\\') *ptr2++=*ptr++;
			
 
				-		else
			
 
				-		{
			
 
				-			ptr++;
			
 
				-			switch (*ptr)
			
 
				-			{
			
 
				-				case 'b': *ptr2++='\b';	break;
			
 
				-				case 'f': *ptr2++='\f';	break;
			
 
				-				case 'n': *ptr2++='\n';	break;
			
 
				-				case 'r': *ptr2++='\r';	break;
			
 
				-				case 't': *ptr2++='\t';	break;
			
 
				-				case 'u':	 /* transcode utf16 to utf8. */
			
 
				-					uc=parse_hex4(ptr+1);ptr+=4;	/* get the unicode char. */
			
 
				-					if (ptr >= end_ptr) {*ep=str;return 0;}	/* invalid */
			
 
				-					
			
 
				-					if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)    {*ep=str;return 0;}	/* check for invalid.   */
			
 
				-					
			
 
				-					if (uc>=0xD800 && uc<=0xDBFF)	/* UTF16 surrogate pairs.	*/
			
 
				-					{
			
 
				-						if (ptr+6 > end_ptr)    {*ep=str;return 0;}	/* invalid */
			
 
				-						if (ptr[1]!='\\' || ptr[2]!='u')    {*ep=str;return 0;}	/* missing second-half of surrogate.    */
			
 
				-						uc2=parse_hex4(ptr+3);ptr+=6;
			
 
				-						if (uc2<0xDC00 || uc2>0xDFFF)       {*ep=str;return 0;}	/* invalid second-half of surrogate.    */
			
 
				-						uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
			
 
				-					}
			
 
				-
			
 
				-					len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len;
			
 
				-					
			
 
				-					switch (len) {
			
 
				-						case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
			
 
				-						case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
			
 
				-						case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
			
 
				-						case 1: *--ptr2 =(uc | firstByteMark[len]);
			
 
				-					}
			
 
				-					ptr2+=len;
			
 
				-					break;
			
 
				-				default:  *ptr2++=*ptr; break;
			
 
				-			}
			
 
				-			ptr++;
			
 
				-		}
			
 
				-	}
			
 
				-	*ptr2=0;
			
 
				-	if (*ptr=='\"') ptr++;
			
 
				-	return ptr;
			
 
				+    /* This is at most how long we need for the string, roughly. */
			
 
				+    out = (char*)cJSON_malloc(len + 1);
			
 
				+    if (!out)
			
 
				+    {
			
 
				+        return 0;
			
 
				+    }
			
 
				+    item->valuestring = out; /* assign here so out will be deleted during cJSON_Delete() later */
			
 
				+    item->type = cJSON_String;
			
 
				+
			
 
				+    ptr = str + 1;
			
 
				+    ptr2 = out;
			
 
				+    /* loop through the string literal */
			
 
				+    while (ptr < end_ptr)
			
 
				+    {
			
 
				+        if (*ptr != '\\')
			
 
				+        {
			
 
				+            *ptr2++ = *ptr++;
			
 
				+        }
			
 
				+        /* escape sequence */
			
 
				+        else
			
 
				+        {
			
 
				+            ptr++;
			
 
				+            switch (*ptr)
			
 
				+            {
			
 
				+                case 'b':
			
 
				+                    *ptr2++ = '\b';
			
 
				+                    break;
			
 
				+                case 'f':
			
 
				+                    *ptr2++ = '\f';
			
 
				+                    break;
			
 
				+                case 'n':
			
 
				+                    *ptr2++ = '\n';
			
 
				+                    break;
			
 
				+                case 'r':
			
 
				+                    *ptr2++ = '\r';
			
 
				+                    break;
			
 
				+                case 't':
			
 
				+                    *ptr2++ = '\t';
			
 
				+                    break;
			
 
				+                case 'u':
			
 
				+                    /* transcode utf16 to utf8. See RFC2781 and RFC3629. */
			
 
				+                    uc = parse_hex4(ptr + 1); /* get the unicode char. */
			
 
				+                    ptr += 4;
			
 
				+                    if (ptr >= end_ptr)
			
 
				+                    {
			
 
				+                        /* invalid */
			
 
				+                        *ep = str;
			
 
				+                        return 0;
			
 
				+                    }
			
 
				+                    /* check for invalid. */
			
 
				+                    if (((uc >= 0xDC00) && (uc <= 0xDFFF)) || (uc == 0))
			
 
				+                    {
			
 
				+                        *ep = str;
			
 
				+                        return 0;
			
 
				+                    }
			
 
				+
			
 
				+                    /* UTF16 surrogate pairs. */
			
 
				+                    if ((uc >= 0xD800) && (uc<=0xDBFF))
			
 
				+                    {
			
 
				+                        if ((ptr + 6) > end_ptr)
			
 
				+                        {
			
 
				+                            /* invalid */
			
 
				+                            *ep = str;
			
 
				+                            return 0;
			
 
				+                        }
			
 
				+                        if ((ptr[1] != '\\') || (ptr[2] != 'u'))
			
 
				+                        {
			
 
				+                            /* missing second-half of surrogate. */
			
 
				+                            *ep = str;
			
 
				+                            return 0;
			
 
				+                        }
			
 
				+                        uc2 = parse_hex4(ptr + 3);
			
 
				+                        ptr += 6; /* \uXXXX */
			
 
				+                        if ((uc2 < 0xDC00) || (uc2 > 0xDFFF))
			
 
				+                        {
			
 
				+                            /* invalid second-half of surrogate. */
			
 
				+                            *ep = str;
			
 
				+                            return 0;
			
 
				+                        }
			
 
				+                        /* calculate unicode codepoint from the surrogate pair */
			
 
				+                        uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF));
			
 
				+                    }
			
 
				+
			
 
				+                    /* encode as UTF8
			
 
				+                     * takes at maximum 4 bytes to encode:
			
 
				+                     * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			
 
				+                    len = 4;
			
 
				+                    if (uc < 0x80)
			
 
				+                    {
			
 
				+                        /* normal ascii, encoding 0xxxxxxx */
			
 
				+                        len = 1;
			
 
				+                    }
			
 
				+                    else if (uc < 0x800)
			
 
				+                    {
			
 
				+                        /* two bytes, encoding 110xxxxx 10xxxxxx */
			
 
				+                        len = 2;
			
 
				+                    }
			
 
				+                    else if (uc < 0x10000)
			
 
				+                    {
			
 
				+                        /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */
			
 
				+                        len = 3;
			
 
				+                    }
			
 
				+                    ptr2 += len;
			
 
				+
			
 
				+                    switch (len) {
			
 
				+                        case 4:
			
 
				+                            /* 10xxxxxx */
			
 
				+                            *--ptr2 = ((uc | 0x80) & 0xBF);
			
 
				+                            uc >>= 6;
			
 
				+                        case 3:
			
 
				+                            /* 10xxxxxx */
			
 
				+                            *--ptr2 = ((uc | 0x80) & 0xBF);
			
 
				+                            uc >>= 6;
			
 
				+                        case 2:
			
 
				+                            /* 10xxxxxx */
			
 
				+                            *--ptr2 = ((uc | 0x80) & 0xBF);
			
 
				+                            uc >>= 6;
			
 
				+                        case 1:
			
 
				+                            /* depending on the length in bytes this determines the
			
 
				+                             * encoding ofthe first UTF8 byte */
			
 
				+                            *--ptr2 = (uc | firstByteMark[len]);
			
 
				+                    }
			
 
				+                    ptr2 += len;
			
 
				+                    break;
			
 
				+                default:
			
 
				+                    *ptr2++ = *ptr;
			
 
				+                    break;
			
 
				+            }
			
 
				+            ptr++;
			
 
				+        }
			
 
				+    }
			
 
				+    *ptr2 = '\0';
			
 
				+    if (*ptr == '\"')
			
 
				+    {
			
 
				+        ptr++;
			
 
				+    }
			
 
				+
			
 
				+    return ptr;
			
 
				 }
			
 
				 
			
 
				 /* Render the cstring provided to an escaped version that can be printed. */