Browse Source

Integrate patch for handling broken unicode surrogate pairs, with
thanks to kolman and Irwan Djajadi on SourceForge.

Dave Gamble 9 years ago
parent
commit
ee579ecbd6
1 changed file with 12 additions and 10 deletions
  1. 12 10
      cJSON.c

+ 12 - 10
cJSON.c

@@ -192,16 +192,18 @@ static unsigned parse_hex4(const char *str)
 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 static const char *parse_string(cJSON *item,const char *str)
 {
-	const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
+	const char *ptr=str+1,*end_ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
 	if (*str!='\"') {ep=str;return 0;}	/* not a string! */
 	
-	while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++;	/* Skip escaped quotes. */
+	while (*end_ptr!='\"' && *end_ptr && ++len) if (*end_ptr++ == '\\') end_ptr++;	/* Skip escaped quotes. */
 	
 	out=(char*)cJSON_malloc(len+1);	/* This is how long we need for the string, roughly. */
 	if (!out) return 0;
+	item->valuestring=out; /* assign here so out will be deleted during cJSON_Delete() later */
+	item->type=cJSON_String;
 	
 	ptr=str+1;ptr2=out;
-	while (*ptr!='\"' && *ptr)
+	while (ptr < end_ptr)
 	{
 		if (*ptr!='\\') *ptr2++=*ptr++;
 		else
@@ -216,14 +218,16 @@ static const char *parse_string(cJSON *item,const char *str)
 				case 't': *ptr2++='\t';	break;
 				case 'u':	 /* transcode utf16 to utf8. */
 					uc=parse_hex4(ptr+1);ptr+=4;	/* get the unicode char. */
-
-					if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)	break;	/* check for invalid.	*/
-
+					if (ptr >= end_ptr) {ep=str;return 0;}	/* invalid */
+					
+					if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)    {ep=str;return 0;}	/* check for invalid.   */
+					
 					if (uc>=0xD800 && uc<=0xDBFF)	/* UTF16 surrogate pairs.	*/
 					{
-						if (ptr[1]!='\\' || ptr[2]!='u')	break;	/* missing second-half of surrogate.	*/
+						if (ptr+6 > end_ptr)    {ep=str;return 0;}	/* invalid */
+						if (ptr[1]!='\\' || ptr[2]!='u')    {ep=str;return 0;}	/* missing second-half of surrogate.    */
 						uc2=parse_hex4(ptr+3);ptr+=6;
-						if (uc2<0xDC00 || uc2>0xDFFF)		break;	/* invalid second-half of surrogate.	*/
+						if (uc2<0xDC00 || uc2>0xDFFF)       {ep=str;return 0;}	/* invalid second-half of surrogate.    */
 						uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
 					}
 
@@ -244,8 +248,6 @@ static const char *parse_string(cJSON *item,const char *str)
 	}
 	*ptr2=0;
 	if (*ptr=='\"') ptr++;
-	item->valuestring=out;
-	item->type=cJSON_String;
 	return ptr;
 }