|  | @@ -452,21 +452,13 @@ static unsigned parse_hex4(const unsigned char * const input)
 | 
	
		
			
				|  |  |   * A literal can be one or two sequences of the form \uXXXX */
 | 
	
		
			
				|  |  |  static unsigned char utf16_literal_to_utf8(const unsigned char * const input_pointer, const unsigned char * const input_end, unsigned char **output_pointer, const unsigned char **error_pointer)
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  | -    /* first bytes of UTF8 encoding for a given length in bytes */
 | 
	
		
			
				|  |  | -    static const unsigned char firstByteMark[5] =
 | 
	
		
			
				|  |  | -    {
 | 
	
		
			
				|  |  | -        0x00, /* should never happen */
 | 
	
		
			
				|  |  | -        0x00, /* 0xxxxxxx */
 | 
	
		
			
				|  |  | -        0xC0, /* 110xxxxx */
 | 
	
		
			
				|  |  | -        0xE0, /* 1110xxxx */
 | 
	
		
			
				|  |  | -        0xF0 /* 11110xxx */
 | 
	
		
			
				|  |  | -    };
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      long unsigned int codepoint = 0;
 | 
	
		
			
				|  |  |      unsigned int first_code = 0;
 | 
	
		
			
				|  |  |      const unsigned char *first_sequence = input_pointer;
 | 
	
		
			
				|  |  |      unsigned char utf8_length = 0;
 | 
	
		
			
				|  |  | +    unsigned char utf8_position = 0;
 | 
	
		
			
				|  |  |      unsigned char sequence_length = 0;
 | 
	
		
			
				|  |  | +    unsigned char first_byte_mark = 0;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      /* get the first utf16 sequence */
 | 
	
		
			
				|  |  |      first_code = parse_hex4(first_sequence + 2);
 | 
	
	
		
			
				|  | @@ -537,16 +529,19 @@ static unsigned char utf16_literal_to_utf8(const unsigned char * const input_poi
 | 
	
		
			
				|  |  |      {
 | 
	
		
			
				|  |  |          /* two bytes, encoding 110xxxxx 10xxxxxx */
 | 
	
		
			
				|  |  |          utf8_length = 2;
 | 
	
		
			
				|  |  | +        first_byte_mark = 0xC0; /* 11000000 */
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |      else if (codepoint < 0x10000)
 | 
	
		
			
				|  |  |      {
 | 
	
		
			
				|  |  |          /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */
 | 
	
		
			
				|  |  |          utf8_length = 3;
 | 
	
		
			
				|  |  | +        first_byte_mark = 0xE0; /* 11100000 */
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |      else if (codepoint <= 0x10FFFF)
 | 
	
		
			
				|  |  |      {
 | 
	
		
			
				|  |  |          /* four bytes, encoding 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
 | 
	
		
			
				|  |  |          utf8_length = 4;
 | 
	
		
			
				|  |  | +        first_byte_mark = 0xF0; /* 11110000 */
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |      else
 | 
	
		
			
				|  |  |      {
 | 
	
	
		
			
				|  | @@ -556,28 +551,22 @@ static unsigned char utf16_literal_to_utf8(const unsigned char * const input_poi
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      /* encode as utf8 */
 | 
	
		
			
				|  |  | -    switch (utf8_length)
 | 
	
		
			
				|  |  | -    {
 | 
	
		
			
				|  |  | -        case 4:
 | 
	
		
			
				|  |  | -            /* 10xxxxxx */
 | 
	
		
			
				|  |  | -            (*output_pointer)[3] = (unsigned char)((codepoint | 0x80) & 0xBF);
 | 
	
		
			
				|  |  | -            codepoint >>= 6;
 | 
	
		
			
				|  |  | -        case 3:
 | 
	
		
			
				|  |  | -            /* 10xxxxxx */
 | 
	
		
			
				|  |  | -            (*output_pointer)[2] = (unsigned char)((codepoint | 0x80) & 0xBF);
 | 
	
		
			
				|  |  | -            codepoint >>= 6;
 | 
	
		
			
				|  |  | -        case 2:
 | 
	
		
			
				|  |  | -            (*output_pointer)[1] = (unsigned char)((codepoint | 0x80) & 0xBF);
 | 
	
		
			
				|  |  | -            codepoint >>= 6;
 | 
	
		
			
				|  |  | -        case 1:
 | 
	
		
			
				|  |  | -            /* depending on the length in bytes this determines the
 | 
	
		
			
				|  |  | -               encoding of the first UTF8 byte */
 | 
	
		
			
				|  |  | -            (*output_pointer)[0] = (unsigned char)((codepoint | firstByteMark[utf8_length]) & 0xFF);
 | 
	
		
			
				|  |  | -            break;
 | 
	
		
			
				|  |  | -        default:
 | 
	
		
			
				|  |  | -            *error_pointer = first_sequence;
 | 
	
		
			
				|  |  | -            goto fail;
 | 
	
		
			
				|  |  | +    for (utf8_position = (unsigned char)(utf8_length - 1); utf8_position > 0; utf8_position--)
 | 
	
		
			
				|  |  | +    {
 | 
	
		
			
				|  |  | +        /* 10xxxxxx */
 | 
	
		
			
				|  |  | +        (*output_pointer)[utf8_position] = (unsigned char)((codepoint | 0x80) & 0xBF);
 | 
	
		
			
				|  |  | +        codepoint >>= 6;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | +    /* encode first byte */
 | 
	
		
			
				|  |  | +    if (utf8_length > 1)
 | 
	
		
			
				|  |  | +    {
 | 
	
		
			
				|  |  | +        (*output_pointer)[0] = (unsigned char)((codepoint | first_byte_mark) & 0xFF);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    else
 | 
	
		
			
				|  |  | +    {
 | 
	
		
			
				|  |  | +        (*output_pointer)[0] = (unsigned char)(codepoint & 0x7F);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      *output_pointer += utf8_length;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      return sequence_length;
 |