Jelajahi Sumber

Rewrite request parsing (Step 1/?)

Split request and response parsing, and rewrite them.
This is the first of multiple steps to solve #479,
and it's required for a sustainable solution of many
other issues, including #434, #456, #460, #467.

(Note: Intermediate commits do not necessarily build)
bel2125 8 tahun lalu
induk
melakukan
cc4f775d9f
2 mengubah file dengan 183 tambahan dan 133 penghapusan
  1. 142 109
      src/civetweb.c
  2. 41 24
      test/private.c

+ 142 - 109
src/civetweb.c

@@ -3201,15 +3201,6 @@ skip_quoted(char **buf,
 }
 
 
-/* Simplified version of skip_quoted without quote char
- * and whitespace == delimiters */
-static char *
-skip(char **buf, const char *delimiters)
-{
-	return skip_quoted(buf, delimiters, delimiters, 0);
-}
-
-
 /* Return HTTP header value, or NULL if not found. */
 static const char *
 get_header(const struct mg_request_info *ri, const char *name)
@@ -6424,31 +6415,39 @@ interpret_cleanup:
 
 
 /* Check whether full request is buffered. Return:
- * -1  if request is malformed
- *  0  if request is not yet fully buffered
+ * -1  if request or response is malformed
+ *  0  if request or response is not yet fully buffered
  * >0  actual request length, including last \r\n\r\n */
 static int
-get_request_len(const char *buf, int buflen)
-{
-	const char *s, *e;
-	int len = 0;
-
-	for (s = buf, e = s + buflen - 1; (len <= 0) && (s < e); s++)
-		/* Control characters are not allowed but >=128 is. */
-		if (!isprint(*(const unsigned char *)s) && (*s != '\r') && (*s != '\n')
-		    && (*(const unsigned char *)s < 128)) {
-			len = -1;
-			break; /* [i_a] abort scan as soon as one malformed character is
-			        * found; */
-			/* don't let subsequent \r\n\r\n win us over anyhow */
-		} else if ((s[0] == '\n') && (s[1] == '\n')) {
-			len = (int)(s - buf) + 2;
-		} else if ((s[0] == '\n') && (&s[1] < e) && (s[1] == '\r')
-		           && (s[2] == '\n')) {
-			len = (int)(s - buf) + 3;
+get_http_header_len(const char *buf, int buflen)
+{
+	int i;
+	for (i = 0; i < buflen - 1; i++) {
+		/* Do an unsigned comparison in some conditions below */
+		const unsigned char c = ((const unsigned char *)buf)[i];
+
+		if ((c < 128) && ((char)c != '\r') && ((char)c != '\n')
+		    && !isprint(c)) {
+			/* abort scan as soon as one malformed character is found */
+			return -1;
 		}
 
-	return len;
+		if ((buf[i] == '\n') && (buf[i + 1] == '\n')) {
+			/* Two newline, no carriage return - not standard compliant, but it
+			 * should be accepted */
+			return i + 1;
+		}
+
+		if (i < buflen - 3) {
+			if ((buf[i] == '\r') && (buf[i + 1] == '\n') && (buf[i + 2] == '\r')
+			    && (buf[i + 3] == '\n')) {
+				/* Two \r\n - standard compliant */
+				return i + 3;
+			}
+		}
+	}
+
+	return 0;
 }
 
 
@@ -8508,6 +8507,51 @@ mg_store_body(struct mg_connection *conn, const char *path)
 }
 
 
+/* Parse a buffer:
+ * Forward the string pointer till the end of a word, then
+ * terminate it and forward till the begin of the next word.
+ */
+static int
+skip_to_end_of_word_and_terminate(char **ppw, int eol)
+{
+	/* Forward until a space is found - use isgraph here */
+	/* See http://www.cplusplus.com/reference/cctype/ */
+	while (isgraph(**ppw)) {
+		(*ppw)++;
+	}
+
+	/* Check end of word */
+	if (eol) {
+		/* must be a end of line */
+		if ((**ppw != '\r') && (**ppw != '\n')) {
+			return -1;
+		}
+	} else {
+		/* must be a end of a word, but not a line */
+		if (**ppw != ' ') {
+			return -1;
+		}
+	}
+
+	/* Terminate and forward to the next word */
+	do {
+		**ppw = 0;
+		(*ppw)++;
+	} while ((**ppw) && isspace(**ppw));
+
+	/* Check after term */
+	if (!eol) {
+		/* if it's not the end of line, there must be a next word */
+		if (!isgraph(**ppw)) {
+			return -1;
+		}
+	}
+
+	/* ok */
+	return 1;
+}
+
+
 /* Parse HTTP headers from the given buffer, advance buf pointer
  * to the point where parsing stopped.
  * All parameters must be valid pointers (not NULL).
@@ -8621,79 +8665,68 @@ is_valid_http_method(const char *method)
  * buf and ri must be valid pointers (not NULL), len>0.
  * Returns <0 on error. */
 static int
-parse_http_message(char *buf, int len, struct mg_request_info *ri)
-{
-	int is_request, request_length;
-	char *start_line;
-
-	request_length = get_request_len(buf, len);
-
-	if (request_length > 0) {
-		/* Reset attributes. DO NOT TOUCH is_ssl, remote_ip, remote_addr,
-		 * remote_port */
-		ri->remote_user = ri->request_method = ri->request_uri =
-		    ri->http_version = NULL;
-		ri->num_headers = 0;
-		buf[request_length - 1] = '\0';
-
-		/* RFC says that all initial whitespaces should be ingored */
-		while ((*buf != '\0') && isspace(*(unsigned char *)buf)) {
-			buf++;
-		}
-
-		/* Separate first line from the following lines.
-		 * "buf" will point to the next line afterwards and the \r\n at
-		 * the end of the line will be replaced by 0 */
-		start_line = skip(&buf, "\r\n");
-
-		/* Separate the first word in the first line from the rest.
-		 * For a HTTP request, this is the method (e.g. "GET").
-		 * The string will be terminated after the method by
-		 * replacing the space by a 0 character. */
-		ri->request_method = skip(&start_line, " ");
-
-		/* The second element in the first line is the URI,
-		 * again terminated by a space. */
-		ri->request_uri = skip(&start_line, " ");
-
-		/* Last element of the first line of a HTTP request
-		 * is the version string (e.g. "HTTP/1.0"). */
-		ri->http_version = start_line;
-
-		/* HTTP message could be either HTTP request:
-		 * "GET / HTTP/1.0 ..."
-		 * or a HTTP response:
-		 *  "HTTP/1.0 200 OK ..."
-		 * otherwise it is invalid.
-		 */
-		is_request = is_valid_http_method(ri->request_method);
-		if (is_request) {
-			if ((toupper(ri->http_version[0]) != 'H')
-			    || (toupper(ri->http_version[1]) != 'T')
-			    || (toupper(ri->http_version[2]) != 'T')
-			    || (toupper(ri->http_version[3]) != 'P')
-			    || (toupper(ri->http_version[4]) != '/')) {
-				/* Invalid request */
-				return -1;
-			}
-			ri->http_version += 5;
-		} else {
-			/* Response */
-			if ((toupper(ri->request_method[0]) != 'H')
-			    || (toupper(ri->request_method[1]) != 'T')
-			    || (toupper(ri->request_method[2]) != 'T')
-			    || (toupper(ri->request_method[3]) != 'P')
-			    || (toupper(ri->request_method[4]) != '/')) {
-				/* Invalid response */
-				return -1;
-			}
-		}
+parse_http_request(char *buf, int len, struct mg_request_info *ri)
+{
+	int request_length;
 
-		if (parse_http_headers(&buf, ri) < 0) {
-			/* Error while parsing headers */
-			return -1;
-		}
+	/* Reset attributes. DO NOT TOUCH is_ssl, remote_ip, remote_addr,
+	 * remote_port */
+	ri->remote_user = ri->request_method = ri->request_uri = ri->http_version =
+	    NULL;
+	ri->num_headers = 0;
+
+	/* Ignore leading \r and \n */
+	while ((len > 0) && (*buf == '\r') && (*buf == '\n')) {
+		buf++;
+		len--;
+	}
+
+	/* Find end of HTTP header */
+	request_length = get_http_header_len(buf, len);
+	if (request_length <= 0) {
+		return request_length;
 	}
+	buf[request_length - 1] = '\0';
+
+	/* RFC says that all initial whitespaces should be ingored */
+	while ((*buf != '\0') && isspace(*(unsigned char *)buf)) {
+		buf++;
+	}
+	if ((*buf == 0) || (*buf == '\r') || (*buf == '\n')) {
+		return -1;
+	}
+
+	/* The first word has to be the HTTP method */
+	ri->request_method = buf;
+
+	skip_to_end_of_word_and_terminate(&buf, 0);
+
+	/* Check for a valid http method */
+	if (!is_valid_http_method(ri->request_method)) {
+		return -1;
+	}
+
+	/* The second word is the URI */
+	ri->request_uri = buf;
+	skip_to_end_of_word_and_terminate(&buf, 0);
+
+	/* Next would be the HTTP version */
+	ri->http_version = buf;
+	skip_to_end_of_word_and_terminate(&buf, 0);
+
+	/* Check for a valid HTTP version key */
+	if (!strncmp(ri->http_version, "HTTP/", 5)) {
+		/* Invalid request */
+		return -1;
+	}
+	ri->http_version += 5;
+
+	/* Parse all HTTP headers */
+	if (parse_http_headers(&buf, ri) < 0) {
+		/* Error while parsing headers */
+		return -1;
+	}
+
 	return request_length;
 }
 
@@ -8733,7 +8766,7 @@ read_request(FILE *fp,
 		}
 	}
 
-	request_len = get_request_len(buf, *nread);
+	request_len = get_http_header_len(buf, *nread);
 
 	/* first time reading from this connection */
 	clock_gettime(CLOCK_MONOTONIC, &last_action_time);
@@ -8758,7 +8791,7 @@ read_request(FILE *fp,
 		}
 		if (n > 0) {
 			*nread += n;
-			request_len = get_request_len(buf, *nread);
+			request_len = get_http_header_len(buf, *nread);
 		} else {
 			request_len = 0;
 		}
@@ -14216,7 +14249,7 @@ get_rel_url_at_current_server(const char *uri, const struct mg_connection *conn)
 
 
 static int
-getreq(struct mg_connection *conn, char *ebuf, size_t ebuf_len, int *err)
+get_request(struct mg_connection *conn, char *ebuf, size_t ebuf_len, int *err)
 {
 	const char *cl;
 
@@ -14286,7 +14319,7 @@ getreq(struct mg_connection *conn, char *ebuf, size_t ebuf_len, int *err)
 			*err = 0;
 		}
 		return 0;
-	} else if (parse_http_message(conn->buf,
+	} else if (parse_http_request(conn->buf,
 	                              conn->buf_size,
 	                              &conn->request_info) <= 0) {
 		mg_snprintf(conn,
@@ -14362,7 +14395,7 @@ mg_get_response(struct mg_connection *conn,
 		}
 
 		conn->ctx = &rctx;
-		ret = getreq(conn, ebuf, ebuf_len, &err);
+		ret = get_response(conn, ebuf, ebuf_len, &err);
 		conn->ctx = octx;
 
 #if defined(MG_LEGACY_INTERFACE)
@@ -14410,7 +14443,7 @@ mg_download(const char *host,
 			            "%s",
 			            "Error sending request");
 		} else {
-			getreq(conn, ebuf, ebuf_len, &reqerr);
+			get_response(conn, ebuf, ebuf_len, &reqerr);
 
 #if defined(MG_LEGACY_INTERFACE)
 			/* TODO: 1) uri is deprecated;
@@ -14636,10 +14669,10 @@ process_new_connection(struct mg_connection *conn)
 		conn->handled_requests = 0;
 		do {
 
-			DEBUG_TRACE("calling getreq (%i times for this connection)",
+			DEBUG_TRACE("calling get_request (%i times for this connection)",
 			            conn->handled_requests + 1);
 
-			if (!getreq(conn, ebuf, sizeof(ebuf), &reqerr)) {
+			if (!get_request(conn, ebuf, sizeof(ebuf), &reqerr)) {
 				/* The request sent by the client could not be understood by
 				 * the server, or it was incomplete or a timeout. Send an
 				 * error message and close the connection. */

+ 41 - 24
test/private.c

@@ -70,29 +70,50 @@ START_TEST(test_parse_http_message)
 
 	char req11[] = "GET /\r\nError: X\r\n\r\n";
 
-	ck_assert_int_eq(sizeof(req9) - 1,
-	                 parse_http_message(req9, sizeof(req9), &ri));
-	ck_assert_int_eq(1, ri.num_headers);
 
-	ck_assert_int_eq(sizeof(req1) - 1,
-	                 parse_http_message(req1, sizeof(req1), &ri));
+	ck_assert_int_eq(0, parse_http_request(empty, 0, &ri));
+
+
+	ck_assert_int_eq(strlen(req1), parse_http_request(req1, strlen(req1), &ri));
 	ck_assert_str_eq("1.1", ri.http_version);
 	ck_assert_int_eq(0, ri.num_headers);
 
-	ck_assert_int_eq(-1, parse_http_message(req2, sizeof(req2), &ri));
-	ck_assert_int_eq(0, parse_http_message(req3, sizeof(req3), &ri));
-	ck_assert_int_eq(0, parse_http_message(req6, sizeof(req6), &ri));
-	ck_assert_int_eq(0, parse_http_message(req7, sizeof(req7), &ri));
-	ck_assert_int_eq(0, parse_http_message(empty, 0, &ri));
-	ck_assert_int_eq(sizeof(req8) - 1,
-	                 parse_http_message(req8, sizeof(req8), &ri));
+
+	ck_assert_int_eq(-1, parse_http_request(req2, strlen(req2), &ri));
+
+
+	ck_assert_int_eq(0, parse_http_request(req3, strlen(req3), &ri));
+
 
 	/* Multiline header are obsolete, so return an error
 	 * (https://tools.ietf.org/html/rfc7230#section-3.2.4). */
-	ck_assert_int_eq(-1, parse_http_message(req4, sizeof(req4), &ri));
+	ck_assert_int_eq(-1, parse_http_request(req4, strlen(req4), &ri));
+
 
-	ck_assert_int_eq(sizeof(req10) - 1,
-	                 parse_http_message(req10, sizeof(req10), &ri));
+	ck_assert_int_eq(strlen(req5), parse_http_request(req5, strlen(req5), &ri));
+	ck_assert_str_eq("GET", ri.request_method);
+	ck_assert_str_eq("1.1", ri.http_version);
+
+
+	ck_assert_int_eq(0, parse_http_request(req6, strlen(req6), &ri));
+
+
+	ck_assert_int_eq(0, parse_http_request(req7, strlen(req7), &ri));
+
+
+	ck_assert_int_eq(-1, parse_http_request(req8, strlen(req8), &ri));
+	ck_assert_int_eq(strlen(req8),
+	                 parse_http_response(req8, strlen(req8), &ri));
+
+
+	ck_assert_int_eq(-1, parse_http_request(req9, strlen(req9), &ri));
+	ck_assert_int_eq(strlen(req9),
+	                 parse_http_response(req9, strlen(req9), &ri));
+	ck_assert_int_eq(1, ri.num_headers);
+
+
+	ck_assert_int_eq(strlen(req10),
+	                 parse_http_request(req10, strlen(req10), &ri));
 	ck_assert_str_eq("1.1", ri.http_version);
 	ck_assert_int_eq(2, ri.num_headers);
 	ck_assert_str_eq("A", ri.http_headers[0].name);
@@ -100,12 +121,8 @@ START_TEST(test_parse_http_message)
 	ck_assert_str_eq("B", ri.http_headers[1].name);
 	ck_assert_str_eq("bar", ri.http_headers[1].value);
 
-	ck_assert_int_eq(-1, parse_http_message(req11, sizeof(req11), &ri));
 
-	ck_assert_int_eq(sizeof(req5) - 1,
-	                 parse_http_message(req5, sizeof(req5), &ri));
-	ck_assert_str_eq("GET", ri.request_method);
-	ck_assert_str_eq("1.1", ri.http_version);
+	ck_assert_int_eq(-1, parse_http_request(req11, strlen(req11), &ri));
 }
 END_TEST
 
@@ -126,7 +143,7 @@ START_TEST(test_should_keep_alive)
 
 	memset(&conn, 0, sizeof(conn));
 	conn.ctx = &ctx;
-	ck_assert_int_eq(parse_http_message(req1, sizeof(req1), &conn.request_info),
+	ck_assert_int_eq(parse_http_request(req1, sizeof(req1), &conn.request_info),
 	                 sizeof(req1) - 1);
 
 	ctx.config[ENABLE_KEEP_ALIVE] = no;
@@ -139,13 +156,13 @@ START_TEST(test_should_keep_alive)
 	ck_assert_int_eq(should_keep_alive(&conn), 0);
 
 	conn.must_close = 0;
-	parse_http_message(req2, sizeof(req2), &conn.request_info);
+	parse_http_request(req2, sizeof(req2), &conn.request_info);
 	ck_assert_int_eq(should_keep_alive(&conn), 0);
 
-	parse_http_message(req3, sizeof(req3), &conn.request_info);
+	parse_http_request(req3, sizeof(req3), &conn.request_info);
 	ck_assert_int_eq(should_keep_alive(&conn), 0);
 
-	parse_http_message(req4, sizeof(req4), &conn.request_info);
+	parse_http_request(req4, sizeof(req4), &conn.request_info);
 	ck_assert_int_eq(should_keep_alive(&conn), 1);
 
 	conn.status_code = 401;