浏览代码

Rewrite pattern matching (#499)

Pattern matching caused several problems. For details, see #499 and
the issues referenced therein. There are only minor incompatibilities
compared to the original behavior:

Now: The ? pattern will match every character, except '\0' and '/'
Before: The ? pattern will match every character except '\0'
Reasoning: It is not useful to have ? match /.
E.g., "???" would otherwise match "abc" as well as "a/c" and "../".

Now: The "|x" pattern will match 0 characters.
Before: The result was -1, but this was inconsistent with "x|".
The position of an empty alternative in a pattern containing "|"
determined whether the empty match returned -1 or 0.

Furthermore, a pattern "**" with a pattern_length of 1 led to a crash.
This has been fixed as well.
bel2125 3 年之前
父节点
当前提交
0874322e76

+ 1 - 0
VisualStudio/civetweb_lua/civetweb_lua.vcxproj

@@ -207,6 +207,7 @@
   </ItemGroup>
   <ItemGroup>
     <None Include="..\..\src\handle_form.inl" />
+    <None Include="..\..\src\match.inl" />
     <None Include="..\..\src\md5.inl" />
     <None Include="..\..\src\http2.inl" />
     <None Include="..\..\src\mod_lua_shared.inl" />

+ 1 - 0
VisualStudio/civetweb_lua/civetweb_lua.vcxproj.filters

@@ -27,5 +27,6 @@
     <None Include="..\..\src\timer.inl" />
     <None Include="..\..\src\mod_mbedtls.inl" />
     <None Include="..\..\src\openssl_dl.inl" />
+    <None Include="..\..\src\match.inl" />
   </ItemGroup>
 </Project>

+ 1 - 0
format.bat

@@ -5,6 +5,7 @@ clang-format -i src/CivetServer.cpp
 clang-format -i src/civetweb_private_lua.h
 clang-format -i src/md5.inl
 clang-format -i src/sha1.inl
+clang-format -i src/match.inl
 clang-format -i src/mod_lua.inl
 clang-format -i src/mod_lua_shared.inl
 clang-format -i src/mod_duktape.inl

+ 4 - 78
src/civetweb.c

@@ -3968,83 +3968,8 @@ header_has_option(const char *header, const char *option)
 }
 
 
-/* Perform case-insensitive match of string against pattern */
-static ptrdiff_t
-match_prefix(const char *pattern, size_t pattern_len, const char *str)
-{
-	const char *or_str = (const char *)memchr(pattern, '|', pattern_len);
-	ptrdiff_t i, j, len, res;
-
-	if (or_str != NULL) {
-		/* Split at | for alternative match */
-		res = match_prefix(pattern, (size_t)(or_str - pattern), str);
-		if (res > 0) {
-			return res;
-		}
-		return match_prefix(or_str + 1,
-		                    (size_t)((pattern + pattern_len) - (or_str + 1)),
-		                    str);
-	}
-
-	/* Parse string */
-	i = 0; /* index of pattern */
-	j = 0; /* index of str */
-	while (i < (ptrdiff_t)pattern_len) {
-		/* Pattern ? matches one character, except / */
-		if ((pattern[i] == '?') && (str[j] != '\0') && (str[j] != '/')) {
-			i++;
-			j++;
-			continue;
-		}
-
-		/* Pattern $ matches end of string */
-		if (pattern[i] == '$') {
-			return (str[j] == '\0') ? j : -1;
-		}
-
-		/* Pattern * or ** matches multiple characters */
-		if (pattern[i] == '*') {
-			i++;
-			if ((pattern[i] == '*') && (i < (ptrdiff_t)pattern_len)) {
-				/* Pattern ** matches all */
-				i++;
-				len = (ptrdiff_t)strlen(str + j);
-			} else {
-				/* Pattern * matches all except / character */
-				len = (ptrdiff_t)strcspn(str + j, "/");
-			}
-
-			if (i == (ptrdiff_t)pattern_len) {
-				/* End of pattern reached */
-				return j + len;
-			}
-			do {
-				res = match_prefix(pattern + i,
-				                   (pattern_len - (size_t)i),
-				                   str + j + len);
-			} while ((res == -1) && (len-- > 0));
-
-			return (res == -1) ? -1 : (j + res + len);
-
-		} else if (lowercase(&pattern[i]) != lowercase(&str[j])) {
-			/* case insensitive compare: mismatch */
-			return -1;
-		}
-		i++;
-		j++;
-	}
-	return (ptrdiff_t)j;
-}
-
-
-static ptrdiff_t
-match_prefix_strlen(const char *pattern, const char *str)
-{
-	if (pattern == NULL) {
-		return -1;
-	}
-	return match_prefix(pattern, strlen(pattern), str);
-}
+/* Pattern matching has been reimplemented in a new file */
+#include "match.inl"
 
 
 /* HTTP 1.1 assumes keep alive if "Connection:" header is not set
@@ -17984,6 +17909,8 @@ mg_connect_client_impl(const struct mg_client_options *client_options,
 	size_t ctx_size = ((sizeof(struct mg_context) + 7) >> 3) << 3;
 	size_t alloc_size = conn_size + ctx_size + max_req_size;
 
+	(void)init; /* TODO: Implement required options */
+
 	conn = (struct mg_connection *)mg_calloc(1, alloc_size);
 
 	if (error != NULL) {
@@ -18252,7 +18179,6 @@ mg_connect_client2(const char *host,
                    struct mg_error_data *error)
 {
 	(void)path;
-	(void)init;
 
 	int is_ssl, is_ws;
 	/* void *user_data = (init != NULL) ? init->user_data : NULL; -- TODO */

+ 1 - 1
src/main.c

@@ -1279,7 +1279,7 @@ start_civetweb(int argc, char *argv[])
 	}
 
 	/* Initialize options structure */
-	memset(options, 0, sizeof(options));
+	memset((void *)options, 0, sizeof(options));
 	set_option(options, "document_root", ".");
 
 	/* Update config based on command line arguments */

+ 61 - 0
unittest/private.c

@@ -652,6 +652,66 @@ START_TEST(test_match_prefix_fuzz)
 END_TEST
 
 
+START_TEST(test_mg_match)
+{
+	/* Copyright (c) 2022 the CivetWeb developers */
+	struct mg_match_context mcx;
+
+	ck_assert_int_eq(4, mg_match("a*D", 3, "abcde", NULL));
+
+	memset(&mcx, 0, sizeof(mcx));
+	mcx.case_sensitive = 0;
+	ck_assert_int_eq(4, mg_match("a*D", 3, "abcde", &mcx));
+	ck_assert_int_eq(1, (int)mcx.num_matches);
+	ck_assert_int_eq(2, (int)mcx.match_len[0]);
+	ck_assert(!memcmp(mcx.match_str[0], "bc", 2));
+
+	memset(&mcx, 0, sizeof(mcx));
+	mcx.case_sensitive = 1;
+	ck_assert_int_eq(-1, mg_match("a*D", 3, "abcde", &mcx));
+	ck_assert_int_eq(0, (int)mcx.num_matches);
+
+	memset(&mcx, 0, sizeof(mcx));
+	mcx.case_sensitive = 1;
+	ck_assert_int_eq(4, mg_match("a??d", 4, "abcde", &mcx));
+	ck_assert_int_eq(1, (int)mcx.num_matches);
+	ck_assert_int_eq(2, (int)mcx.match_len[0]);
+	ck_assert(!memcmp(mcx.match_str[0], "bc", 2));
+
+	memset(&mcx, 0, sizeof(mcx));
+	mcx.case_sensitive = 1;
+	ck_assert_int_eq(5, mg_match("a??d*", 5, "abcde", &mcx));
+	ck_assert_int_eq(2, (int)mcx.num_matches);
+	ck_assert_int_eq(2, (int)mcx.match_len[0]);
+	ck_assert(!memcmp(mcx.match_str[0], "bc", 2));
+	ck_assert_int_eq(1, (int)mcx.match_len[1]);
+	ck_assert(!memcmp(mcx.match_str[1], "e", 1));
+
+	memset(&mcx, 0, sizeof(mcx));
+	mcx.case_sensitive = 1;
+	ck_assert_int_eq(4, mg_match("a??d*", 5, "abcd", &mcx));
+	ck_assert_int_eq(2, (int)mcx.num_matches);
+	ck_assert_int_eq(2, (int)mcx.match_len[0]);
+	ck_assert(!memcmp(mcx.match_str[0], "bc", 2));
+	ck_assert_int_eq(0, (int)mcx.match_len[1]);
+
+	memset(&mcx, 0, sizeof(mcx));
+	mcx.case_sensitive = 0;
+	ck_assert_int_eq(2, mg_match("a?|?B", 5, "ABC", &mcx));
+	ck_assert_int_eq(1, (int)mcx.num_matches);
+	ck_assert_int_eq(1, (int)mcx.match_len[0]);
+	ck_assert(!memcmp(mcx.match_str[0], "B", 1));
+
+	memset(&mcx, 0, sizeof(mcx));
+	mcx.case_sensitive = 1;
+	ck_assert_int_eq(2, mg_match("a?|?B", 5, "ABC", &mcx));
+	ck_assert_int_eq(1, (int)mcx.num_matches);
+	ck_assert_int_eq(1, (int)mcx.match_len[0]);
+	ck_assert(!memcmp(mcx.match_str[0], "A", 1));
+}
+END_TEST
+
+
 START_TEST(test_remove_dot_segments)
 {
 	int i;
@@ -1695,6 +1755,7 @@ make_private_suite(void)
 	tcase_add_test(tcase_url_parsing_1, test_match_prefix);
 	tcase_add_test(tcase_url_parsing_1, test_match_prefix_strlen);
 	tcase_add_test(tcase_url_parsing_1, test_match_prefix_fuzz);
+	tcase_add_test(tcase_url_parsing_1, test_mg_match);
 	tcase_set_timeout(tcase_url_parsing_1, civetweb_min_test_timeout);
 	suite_add_tcase(suite, tcase_url_parsing_1);