Parcourir la source

Pattern matching: ? should not match /

Add unit tests and examples
bel2125 il y a 3 ans
Parent
commit
185e845cd9
3 fichiers modifiés avec 172 ajouts et 19 suppressions
  1. 11 5
      docs/UserManual.md
  2. 37 14
      src/civetweb.c
  3. 124 0
      unittest/private.c

+ 11 - 5
docs/UserManual.md

@@ -92,16 +92,22 @@ Pattern match starts at the beginning of the string, so essentially
 patterns are prefix patterns. Syntax is as follows:
 
      **      Matches everything
-     *       Matches everything but slash character, '/'
-     ?       Matches any character
+     *       Matches everything but the slash character ('/')
+     ?       Matches any character but the slash character ('/')
      $       Matches the end of the string
      |       Matches if pattern on the left side or the right side matches.
 
 All other characters in the pattern match themselves. Examples:
 
-    **.cgi$      Any string that ends with .cgi
-    /foo         Any string that begins with /foo
-    **a$|**b$    Any string that ends with a or b
+    **.cgi$          Any string that ends with .cgi
+    /foo             Any string that begins with /foo
+    **a$|**b$        Any string that ends with a or b
+    
+    /data/????.css$  Matches css files with 4-character names (any characters except '/') in the "/data" folder.
+    /data/*.js$      Matches all js files located directly in the "/data" folder (not in sub-folders).
+    /api/*/*.cgi$    Matches "/api/resourcetype/resourcename.cgi"
+    /*.jpg$|/*.jpeg$ JPG and JPEG files in root folder
+    **.jpg$|**.jpeg$ JPG and JPEG files anywhere
 
 
 ## Options from `civetweb.c`

+ 37 - 14
src/civetweb.c

@@ -3972,43 +3972,66 @@ header_has_option(const char *header, const char *option)
 static ptrdiff_t
 match_prefix(const char *pattern, size_t pattern_len, const char *str)
 {
-	const char *or_str;
+	const char *or_str = (const char *)memchr(pattern, '|', pattern_len);
 	ptrdiff_t i, j, len, res;
 
-	if ((or_str = (const char *)memchr(pattern, '|', pattern_len)) != NULL) {
+	if (or_str != NULL) {
+		/* Split at | for alternative match */
 		res = match_prefix(pattern, (size_t)(or_str - pattern), str);
-		return (res > 0) ? res
-		                 : match_prefix(or_str + 1,
-		                                (size_t)((pattern + pattern_len)
-		                                         - (or_str + 1)),
-		                                str);
+		if (res > 0) {
+			return res;
+		}
+		return match_prefix(or_str + 1,
+		                    (size_t)((pattern + pattern_len) - (or_str + 1)),
+		                    str);
 	}
 
-	for (i = 0, j = 0; (i < (ptrdiff_t)pattern_len); i++, j++) {
-		if ((pattern[i] == '?') && (str[j] != '\0')) {
+	/* Parse string */
+	i = 0; /* index of pattern */
+	j = 0; /* index of str */
+	while (i < (ptrdiff_t)pattern_len) {
+		/* Pattern ? matches one character, except / */
+		if ((pattern[i] == '?') && (str[j] != '\0') && (str[j] != '/')) {
+			i++;
+			j++;
 			continue;
-		} else if (pattern[i] == '$') {
+		}
+
+		/* Pattern $ matches end of string */
+		if (pattern[i] == '$') {
 			return (str[j] == '\0') ? j : -1;
-		} else if (pattern[i] == '*') {
+		}
+
+		/* Pattern * or ** matches multiple characters */
+		if (pattern[i] == '*') {
 			i++;
 			if (pattern[i] == '*') {
+				/* Pattern ** matches all */
 				i++;
 				len = (ptrdiff_t)strlen(str + j);
 			} else {
+				/* Pattern * matches all except / character */
 				len = (ptrdiff_t)strcspn(str + j, "/");
 			}
+
 			if (i == (ptrdiff_t)pattern_len) {
+				/* End of pattern reached */
 				return j + len;
 			}
 			do {
 				res = match_prefix(pattern + i,
 				                   (pattern_len - (size_t)i),
 				                   str + j + len);
-			} while (res == -1 && len-- > 0);
-			return (res == -1) ? -1 : j + res + len;
+			} while ((res == -1) && (len-- > 0));
+
+			return (res == -1) ? -1 : (j + res + len);
+
 		} else if (lowercase(&pattern[i]) != lowercase(&str[j])) {
+			/* case insensitive compare: mismatch */
 			return -1;
 		}
+		i++;
+		j++;
 	}
 	return (ptrdiff_t)j;
 }
@@ -13777,7 +13800,7 @@ handle_websocket_request(struct mg_connection *conn,
 			    conn->dom_ctx->config[LUA_WEBSOCKET_EXTENSIONS], path);
 		}
 
-		if (lua_websock) {
+		if (lua_websock > 0) {
 			/* Step 3.2: Lua is responsible: call it. */
 			conn->lua_websocket_state = lua_websocket_new(path, conn);
 			if (!conn->lua_websocket_state) {

+ 124 - 0
unittest/private.c

@@ -324,6 +324,129 @@ START_TEST(test_match_prefix)
 END_TEST
 
 
+START_TEST(test_match_prefix_strlen)
+{
+	/* Copyright (c) 2022 the CivetWeb developers */
+	ck_assert_int_eq(5, match_prefix_strlen("/Test", "/test"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/Test", "/my/test"));
+	ck_assert_int_eq(3, match_prefix_strlen("/my", "/my/test"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/my$", "/my/test"));
+	ck_assert_int_eq(8, match_prefix_strlen("/*/Test", "/my/test"));
+
+	ck_assert_int_eq(17,
+	                 match_prefix_strlen("/api/*/*.cgi", "/api/obj/prop.cgi"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/abc/*/*.cgi", "/api/obj/prop.cgi"));
+	ck_assert_int_eq(18,
+	                 match_prefix_strlen("/api/*/*.cgi", "/api/obj/other.cgi"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/api/*/*.cgi",
+	                                     "/api/obj/too/deep.cgi"));
+	ck_assert_int_eq(17,
+	                 match_prefix_strlen("/api/*/*.cgi$", "/api/obj/prop.cgi"));
+	ck_assert_int_eq(18,
+	                 match_prefix_strlen("/api/*/*.cgi$",
+	                                     "/api/obj/other.cgi"));
+	ck_assert_int_eq(17,
+	                 match_prefix_strlen("/api/*/*.cgi",
+	                                     "/api/obj/prop.cgiZZZ"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/api/*/*.cgi$",
+	                                     "/api/obj/prop.cgiZZZ"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/*/*.cgi", "/api/obj/prop.cgi"));
+
+	ck_assert_int_eq(7, match_prefix_strlen("I????IT", "ItestIT"));
+	ck_assert_int_eq(-1, match_prefix_strlen("I????IT", "IseeIT"));
+	ck_assert_int_eq(23, match_prefix_strlen("**$", "EveryThing/matches this"));
+	ck_assert_int_eq(23,
+	                 match_prefix_strlen("?**$", "EveryThing/matches this"));
+	ck_assert_int_eq(23,
+	                 match_prefix_strlen("**?$", "EveryThing/matches this"));
+	ck_assert_int_eq(0, match_prefix_strlen("**$", ""));
+	ck_assert_int_eq(-1, match_prefix_strlen("?**$", ""));
+	ck_assert_int_eq(-1, match_prefix_strlen("**?$", ""));
+	ck_assert_int_eq(-1, match_prefix_strlen("/**?$", "/"));
+	ck_assert_int_eq(1, match_prefix_strlen("/**$", "/"));
+	ck_assert_int_eq(-1, match_prefix_strlen("//", "/"));
+	ck_assert_int_eq(1, match_prefix_strlen("/", "//"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/$", "//"));
+
+	/* ? pattern should not match / character */
+	ck_assert_int_eq(-1, match_prefix_strlen("/?", "//"));
+	ck_assert_int_eq(3, match_prefix_strlen("/?/$", "/a/"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/?/$", "///"));
+
+	/* Pattern From UserManual.md */
+	ck_assert_int_eq(20,
+	                 match_prefix_strlen("**.cgi$", "anywhere/anyname.cgi"));
+	ck_assert_int_eq(-1, match_prefix_strlen("**.cgi$", "name.cgi.not.at.end"));
+	ck_assert_int_eq(4, match_prefix_strlen("/foo", "/foo"));
+	ck_assert_int_eq(4, match_prefix_strlen("/foo", "/foobar"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/foo", "not.at.start./foo"));
+	ck_assert_int_eq(1, match_prefix_strlen("**a$|**b$", "a"));
+	ck_assert_int_eq(2, match_prefix_strlen("**a$|**b$", "xb"));
+	ck_assert_int_eq(-1, match_prefix_strlen("**a$|**b$", "abc"));
+
+	ck_assert_int_eq(14,
+	                 match_prefix_strlen("/data/????.css$", "/data/12.4.css"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/data/????.css$", "/data/12/4.css"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/data/????.css$", "/data/../4.css"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/data/????.css$", "/else/12.4.css"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/data/????.css$", "/data/1234.cssx"));
+	ck_assert_int_eq(13, match_prefix_strlen("/data/*.js$", "/data/1234.js"));
+	ck_assert_int_eq(17,
+	                 match_prefix_strlen("/data/*.js$", "/data/12345678.js"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/data/*.js$", "/else/1234.js"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/data/*.js$", "/data/../some.js"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/data/*.js$", "/data//x.js"));
+	ck_assert_int_eq(-1, match_prefix_strlen("/data/*.js$", "/data/./x.js"));
+	ck_assert_int_eq(34,
+	                 match_prefix_strlen("/api/*/*.cgi$",
+	                                     "/api/resourcetype/resourcename.cgi"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/api/*/*.cgi$",
+	                                     "/api/resourcename.cgi"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/*.jpg$|/*.jpeg$",
+	                                     "/somewhere/something.txt"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/*.jpg$|/*.jpeg$", "/something.txt"));
+	ck_assert_int_eq(10, match_prefix_strlen("/*.jpg$|/*.jpeg$", "/image.jpg"));
+	ck_assert_int_eq(11,
+	                 match_prefix_strlen("/*.jpg$|/*.jpeg$", "/image.jpeg"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/*.jpg$|/*.jpeg$",
+	                                     "/image.jpeg.exe"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/*.jpg$|/*.jpeg$", "/sub/image.jpg"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("/*.jpg$|/*.jpeg$",
+	                                     "/sub/image.jpeg"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("**.jpg$|**.jpeg$",
+	                                     "/somewhere/something.txt"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("**.jpg$|**.jpeg$", "/something.txt"));
+	ck_assert_int_eq(10, match_prefix_strlen("**.jpg$|**.jpeg$", "/image.jpg"));
+	ck_assert_int_eq(11,
+	                 match_prefix_strlen("**.jpg$|**.jpeg$", "/image.jpeg"));
+	ck_assert_int_eq(-1,
+	                 match_prefix_strlen("**.jpg$|**.jpeg$",
+	                                     "/image.jpeg.exe"));
+	ck_assert_int_eq(14,
+	                 match_prefix_strlen("**.jpg$|**.jpeg$", "/sub/image.jpg"));
+	ck_assert_int_eq(15,
+	                 match_prefix_strlen("**.jpg$|**.jpeg$",
+	                                     "/sub/image.jpeg"));
+}
+END_TEST
+
+
 START_TEST(test_remove_dot_segments)
 {
 	int i;
@@ -1365,6 +1488,7 @@ make_private_suite(void)
 	suite_add_tcase(suite, tcase_http_keep_alive);
 
 	tcase_add_test(tcase_url_parsing_1, test_match_prefix);
+	tcase_add_test(tcase_url_parsing_1, test_match_prefix_strlen);
 	tcase_set_timeout(tcase_url_parsing_1, civetweb_min_test_timeout);
 	suite_add_tcase(suite, tcase_url_parsing_1);