瀏覽代碼

Rewrite pattern matching (#499)

Add new file "match.inl" to git
bel2125 3 年之前
父節點
當前提交
2f01cc95f3
共有 2 個文件被更改,包括 208 次插入0 次删除
  1. 1 0
      docs/Embedding.md
  2. 207 0
      src/match.inl

+ 1 - 0
docs/Embedding.md

@@ -31,6 +31,7 @@ but all functions required to run a HTTP server.
     - src/sha1.inl (SHA calculation)
     - src/handle\_form.inl (HTML form handling functions)
     - src/response.inl (helper for generating HTTP response headers)
+    - src/match.inl (pattern matching)
     - src/timer.inl (optional timer support)
     - src/http2.inl (optional HTTP2 support)
   - Optional: C++ wrapper

+ 207 - 0
src/match.inl

@@ -0,0 +1,207 @@
+/* Reimplementation of pattern matching */
+/* This file is part of the CivetWeb web server.
+ * See https://github.com/civetweb/civetweb/
+ */
+
+#if !defined(MG_MATCH_CONTEXT_MAX_MATCHES)
+#define MG_MATCH_CONTEXT_MAX_MATCHES (32)
+#endif
+
+struct mg_match_context {
+	int case_sensitive;
+	size_t num_matches;
+	const char *match_str[MG_MATCH_CONTEXT_MAX_MATCHES];
+	size_t match_len[MG_MATCH_CONTEXT_MAX_MATCHES];
+};
+
+
+/* Add a new match to the list of matches */
+static void
+match_context_push(const char *str, size_t len, struct mg_match_context *mcx)
+{
+	if (mcx->num_matches < MG_MATCH_CONTEXT_MAX_MATCHES) {
+		mcx->match_str[mcx->num_matches] = str;
+		mcx->match_len[mcx->num_matches] = len;
+		mcx->num_matches++;
+	}
+}
+
+
+static ptrdiff_t
+mg_match_impl(const char *pat,
+              size_t pat_len,
+              const char *str,
+              struct mg_match_context *mcx)
+{
+	/* Parse string */
+	size_t i_pat = 0; /* Pattern index */
+	size_t i_str = 0; /* Pattern index */
+
+	uint8_t case_sensitive = ((mcx != NULL) ? mcx->case_sensitive : 0);
+
+	while (i_pat < pat_len) {
+
+		/* Pattern ? matches one character, except / and NULL character */
+		if ((pat[i_pat] == '?') && (str[i_str] != '\0')
+		    && (str[i_str] != '/')) {
+			size_t i_str_start = i_str;
+			do {
+				/* Advance as long as there are ? */
+				i_pat++;
+				i_str++;
+			} while ((pat[i_pat] == '?') && (str[i_str] != '\0')
+			         && (str[i_str] != '/') && (i_pat < pat_len));
+
+			/* If we have a match context, add the substring we just found */
+			if (mcx) {
+				match_context_push(str + i_str_start, i_str - i_str_start, mcx);
+			}
+
+			/* Reached end of pattern ? */
+			if (i_pat == pat_len) {
+				return (ptrdiff_t)i_str;
+			}
+		}
+
+		/* Pattern $ matches end of string */
+		if (pat[i_pat] == '$') {
+			return (str[i_str] == '\0') ? i_str : -1;
+		}
+
+		/* Pattern * or ** matches multiple characters */
+		if (pat[i_pat] == '*') {
+			size_t len; /* lenght matched by "*" or "**" */
+			ptrdiff_t ret;
+
+			i_pat++;
+			if ((pat[i_pat] == '*') && (i_pat < pat_len)) {
+				/* Pattern ** matches all */
+				i_pat++;
+				len = strlen(str + i_str);
+			} else {
+				/* Pattern * matches all except / character */
+				len = strcspn(str + i_str, "/");
+			}
+
+			if (i_pat == pat_len) {
+				/* End of pattern reached. Add all to match context. */
+				if (mcx) {
+					match_context_push(str + i_str, len, mcx);
+				}
+				return (i_str + len);
+			}
+
+			/* This loop searches for the longest possible match */
+			do {
+				ret = mg_match_impl(pat + i_pat,
+				                    (pat_len - (size_t)i_pat),
+				                    str + i_str + len,
+				                    mcx);
+			} while ((ret == -1) && (len-- > 0));
+
+			/* If we have a match context, add the substring we just found */
+			if (ret >= 0) {
+				if (mcx) {
+					match_context_push(str + i_str, len, mcx);
+				}
+				return (i_str + ret + len);
+			}
+
+			return -1;
+		}
+
+
+		/* Single character compare */
+		if (case_sensitive) {
+			if (pat[i_pat] != str[i_str]) {
+				/* case sensitive compare: mismatch */
+				return -1;
+			}
+		} else if (lowercase(&pat[i_pat]) != lowercase(&str[i_str])) {
+			/* case insensitive compare: mismatch */
+			return -1;
+		}
+
+		i_pat++;
+		i_str++;
+	}
+	return (ptrdiff_t)i_str;
+}
+
+
+static ptrdiff_t
+mg_match_alternatives(const char *pat,
+                      size_t pat_len,
+                      const char *str,
+                      struct mg_match_context *mcx)
+{
+	const char *match_alternative = (const char *)memchr(pat, '|', pat_len);
+	while (match_alternative != NULL) {
+		/* Split at | for alternative match */
+		size_t left_size = (size_t)(match_alternative - pat);
+
+		/* Try left string first */
+		ptrdiff_t ret = mg_match_impl(pat, left_size, str, mcx);
+		if (ret >= 0) {
+			/* A 0-byte match is also valid */
+			return ret;
+		}
+
+		/* If no match: try right side */
+		pat += left_size + 1;
+		pat_len -= left_size + 1;
+		match_alternative = (const char *)memchr(pat, '|', pat_len);
+	}
+
+	/* Handled all | operators. This is the final string. */
+	return mg_match_impl(pat, pat_len, str, mcx);
+}
+
+
+/* Export as public API? */
+static ptrdiff_t
+mg_match(const char *pat,
+         size_t pat_len,
+         const char *str,
+         struct mg_match_context *mcx)
+{
+	ptrdiff_t ret;
+	if (mcx != NULL) {
+		mcx->num_matches = 0;
+		memset((void *)(mcx->match_str), 0, sizeof(mcx->match_str));
+		memset(mcx->match_len, 0, sizeof(mcx->match_len));
+	}
+	ret = mg_match_alternatives(pat, pat_len, str, mcx);
+	if (mcx != NULL) {
+		if (ret < 0) {
+			mcx->num_matches = 0;
+			memset((void *)(mcx->match_str), 0, sizeof(mcx->match_str));
+			memset(mcx->match_len, 0, sizeof(mcx->match_len));
+		} else {
+			/* TODO: Join matches */
+		}
+	}
+	return ret;
+}
+
+
+static ptrdiff_t
+match_prefix(const char *pattern, size_t pattern_len, const char *str)
+{
+	if (pattern == NULL) {
+		return -1;
+	}
+	return mg_match_alternatives(pattern, pattern_len, str, NULL);
+}
+
+
+static ptrdiff_t
+match_prefix_strlen(const char *pattern, const char *str)
+{
+	if (pattern == NULL) {
+		return -1;
+	}
+	return mg_match_alternatives(pattern, strlen(pattern), str, NULL);
+}
+
+/* End of match.inl */