Selaa lähdekoodia

Options ssi_extensions, cgi_extensions became cgi_pattern, ssi_pattern

valenok 13 vuotta sitten
vanhempi
commit
ed70793670
4 muutettua tiedostoa jossa 115 lisäystä ja 109 poistoa
  1. 41 20
      mongoose.1
  2. 49 88
      mongoose.c
  3. 19 1
      test/test.pl
  4. 6 0
      test/unit_test.c

+ 41 - 20
mongoose.1

@@ -29,8 +29,22 @@ mongoose listen on HTTP port 80 and HTTPS port 443, one should start it as:
 .Pp
 Unlike other web servers,
 .Nm
-does not expect CGI scripts to be put in a special directory. CGI scripts can
-be anywhere. CGI (and SSI) files are recognized by the file extension.
+does not require CGI scripts to be put in a special directory. CGI scripts can
+be anywhere. CGI (and SSI) files are recognized by the file name pattern.
+.Nm
+uses shell-like glob patterns with the following syntax:
+.Bl -tag -compact -width indent
+.It **
+Matches everything
+.It *
+Matches everything but the slash character, '/'
+.It ?
+Matches any single character
+.It |
+Matches if pattern on the left side or the right side matches. Pattern on the
+left side is matched first
+.El
+All other characters in the pattern match themselves.
 .Pp
 If no arguments are given,
 .Nm
@@ -48,9 +62,11 @@ Add/edit user's password in the passwords file. Deleting users can be done
 with any text editor. Functionality is similar to Apache's
 .Ic htdigest
 utility.
-.It Fl C Ar cgi_extensions
-Comma-separated list of CGI extensions.  All files having these extensions
-are treated as CGI scripts. Default: ".cgi,.pl,.php"
+.It Fl C Ar cgi_pattern
+All files that fully match cgi_pattern are treated as CGI.
+The default pattern allows CGI files to be
+anywhere. To restrict CGIs to a certain directory, use e.g. "-C /cgi-bin/**.cgi".
+Default: "**.cgi|**.pl|**.php"
 .It Fl E Ar cgi_environment
 Extra environment variables to be passed to the CGI script in addition to
 standard ones. The list must be comma-separated list of X=Y pairs, like this:
@@ -62,8 +78,8 @@ DELETE methods are used. Default: ""
 Use
 .Ar cgi_interpreter
 as a CGI interpreter for all CGI scripts regardless script extension.
-Default: "". Mongoose decides which interpreter to use by looking at
-the first line of a CGI script.
+Mongoose decides which interpreter to use by looking at
+the first line of a CGI script.  Default: "".
 .It Fl M Ar max_request_size
 Maximum HTTP request size in bytes. Default: "16384"
 .It Fl P Ar protect_uri
@@ -71,10 +87,10 @@ Comma separated list of URI=PATH pairs, specifying that given URIs
 must be protected with respected password files. Default: ""
 .It Fl R Ar authentication_domain
 Authorization realm. Default: "mydomain.com"
-.It Fl S Ar ssi_extensions
-Comma separated list of SSI extensions. Unknown SSI directives are silently
-ignored. Currently, two SSI directives supported, "include" and "exec".
-Default: "shtml,shtm"
+.It Fl S Ar ssi_pattern
+All files that fully match ssi_pattern are treated as SSI.
+Unknown SSI directives are silently ignored. Currently, two SSI directives
+are supported, "include" and "exec".  Default: "**.shtml|**.shtm"
 .It Fl a Ar access_log_file
 Access log file. Default: "", no logging is done.
 .It Fl d Ar enable_directory_listing
@@ -109,17 +125,21 @@ prepended to the port number. For example, to bind to a loopback interface
 on port 80 and to all interfaces on HTTPS port 443, use
 "mongoose -p 127.0.0.1:80,443s". Default: "8080"
 .It Fl r Ar document_root
-Location of the WWW root directory. A comma separated list of
-URI_PREFIX=DIRECTORY
-pairs could be appended to it, allowing Mongoose to serve from multiple
-directories. For example, "mongoose -p /var/www,/config=/etc,/garbage=/tmp".
-Default: "."
+Location of the WWW root directory. Default: "."
 .It Fl s Ar ssl_certificate
 Location of SSL certificate file. Default: ""
 .It Fl t Ar num_threads
 Number of worker threads to start. Default: "10"
 .It Fl u Ar run_as_user
 Switch to given user's credentials after startup. Default: ""
+.It Fl w Ar url_rewrite_patterns
+Comma-separated list of URL rewrites in the form of
+"pattern=substitution,...". If the "pattern" matches some prefix
+of the requested URL, then matched prefix gets substituted with "substitution".
+For example, "-w /config=/etc,**.doc|**.rtf=/cgi-bin/handle_doc.cgi"
+will serve all URLs that start with "/config" from the "/etc" directory, and
+call handle_doc.cgi script for .doc and .rtf file requests.
+Default: ""
 .El
 .Pp
 .Sh EMBEDDING
@@ -131,13 +151,14 @@ for details.
 .Pp
 .Sh EXAMPLES
 .Bl -tag -width indent
-.It Nm Fl r Ar /var/www,/aa=/tmp,/bb=/etc Fl s Ar /etc/cert.pem Fl p Ar 8080,8043s
-Start listening on port 8080 for HTTP, and 8043 for HTTPS connections.
-Use /etc/cert.pem as SSL certificate file. Web root is /var/www. In addition,
-map directory /tmp to URI /aa, directory /etc to URI /bb.
+.It Nm Fl r Ar /var/www Fl s Ar /etc/cert.pem Fl p Ar 8080,8043s
+Start serving files from /var/www. Listen on port 8080 for HTTP, and 8043
+for HTTPS connections.  Use /etc/cert.pem as SSL certificate file.
 .It Nm Fl l Ar -0.0.0.0/0,+10.0.0.0/8,+1.2.3.4
 Deny connections from everywhere, allow only IP address 1.2.3.4 and
 all IP addresses from 10.0.0.0/8 subnet to connect.
+.It Nm Fl w Ar **=/my/script.cgi
+Invoke /my/script.cgi for every incoming request, regardless of the URL.
 .El
 .Pp
 .Sh COPYRIGHT

+ 49 - 88
mongoose.c

@@ -410,13 +410,13 @@ enum {
 };
 
 static const char *config_options[] = {
-  "C", "cgi_extensions", ".cgi,.pl,.php",
+  "C", "cgi_pattern", "**.cgi|**.pl|**.php",
   "E", "cgi_environment", NULL,
   "G", "put_delete_passwords_file", NULL,
   "I", "cgi_interpreter", NULL,
   "P", "protect_uri", NULL,
   "R", "authentication_domain", "mydomain.com",
-  "S", "ssi_extensions", ".shtml,.shtm",
+  "S", "ssi_pattern", "**.shtml|**.shtm",
   "a", "access_log_file", NULL,
   "c", "ssl_chain_file", NULL,
   "d", "enable_directory_listing", "yes",
@@ -432,7 +432,7 @@ static const char *config_options[] = {
   "s", "ssl_certificate", NULL,
   "t", "num_threads", "10",
   "u", "run_as_user", NULL,
-  "w", "rewrite", NULL,
+  "w", "url_rewrite_patterns", NULL,
   NULL
 };
 #define ENTRIES_PER_CONFIG_OPTION 3
@@ -761,19 +761,44 @@ static const char *next_option(const char *list, struct vec *val,
   return list;
 }
 
-static int match_extension(const char *path, const char *ext_list) {
-  struct vec ext_vec;
-  size_t path_len;
+static int match_prefix(const char *pattern, int pattern_len, const char *str) {
+  const char *or_str;  // First '|' within pattern[0..pattern_len), if any
+  int i, j, len, res;  // i walks pattern, j walks str; result is a length in str
 
-  path_len = strlen(path);
+  if ((or_str = memchr(pattern, '|', pattern_len)) != NULL) {
+    res = match_prefix(pattern, or_str - pattern, str);  // Left side first
+    return res > 0 ? res :
+        match_prefix(or_str + 1, (pattern + pattern_len) - (or_str + 1), str);
+  }
 
-  while ((ext_list = next_option(ext_list, &ext_vec, NULL)) != NULL)
-    if (ext_vec.len < path_len &&
-        mg_strncasecmp(path + path_len - ext_vec.len,
-          ext_vec.ptr, ext_vec.len) == 0)
-      return 1;
+  i = j = res = 0;  // Returns matched prefix length of str, 0 on no match
+  for (; i < pattern_len; i++, j++) {
+    if (pattern[i] == '?' && str[j] != '\0') {
+      continue;  // '?' consumes any one character except the terminator
+    } else if (pattern[i] == '*') {
+      i++;
+      if (i < pattern_len && pattern[i] == '*') {  // Stay inside pattern_len
+        i++;
+        len = strlen(str + j);  // "**" may span '/' too
+      } else {
+        len = strcspn(str + j, "/");  // "*" stops at the first '/'
+      }
+      if (i == pattern_len) {
+        return j + len;  // Trailing wildcard: take the longest span
+      }
+      do {  // Backtrack: shrink the wildcard span until the rest matches
+        res = match_prefix(pattern + i, pattern_len - i, str + j + len);
+      } while (res == 0 && len-- > 0);
+      return res == 0 ? 0 : j + res + len;
+    } else if (pattern[i] != str[j]) {
+      return 0;
+    }
+  }
+  return j;
+}
 
-  return 0;
+static int full_match(const char *path, const char *pattern) {
+  return match_prefix(pattern, strlen(pattern), path) == (int) strlen(path);
 }
 
 // HTTP 1.1 assumes keep alive if "Connection:" header is not set
@@ -1521,74 +1546,15 @@ int mg_get_cookie(const struct mg_connection *conn, const char *cookie_name,
   return len;
 }
 
-// Mongoose allows to specify multiple directories to serve,
-// like /var/www,/~bob=/home/bob. That means that root directory depends on URI.
-// This function returns root dir for given URI.
-static int get_document_root(const struct mg_connection *conn,
-                             struct vec *document_root) {
-  const char *root, *uri;
-  int len_of_matched_uri;
-  struct vec uri_vec, path_vec;
-
-  uri = conn->request_info.uri;
-  len_of_matched_uri = 0;
-  root = next_option(conn->ctx->config[DOCUMENT_ROOT], document_root, NULL);
-
-  while ((root = next_option(root, &uri_vec, &path_vec)) != NULL) {
-    if (memcmp(uri, uri_vec.ptr, uri_vec.len) == 0) {
-      *document_root = path_vec;
-      len_of_matched_uri = uri_vec.len;
-      break;
-    }
-  }
-
-  return len_of_matched_uri;
-}
-
-static int match_prefix(const char *pattern, int pattern_len, const char *str) {
-  const char *or_str;
-  int i, j, len, res;
-
-  if ((or_str = memchr(pattern, '|', pattern_len)) != NULL) {
-    res = match_prefix(or_str + 1, (pattern + pattern_len) - (or_str + 1), str);
-    return res > 0 ? res : match_prefix(pattern, or_str - pattern, str);
-  }
-
-  i = j = res = 0;
-  for (; i < pattern_len; i++, j++) {
-    if (pattern[i] == '?' && str[j] != '\0') {
-      continue;
-    } else if (pattern[i] == '*') {
-      i++;
-      if (pattern[i] == '*') {
-        i++;
-        len = strlen(str + j);
-      } else {
-        len = strcspn(str + j, "/");
-      }
-      if (i == pattern_len) {
-        return j + len;
-      }
-      do {
-        res = match_prefix(pattern + i, pattern_len - i, str + j + len);
-      } while (res == 0 && len-- > 0);
-      return res == 0 ? 0 : j + res + len;
-    } else if (pattern[i] != str[j]) {
-      return 0;
-    }
-  }
-  return j;
-}
-
 static void convert_uri_to_file_name(struct mg_connection *conn,
                                      const char *uri, char *buf,
                                      size_t buf_len) {
-  struct vec vec, a, b;
+  struct vec a, b;
   const char *rewrite;
   int match_len;
 
-  match_len = get_document_root(conn, &vec);
-  mg_snprintf(conn, buf, buf_len, "%.*s%s", vec.len, vec.ptr, uri + match_len);
+  mg_snprintf(conn, buf, buf_len, "%s%s", conn->ctx->config[DOCUMENT_ROOT],
+              uri);
 
   rewrite = conn->ctx->config[REWRITE];
   while ((rewrite = next_option(rewrite, &a, &b)) != NULL) {
@@ -2852,18 +2818,16 @@ static void prepare_cgi_environment(struct mg_connection *conn,
                                     const char *prog,
                                     struct cgi_env_block *blk) {
   const char *s, *slash;
-  struct vec var_vec, root;
+  struct vec var_vec;
   char *p;
   int  i;
 
   blk->len = blk->nvars = 0;
   blk->conn = conn;
 
-  get_document_root(conn, &root);
-
   addenv(blk, "SERVER_NAME=%s", conn->ctx->config[AUTHENTICATION_DOMAIN]);
-  addenv(blk, "SERVER_ROOT=%.*s", root.len, root.ptr);
-  addenv(blk, "DOCUMENT_ROOT=%.*s", root.len, root.ptr);
+  addenv(blk, "SERVER_ROOT=%s", conn->ctx->config[DOCUMENT_ROOT]);
+  addenv(blk, "DOCUMENT_ROOT=%s", conn->ctx->config[DOCUMENT_ROOT]);
 
   // Prepare the environment block
   addenv(blk, "%s", "GATEWAY_INTERFACE=CGI/1.1");
@@ -3139,18 +3103,15 @@ static void send_ssi_file(struct mg_connection *, const char *, FILE *, int);
 static void do_ssi_include(struct mg_connection *conn, const char *ssi,
                            char *tag, int include_level) {
   char file_name[BUFSIZ], path[PATH_MAX], *p;
-  struct vec root;
   int is_ssi;
   FILE *fp;
 
-  get_document_root(conn, &root);
-
   // sscanf() is safe here, since send_ssi_file() also uses buffer
   // of size BUFSIZ to get the tag. So strlen(tag) is always < BUFSIZ.
   if (sscanf(tag, " virtual=\"%[^\"]\"", file_name) == 1) {
     // File name is relative to the webserver root
-    (void) mg_snprintf(conn, path, sizeof(path), "%.*s%c%s",
-        root.len, root.ptr, DIRSEP, file_name);
+    (void) mg_snprintf(conn, path, sizeof(path), "%s%c%s",
+        conn->ctx->config[DOCUMENT_ROOT], DIRSEP, file_name);
   } else if (sscanf(tag, " file=\"%[^\"]\"", file_name) == 1) {
     // File name is relative to the webserver working directory
     // or it is absolute system path
@@ -3173,7 +3134,7 @@ static void do_ssi_include(struct mg_connection *conn, const char *ssi,
         tag, path, strerror(ERRNO));
   } else {
     set_close_on_exec(fileno(fp));
-    is_ssi = match_extension(path, conn->ctx->config[SSI_EXTENSIONS]);
+    is_ssi = full_match(path, conn->ctx->config[SSI_EXTENSIONS]);
     if (is_ssi) {
       send_ssi_file(conn, path, fp, include_level + 1);
     } else {
@@ -3408,7 +3369,7 @@ static void handle_request(struct mg_connection *conn) {
           "Directory listing denied");
     }
 #if !defined(NO_CGI)
-  } else if (match_extension(path, conn->ctx->config[CGI_EXTENSIONS])) {
+  } else if (full_match(path, conn->ctx->config[CGI_EXTENSIONS])) {
     if (strcmp(ri->request_method, "POST") &&
         strcmp(ri->request_method, "GET")) {
       send_http_error(conn, 501, "Not Implemented",
@@ -3417,7 +3378,7 @@ static void handle_request(struct mg_connection *conn) {
       handle_cgi_request(conn, path);
     }
 #endif // !NO_CGI
-  } else if (match_extension(path, conn->ctx->config[SSI_EXTENSIONS])) {
+  } else if (full_match(path, conn->ctx->config[SSI_EXTENSIONS])) {
     handle_ssi_file_request(conn, path);
   } else if (is_not_modified(conn, &st)) {
     send_http_error(conn, 304, "Not Modified", "");

+ 19 - 1
test/test.pl

@@ -150,6 +150,11 @@ if (scalar(@ARGV) > 0 and $ARGV[0] eq 'embedded') {
   exit 0;
 }
 
+if (scalar(@ARGV) > 0 and $ARGV[0] eq 'unit') {
+  do_unit_test();
+  exit 0;
+}
+
 # Make sure we load config file if no options are given.
 # Command line options override config files settings
 write_file($config, "access_log_file access.log\nlistening_ports 12345\n");
@@ -166,7 +171,7 @@ my $cmd = "$exe $config -listening_ports $port -access_log_file access.log ".
 "-extra_mime_types .bar=foo/bar,.tar.gz=blah,.baz=foo " .
 '-put_delete_passwords_file test/passfile ' .
 '-access_control_list -0.0.0.0/0,+127.0.0.1 ' .
-"-document_root $root,/aiased=/etc/,/ta=$test_dir";
+"-document_root $root -url_rewrite_patterns /aiased=/etc/,/ta=$test_dir";
 $cmd .= ' -cgi_interpreter perl' if on_windows();
 spawn($cmd);
 
@@ -386,6 +391,7 @@ unless (scalar(@ARGV) > 0 and $ARGV[0] eq "basic_tests") {
 
   do_PUT_test();
   kill_spawned_child();
+  do_unit_test();
   do_embedded_test();
 }
 
@@ -415,6 +421,18 @@ sub do_PUT_test {
     "HTTP/1.1 100 Continue.+HTTP/1.1 200", 'PUT 100-Continue');
 }
 
+sub do_unit_test {  # Compile test/unit_test.c, run it, fail() on any error
+  my $cmd = "cc -W -Wall -o $unit_test_exe $root/unit_test.c -I. ".
+  "-pthread -DNO_SSL ";
+  if (on_windows()) {  # MSVC: build the unit test itself, not embed.c
+    $cmd = "cl $root/unit_test.c /I. /nologo /DNO_SSL ".
+    "/link /out:$unit_test_exe.exe ws2_32.lib ";
+  }
+  print $cmd, "\n";
+  system($cmd) == 0 or fail("Cannot compile unit test");
+  system($unit_test_exe) == 0 or fail("Unit test failed!");
+}
+
 sub do_embedded_test {
   my $cmd = "cc -W -Wall -o $embed_exe $root/embed.c mongoose.c -I. ".
   "-pthread -DNO_SSL -DLISTENING_PORT=\\\"$port\\\"";

+ 6 - 0
test/unit_test.c

@@ -8,6 +8,12 @@ int main(void) {
   assert(match_prefix("/*", 2, "/a/b/c") == 2);
   assert(match_prefix("*/*", 3, "/a/b/c") == 2);
   assert(match_prefix("**/", 3, "/a/b/c") == 5);
+  assert(match_prefix("**.foo|**.bar", 13, "a.bar") == 5);
+  assert(match_prefix("a|b|cd", 6, "cdef") == 2);
+  assert(match_prefix("a|b|c?", 6, "cdef") == 2);
+  assert(match_prefix("a|?|cd", 6, "cdef") == 1);
+  assert(match_prefix("/a/**.cgi", 9, "/foo/bar/x.cgi") == 0);
+  assert(match_prefix("/a/**.cgi", 9, "/a/bar/x.cgi") == 12);
 
   return 0;
 }