From a5c151646821e57546fa680090056d4e7a3a8af9 Mon Sep 17 00:00:00 2001
From: Lorenz Stechauner <lorenz.stechauner@necronda.net>
Date: Fri, 18 Dec 2020 22:06:41 +0100
Subject: [PATCH] Added uri parsing

---
 src/client.c |  79 ++++++++++++++++++++++-----
 src/uri.c    | 149 +++++++++++++++++++++++++++++++++++++++++++++++----
 src/uri.h    |  25 +++++----
 src/utils.h  |   5 +-
 4 files changed, 224 insertions(+), 34 deletions(-)

diff --git a/src/client.c b/src/client.c
index 6828400..1255068 100644
--- a/src/client.c
+++ b/src/client.c
@@ -46,6 +46,15 @@ int client_request_handler(sock *client, int req_num) {
     char *host, *hdr_connection, *webroot;
     unsigned long content_length = 0;
 
+    http_res res;
+    sprintf(res.version, "1.1");
+    res.status = http_get_status(501);
+    res.hdr.field_num = 0;
+    http_add_header_field(&res.hdr, "Date", http_get_date(buf, sizeof(buf)));
+    http_add_header_field(&res.hdr, "Server", SERVER_STR);
+
+    clock_gettime(CLOCK_MONOTONIC, &begin);
+
     fd_set socket_fds;
     FD_ZERO(&socket_fds);
     FD_SET(client->socket, &socket_fds);
@@ -53,15 +62,15 @@ int client_request_handler(sock *client, int req_num) {
     client_timeout.tv_usec = 0;
     ret = select(client->socket + 1, &socket_fds, NULL, NULL, &client_timeout);
     if (ret <= 0) {
-        return 1;
+        if (errno != 0) {
+            return 1;
+        }
+        client_keep_alive = 0;
+        res.status = http_get_status(408);
+        goto respond;
     }
     clock_gettime(CLOCK_MONOTONIC, &begin);
 
-    http_res res;
-    sprintf(res.version, "1.1");
-    res.status = http_get_status(501);
-    res.hdr.field_num = 0;
-
     http_req req;
     ret = http_receive_request(client, &req);
     if (ret != 0) {
@@ -94,11 +103,54 @@ int client_request_handler(sock *client, int req_num) {
 
     webroot = get_webroot(host);
     http_uri uri;
-    uri_init(&uri, webroot, req.uri);
+    ret = uri_init(&uri, webroot, req.uri, URI_DIR_MODE_INFO);
+    if (ret != 0) {
+        if (ret == 1) {
+            sprintf(err_msg, "Invalid URI: has to start with slash.");
+        } else if (ret == 2) {
+            sprintf(err_msg, "Invalid URI: contains relative path change (/../).");
+        }
+        res.status = http_get_status(400);
+        goto respond;
+    }
+
+    /*
+    print("webroot:       %s", uri.webroot);
+    print("req_path:      %s", uri.req_path);
+    print("path:          %s", uri.path);
+    print("pathinfo:      %s", uri.pathinfo);
+    print("query:         %s", uri.query);
+    print("filename:      %s", uri.filename);
+    print("filename_comp: %s", uri.filename_comp);
+    print("uri:           %s", uri.uri);
+    print("is_static:     %i", uri.is_static);
+    print("is_dir:        %i", uri.is_dir);
+     */
+
+    if (strcmp(uri.uri, req.uri) != 0) {
+        res.status = http_get_status(308);
+        http_add_header_field(&res.hdr, "Location", uri.uri);
+        goto respond;
+    }
+
+    if (uri.filename == NULL && (int) uri.is_static && (int) uri.is_dir && strlen(uri.pathinfo) == 0) {
+        res.status = http_get_status(403);
+        sprintf(err_msg, "It is not allowed to list the contents of this directory.");
+        goto respond;
+    } else if (uri.filename == NULL && (int) !uri.is_static && (int) uri.is_dir && strlen(uri.pathinfo) == 0) {
+        res.status = http_get_status(501);
+        sprintf(err_msg, "Listing contents of an directory is currently not implemented.");
+        // TODO list directory contents
+        goto respond;
+    } else if (uri.filename == NULL || (strlen(uri.pathinfo) > 0 && (int) uri.is_static)) {
+        res.status = http_get_status(404);
+        goto respond;
+    }
+
+
+
 
     respond:
-    http_add_header_field(&res.hdr, "Date", http_get_date(buf, sizeof(buf)));
-    http_add_header_field(&res.hdr, "Server", SERVER_STR);
     if (server_keep_alive && client_keep_alive) {
         http_add_header_field(&res.hdr, "Connection", "keep-alive");
         sprintf(buf, "timeout=%i, max=%i", CLIENT_TIMEOUT, REQ_PER_CONNECTION);
@@ -107,7 +159,7 @@ int client_request_handler(sock *client, int req_num) {
         http_add_header_field(&res.hdr, "Connection", "close");
     }
     unsigned long len = 0;
-    if (res.status->code >= 300 && res.status->code < 600) {
+    if (res.status->code >= 400 && res.status->code < 600) {
         http_error_msg *http_msg = http_get_error_msg(res.status->code);
         sprintf(msg_pre_buf, http_error_document, res.status->code, res.status->msg,
                 http_msg != NULL ? http_msg->err_msg : "", err_msg[0] != 0 ? err_msg : "");
@@ -136,12 +188,13 @@ int client_request_handler(sock *client, int req_num) {
     }
 
     clock_gettime(CLOCK_MONOTONIC, &end);
+    char *location = http_get_header_field(&res.hdr, "Location", HTTP_PRESERVE_UPPER);
     unsigned long micros = (end.tv_nsec - begin.tv_nsec) / 1000 + (end.tv_sec - begin.tv_sec) * 1000000;
-    print("%s%03i %s (%s)%s", http_get_status_color(res.status), res.status->code, res.status->msg,
-          format_duration(micros, buf), CLR_STR);
+    print("%s%03i %s%s%s (%s)%s", http_get_status_color(res.status), res.status->code, res.status->msg,
+          location != NULL ? " -> " : "", location != NULL ? location : "", format_duration(micros, buf), CLR_STR);
 
     abort:
-
+    uri_free(&uri);
     http_free_req(&req);
     http_free_res(&res);
     return !client_keep_alive;
diff --git a/src/uri.c b/src/uri.c
index 601c041..12ef631 100644
--- a/src/uri.c
+++ b/src/uri.c
@@ -8,11 +8,45 @@
 #include "uri.h"
 
 
-int uri_init(http_uri *uri, const char *webroot, const char *uri_str) {
+int path_is_directory(const char *path) {  // 1 if stat() succeeds and path is a directory, else 0
+    struct stat info;
+    return (stat(path, &info) != 0) ? 0 : (S_ISDIR(info.st_mode) != 0);
+}
+
+int path_is_file(const char *path) {  // 1 only for regular files (stat() follows symlinks), else 0
+    struct stat statbuf;
+    return stat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) != 0;
+}
+
+int path_exists(const char *path) {  // 1 if stat() succeeds on path (any file type), else 0
+    struct stat info;
+    return stat(path, &info) != 0 ? 0 : 1;
+}
+
+/* Splits uri_str (must start with '/') into the members of *uri, resolving files below webroot.
+ * Returns 0 on success, 1 without leading slash, 2 if the decoded path contains a "." or ".."
+ * segment, 3 if webroot plus path would overflow the internal 1024 byte buffers. Members set
+ * before an error stay allocated, so the caller must call uri_free() in every case. */
+int uri_init(http_uri *uri, const char *webroot, const char *uri_str, int dir_mode) {
+    char buf0[1024], buf1[1024], buf2[1024], buf3[1024];
+    uri->webroot = NULL;
+    uri->req_path = NULL;
+    uri->path = NULL;
+    uri->pathinfo = NULL;
+    uri->query = NULL;
+    uri->filename = NULL;
+    uri->filename_comp = NULL;
+    uri->uri = NULL;
+    uri->etag = NULL;
+    uri->is_static = 1;
+    uri->is_dir = 0;
+    if (uri_str[0] != '/') {
+        return 1;
+    }
     uri->webroot = malloc(strlen(webroot) + 1);
     strcpy(uri->webroot, webroot);
 
-    char* query = strchr(uri_str, '?');
+    char *query = strchr(uri_str, '?');
     if (query == NULL) {
         uri->query = NULL;
     } else {
@@ -20,21 +54,135 @@ int uri_init(http_uri *uri, const char *webroot, const char *uri_str) {
         query++;
         ssize_t size = strlen(query) + 1;
         uri->query = malloc(size);
-        url_decode(query, uri->query, &size);
+        strcpy(uri->query, query);
     }
 
     ssize_t size = strlen(uri_str) + 1;
-    char *uri_dec = malloc(size);
-    url_decode(uri_str, uri_dec, &size);
+    uri->req_path = malloc(size);
+    url_decode(uri_str, uri->req_path, &size);
+    size = strlen(uri->req_path);
+    if (strstr(uri->req_path, "/../") != NULL || strstr(uri->req_path, "/./") != NULL ||
+        (size >= 3 && strcmp(uri->req_path + size - 3, "/..") == 0) ||
+        (size >= 2 && strcmp(uri->req_path + size - 2, "/.") == 0)) {
+        // A trailing "/.." would escape webroot just like "/../". uri_free() runs on this
+        // struct later, so the query pointer must not be left dangling after freeing it here.
+        free(uri->query);
+        uri->query = NULL;
+        return 2;
+    }
+    // The sprintf() calls below fill 1024 byte stack buffers with webroot + path + a suffix of
+    // at most 11 characters ("/index.html"); refuse anything that would not fit.
+    if (strlen(uri->webroot) + size + 16 > sizeof(buf0)) {
+        return 3;
+    }
+
+    // One spare byte each: a '/' gets appended to path and prepended to pathinfo further down.
+    uri->path = malloc(size + 2);
+    uri->pathinfo = malloc(size + 2);
+    strcpy(uri->path, uri->req_path);
+    strcpy(uri->pathinfo, "");
+    if (size > 0 && uri->path[size - 1] == '/') {
+        uri->path[size - 1] = 0;
+    }
+    while (1) {
+        sprintf(buf0, "%s%s", uri->webroot, uri->path);
+        sprintf(buf1, "%s.php", buf0);
+        sprintf(buf2, "%s.html", buf0);
+
+        if (strlen(uri->path) <= 1 || path_exists(buf0) || path_is_file(buf1) || path_is_file(buf2)) {
+            break;
+        }
+
+        // Move the last path component to the front of pathinfo and retry with the parent.
+        char *ptr;
+        parent_dir:
+        ptr = strrchr(uri->path, '/');
+        size = strlen(ptr);
+        sprintf(buf3, "%.*s%s", (int) size, ptr, uri->pathinfo);
+        strcpy(uri->pathinfo, buf3);
+        ptr[0] = 0;
+    }
+    if (uri->pathinfo[0] != 0) {
+        // Drop the leading '/' of pathinfo.
+        sprintf(buf3, "%s", uri->pathinfo + 1);
+        strcpy(uri->pathinfo, buf3);
+    }
+
+    if (path_is_file(buf0)) {
+        uri->filename = malloc(strlen(buf0) + 1);
+        strcpy(uri->filename, buf0);
+        ssize_t len = strlen(uri->path);
+        // Guard the lengths: short paths such as "/a" must not be read before their start.
+        if (len >= 5 && strncmp(uri->path + len - 5, ".html", 5) == 0) {
+            uri->path[len - 5] = 0;
+        } else if (len >= 4 && strncmp(uri->path + len - 4, ".php", 4) ==  0) {
+            uri->path[len - 4] = 0;
+            uri->is_static = 0;
+        }
+    } else if (path_is_file(buf1)) {
+        uri->is_static = 0;
+        uri->filename = malloc(strlen(buf1) + 1);
+        strcpy(uri->filename, buf1);
+    } else if (path_is_file(buf2)) {
+        uri->filename = malloc(strlen(buf2) + 1);
+        strcpy(uri->filename, buf2);
+    } else {
+        uri->is_dir = 1;
+        strcpy(uri->path + strlen(uri->path), "/");
+        sprintf(buf1, "%s%sindex.php", uri->webroot, uri->path);
+        sprintf(buf2, "%s%sindex.html", uri->webroot, uri->path);
+        if (path_is_file(buf1)) {
+            uri->filename = malloc(strlen(buf1) + 1);
+            strcpy(uri->filename, buf1);
+            uri->is_static = 0;
+        } else if (path_is_file(buf2)) {
+            uri->filename = malloc(strlen(buf2) + 1);
+            strcpy(uri->filename, buf2);
+        } else {
+            if (dir_mode == URI_DIR_MODE_FORBIDDEN) {
+                uri->is_static = 1;
+            } else if (dir_mode == URI_DIR_MODE_LIST) {
+                uri->is_static = 0;
+            } else if (dir_mode == URI_DIR_MODE_INFO) {
+                if (strlen(uri->path) > 1) {
+                    // Treat the directory name as pathinfo and continue with its parent.
+                    uri->path[strlen(uri->path) - 1] = 0;
+                    sprintf(buf0, "/%s", uri->pathinfo);
+                    strcpy(uri->pathinfo, buf0);
+                    goto parent_dir;
+                }
+            }
+        }
+    }
+
+    size_t path_len = strlen(uri->path);
+    // Only a whole trailing "index" segment is implied by its directory; "/myindex" is kept.
+    if (path_len >= 6 && strcmp(uri->path + path_len - 6, "/index") == 0) {
+        path_len -= 5;
+        uri->path[path_len] = 0;
+    }
+    if (strcmp(uri->pathinfo, "index.php") == 0 || strcmp(uri->pathinfo, "index.html") == 0) {
+        uri->pathinfo[0] = 0;
+    }
+
+    // Assemble the canonical URI on the heap; with a query it may be longer than buf0.
+    const char *sep = (uri->pathinfo[0] == 0 || (path_len > 0 && uri->path[path_len - 1] == '/')) ? "" : "/";
+    size_t query_len = uri->query != NULL ? strlen(uri->query) + 1 : 0;
+    uri->uri = malloc(path_len + strlen(sep) + strlen(uri->pathinfo) + query_len + 1);
+    sprintf(uri->uri, "%s%s%s%s%s", uri->path, sep, uri->pathinfo,
+            uri->query != NULL ? "?" : "", uri->query != NULL ? uri->query : "");
 
     return 0;
 }
 
 void uri_free(http_uri *uri) {
-    free(uri->webroot);
-    free(uri->path);
-    free(uri->pathinfo);
-    if (uri->query != NULL) free(uri->query);
-    free(uri->filename);
-    free(uri->uri);
+    // Releases every string owned by *uri and resets the pointers to NULL, so calling
+    // uri_free() twice on the same struct is harmless. free(NULL) is a no-op, hence
+    // members that were never set need no guard.
+    char **members[] = {&uri->webroot, &uri->req_path, &uri->path, &uri->pathinfo, &uri->query,
+                        &uri->filename, &uri->filename_comp, &uri->etag, &uri->uri};
+    for (size_t i = 0; i < sizeof(members) / sizeof(members[0]); i++) {
+        free(*members[i]);
+        *members[i] = NULL;
+    }
 }
diff --git a/src/uri.h b/src/uri.h
index a3193d3..1d632d2 100644
--- a/src/uri.h
+++ b/src/uri.h
@@ -10,20 +10,27 @@
 
 #include <sys/stat.h>
 
+#define URI_DIR_MODE_FORBIDDEN 0  // directory without index file: uri_init() sets is_static = 1
+#define URI_DIR_MODE_LIST 1  // directory without index file: uri_init() sets is_static = 0
+#define URI_DIR_MODE_INFO 2  // directory without index file: its name moves into pathinfo, the parent is tried
+
 typedef struct {
-    char *webroot;
-    char *path;
-    char *pathinfo;
-    char *query;
-    char *filename;
-    char *filename_comp;
-    char *uri;
+    char *webroot;        // "/srv/www/www.test.org"
+    char *req_path;       // "/account/login" (output of url_decode() on the request URI)
+    char *path;           // "/account/"
+    char *pathinfo;       // "login"
+    char *query;          // "username=test" (copied as received, not url-decoded)
+    char *filename;       // "/srv/www/www.test.org/account/index.php" (starts with webroot)
+    char *filename_comp;  // "/srv/www/www.test.org/res/.file.css.compressed"
+    char *uri;            // "/account/login?username=test"
+    char *etag;           // uri_init() only resets this to NULL
     struct stat stat;
-    int is_static:1;
+    unsigned int is_static:1;  // cleared by uri_init() for .php targets and for URI_DIR_MODE_LIST
+    unsigned int is_dir:1;  // set by uri_init() when it falls back to directory handling
 } http_uri;
 
 
-int uri_init(http_uri *uri, const char *webroot, const char *uri_str);
+int uri_init(http_uri *uri, const char *webroot, const char *uri_str, int dir_mode);
 
 void uri_free(http_uri *uri);
 
diff --git a/src/utils.h b/src/utils.h
index 954e297..0b69f2a 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -13,10 +13,11 @@ char *log_prefix;
 #define out_1(fmt) fprintf(parent_stdout, "%s" fmt "\n", log_prefix)
 #define out_2(fmt, args...) fprintf(parent_stdout, "%s" fmt "\n", log_prefix, args)
 
-#define out_x(x, arg1, arg2, arg3, arg4, arg5, arg6, arg7, FUNC, ...) FUNC
+#define out_x(x, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, FUNC, ...) FUNC  // expands to its 11th argument
 
 #define print(...) out_x(, ##__VA_ARGS__, out_2(__VA_ARGS__), out_2(__VA_ARGS__), out_2(__VA_ARGS__), \
-                         out_2(__VA_ARGS__), out_2(__VA_ARGS__), out_2(__VA_ARGS__), out_1(__VA_ARGS__))
+                         out_2(__VA_ARGS__), out_2(__VA_ARGS__), out_2(__VA_ARGS__), out_2(__VA_ARGS__), \
+                         out_2(__VA_ARGS__), out_1(__VA_ARGS__))  // out_1 for a bare format string, out_2 for 1 to 8 extra arguments
 
 
 char *format_duration(unsigned long micros, char *buf);