From d8a32a1005c33f6b95bbb3699a8f41ff5e19800e Mon Sep 17 00:00:00 2001
From: dzwdz
Date: Mon, 9 Aug 2021 12:47:46 +0200
Subject: a sloppy implementation of path_simplify()

it's kinda bad. it passes the tests, though...
---
 src/kernel/tests/vfs.c | 14 ++++++++++++--
 src/kernel/vfs/path.c  | 29 +++++++++++++++++++++++++----
 src/kernel/vfs/path.h  |  3 ---
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/src/kernel/tests/vfs.c b/src/kernel/tests/vfs.c
index 017f36d..b982098 100644
--- a/src/kernel/tests/vfs.c
+++ b/src/kernel/tests/vfs.c
@@ -8,8 +8,11 @@ TEST(path_simplify) {
 		if (result == 0) { \
 			TEST_COND(len < 0); \
 		} else { \
-			TEST_COND(len > 0); \
-			/* TODO check equality */ \
+			if (len == sizeof(result) - 1) { \
+				TEST_COND(0 == memcmp(result, buf, len)); \
+			} else { \
+				TEST_COND(false); \
+			} \
 		} \
 	} while (0)
 
@@ -17,6 +20,8 @@ TEST(path_simplify) {
 
 	// some easy cases first
 	TEST_WRAPPER("/",         "/");
+	TEST_WRAPPER("/.",        "/");
+	TEST_WRAPPER("//",        "/");
 	TEST_WRAPPER("/asdf",     "/asdf");
 	TEST_WRAPPER("/asdf/",    "/asdf/");
 	TEST_WRAPPER("/asdf//",   "/asdf/");
@@ -31,10 +36,15 @@ TEST(path_simplify) {
 	TEST_WRAPPER("/asdf//.",  "/asdf/");
 
 	// going under the root or close to it
+	TEST_WRAPPER("/..",        0);
 	TEST_WRAPPER("/../asdf",   0);
 	TEST_WRAPPER("/../asdf/",  0);
 	TEST_WRAPPER("/./a/../..", 0);
 	TEST_WRAPPER("/a/a/../..", "/");
+	TEST_WRAPPER("/a/../a/..", "/");
+	TEST_WRAPPER("/a/../../a", 0);
+	TEST_WRAPPER("/../a/../a", 0);
+	TEST_WRAPPER("/../../a/a", 0);
 	TEST_WRAPPER("/////../..", 0);
 	TEST_WRAPPER("//a//../..", 0);
 
diff --git a/src/kernel/vfs/path.c b/src/kernel/vfs/path.c
index bd17e29..1cdcfe4 100644
--- a/src/kernel/vfs/path.c
+++ b/src/kernel/vfs/path.c
@@ -5,16 +5,22 @@ int path_simplify(const char *in, char *out, size_t len) {
 	if (len == 0)     return -1; // empty paths are invalid
 	if (in[0] != '/') return -1; // so are relative paths
 
-	int depth = 0;
+	int depth = 0; // shouldn't be needed!
 	int seg_len; // the length of the current path segment
+	int out_pos = 0;
+	bool directory = 0;
 
 	for (int i = 0; i < len; i += seg_len + 1) {
 		// TODO implement assert
 		if (in[i] != '/') panic();
 
 		seg_len = 0;
+		directory = false;
 		for (int j = i + 1; j < len; j++) {
-			if (in[j] == '/') break;
+			if (in[j] == '/') {
+				directory = true;
+				break;
+			}
 			seg_len++;
 		}
 
@@ -29,16 +35,31 @@ int path_simplify(const char *in, char *out, size_t len) {
 
 		if (seg_len == 0 || (seg_len == 1 && in[i + 1] == '.')) {
 			// the segment is // or /./
-			// the depth doesn't change
+			directory = true;
 		} else if (seg_len == 2 && in[i + 1] == '.' && in[i + 2] == '.') {
 			// the segment is /../
 			if (--depth < 0)
 				return -1;
+			// backtrack to last slash
+			while (out[--out_pos] != '/');
 		} else {
 			// normal segment
+			out[out_pos] = '/';
+			memcpy(&out[out_pos + 1], &in[i + 1], seg_len);
+			out_pos += seg_len + 1;
 			depth++;
 		}
+
 	}
 
-	return 1; // TODO
+	/* out_pos is 0 here if no segment was emitted or ".." backtracked over all
+	 * of them, so emit the root slash. TODO: restructure to avoid this fixup. */
+	if (out_pos == 0)
+		out[out_pos++] = '/';
+
+	if (directory) // if the path refers to a directory, append a trailing slash
+		if (out[out_pos-1] != '/') // unless it's already there
+			out[out_pos++] = '/';
+
+	return out_pos;
 }
diff --git a/src/kernel/vfs/path.h b/src/kernel/vfs/path.h
index 3aefccf..769aa27 100644
--- a/src/kernel/vfs/path.h
+++ b/src/kernel/vfs/path.h
@@ -1,10 +1,7 @@
 #pragma once
-#include <stdbool.h>
 #include <stddef.h>
 
 /** Reduce a path to its simplest form.
- * *in and *out can't overlap unless they're equal. Then, the path is modified
- * in-place.
  *
  * @return length of the string in *out, always less than len. Negative if the path was invalid.
  */
-- 
cgit v1.2.3