diff options
author | dzwdz | 2023-06-04 20:43:51 +0200 |
---|---|---|
committer | dzwdz | 2023-06-04 20:43:51 +0200 |
commit | 78cb60b644538a33e0479f25393d6c861e3605f8 (patch) | |
tree | 15d310b2bba5cce086633c025080155ca36e7c43 /src | |
parent | 8fd4943b2721696f86783d22dd2e8d593a22a766 (diff) |
kernel: rework /proc/ and process IDs
I have yet to write proper docs, but the TL;DR is:
Mounting /proc/ creates a new pid namespace. You're still visible in the old
namespace with your old pid, but your children won't be. You see your own pid
as 1. The current pids of your children are preserved; new pids are allocated
starting just above the highest one already in use by them.
Diffstat (limited to 'src')
-rw-r--r-- | src/kernel/proc.c | 119 | ||||
-rw-r--r-- | src/kernel/proc.h | 18 | ||||
-rw-r--r-- | src/kernel/syscalls.c | 2 | ||||
-rw-r--r-- | src/kernel/vfs/procfs.c | 95 | ||||
-rw-r--r-- | src/shared/printf.c | 1 | ||||
-rw-r--r-- | src/user/app/ps/ps.c | 77 | ||||
-rw-r--r-- | src/user/app/shell/shell.c | 10 |
7 files changed, 222 insertions, 100 deletions
diff --git a/src/kernel/proc.c b/src/kernel/proc.c index 65cbd81..108a006 100644 --- a/src/kernel/proc.c +++ b/src/kernel/proc.c @@ -30,10 +30,12 @@ Proc *proc_seed(void *data, size_t datalen) { proc_first->pages = pagedir_new(); proc_first->mount = vfs_mount_seed(); proc_first->globalid = next_pid++; - proc_first->cid = 1; - proc_first->nextcid = 1; proc_first->_handles = kzalloc(sizeof(Handle) * HANDLE_MAX); + proc_first->pns = proc_first; + proc_first->localid = 1; + proc_first->nextlid = 2; + // map .shared extern char _shared_len; for (size_t p = 0; p < (size_t)&_shared_len; p += PAGE_SIZE) @@ -74,12 +76,18 @@ Proc *proc_fork(Proc *parent, int flags) { child->parent = parent; parent->child = child; - if (parent->nextcid == 0) + if (next_pid == 0) { panic_unimplemented(); - child->cid = parent->nextcid++; - child->nextcid = 1; + } child->globalid = next_pid++; + child->pns = parent->pns; + if (child->pns->nextlid == 0) { + panic_unimplemented(); + } + child->localid = child->pns->nextlid++; + + if ((flags & FORK_NEWFS) == 0 && parent->controlled) { child->controlled = parent->controlled; assert(child->controlled->provhcnt); @@ -110,6 +118,106 @@ Proc *proc_fork(Proc *parent, int flags) { return child; } +bool proc_ns_contains(Proc *ns, Proc *proc) { + /* a namespace contains all the processes with ->ns == ns and all their + * direct children */ + if (ns == proc) return true; + if (proc->parent == NULL) return false; + return proc->parent->pns == ns; +} + +uint32_t proc_ns_id(Proc *ns, Proc *proc) { + if (proc == ns) { + return 1; + } else { + if (proc->pns == proc) { + assert(proc->parent->pns == ns); + } else { + assert(proc->pns == ns); + } + return proc->localid; + } +} + +Proc *proc_ns_byid(Proc *ns, uint32_t id) { + assert(ns->pns == ns); + for (Proc *it = ns; it; it = proc_ns_next(ns, it)) { + if (proc_ns_id(ns, it) == id) { + return it; + } + } + return NULL; +} + +Proc *proc_ns_next(Proc *ns, Proc *p) { + Proc *ret = NULL; + /* see comments in proc_next 
*/ + + if (!p) goto end; + /* descend into children who own their own namespace, but no further */ + if (p->child && proc_ns_contains(ns, p->child)) { + ret = p->child; + goto end; + } + // TODO diverged from proc_next, integrate this fix into it + // also once you do that do regression tests - this behaviour is buggy + if (p == ns) { + /* don't escape the root */ + goto end; + } + while (!p->sibling) { + p = p->parent; + assert(p); + if (p == ns) goto end; + } + ret = p->sibling; + +end: + if (ret != NULL) { + assert(proc_ns_contains(ns, ret)); + } + return ret; +} + +void proc_ns_create(Proc *proc) { + // TODO test this. lots of fucky behaviour can happen here + // TODO document process namespaces + Proc *old = proc->pns; + if (old == proc) return; + proc->pns = proc; + proc->nextlid = 2; + for (Proc *it = proc; it; ) { + if (it != proc) { + if (proc->nextlid < it->localid + 1) { + proc->nextlid = it->localid + 1; + } + if (it->pns == old) { + it->pns = proc; + } else { + assert(it->pns == it); + } + } + + /* analogous to proc_ns_next - which can't be used directly as it gets + * confused by changing namespaces */ + + /* descend into children who own their own namespace, but no further */ + if (it->child && (proc_ns_contains(proc, it->child) || proc_ns_contains(old, it->child))) { + it = it->child; + continue; + } + if (it == proc) { + break; + } + while (!it->sibling) { + it = it->parent; + if (it == proc) break; + assert(it); + } + it = it->sibling; + } +} + /* meant to be used with p->*_refcount */ static bool unref(uint64_t *refcount) { if (!refcount) return true; @@ -394,6 +502,7 @@ Handle *proc_handle_get(Proc *p, hid_t id) { } else if (id == HANDLE_PROCFS) { if (!p->specialh.procfs) { Handle *h = kmalloc(sizeof *h); + proc_ns_create(p); *h = (Handle){ .type = HANDLE_FS_FRONT, .backend = procfs_backend(p), diff --git a/src/kernel/proc.h b/src/kernel/proc.h index 8a19d8f..dce99fb 100644 --- a/src/kernel/proc.h +++ b/src/kernel/proc.h @@ -69,12 +69,19 @@ 
struct Proc { Handle *procfs; } specialh; - uint32_t cid; /* child id. unique amongst all of this process' siblings */ - uint32_t nextcid; /* the child id to assign to the next spawned child */ uint32_t globalid; /* only for internal use, don't expose to userland */ uint32_t refcount; /* non-owning. should always be 0 on kill */ bool noreap; + /* localid is unique in a process namespace. + * if pns == self: the process owns a namespace + * the lid it sees is 1 + * the lid its parent sees is localid + * otheriwse: nextlid is unused */ + Proc *pns; + uint32_t localid; + uint32_t nextlid; + /* allocated once, the requests from WAITS4FS get stored here */ VfsReq *reqslot; @@ -97,6 +104,13 @@ extern Proc *proc_cur; Proc *proc_seed(void *data, size_t datalen); Proc *proc_fork(Proc *parent, int flags); +bool proc_ns_contains(Proc *ns, Proc *proc); +uint32_t proc_ns_id(Proc *ns, Proc *proc); +Proc *proc_ns_byid(Proc *ns, uint32_t id); +/** Like proc_next, but stays in *ns */ +Proc *proc_ns_next(Proc *ns, Proc *p); +void proc_ns_create(Proc *proc); + void proc_kill(Proc *proc, int ret); /** Kills all descendants. 
*/ void proc_filicide(Proc *proc, int ret); diff --git a/src/kernel/syscalls.c b/src/kernel/syscalls.c index 0810720..589098b 100644 --- a/src/kernel/syscalls.c +++ b/src/kernel/syscalls.c @@ -74,7 +74,7 @@ long _sys_fork(int flags, hid_t __user *fs_front) { pcpy_to(proc_cur, fs_front, &hid, sizeof hid); } } - SYSCALL_RETURN(child->cid); + SYSCALL_RETURN(proc_ns_id(proc_cur->pns, child)); } hid_t _sys_open(const char __user *path, long len, int flags) { diff --git a/src/kernel/vfs/procfs.c b/src/kernel/vfs/procfs.c index 4f2bbd1..7669b78 100644 --- a/src/kernel/vfs/procfs.c +++ b/src/kernel/vfs/procfs.c @@ -7,6 +7,7 @@ #include <shared/mem.h> enum phandle_type { + PhRoot, PhDir, PhIntr, PhMem, @@ -24,48 +25,52 @@ static void procfs_cleanup(VfsBackend *be); static int isdigit(int c); static struct phandle * -openpath(const char *path, size_t len, Proc *p) +openpath(const char *path, size_t len, Proc *root) { struct phandle *h; enum phandle_type type; + uint32_t gid = 0; if (len == 0) return NULL; path++, len--; - while (len && isdigit(*path)) { - /* parse numerical segment / "directory" name */ - uint32_t cid = 0; + if (len == 0) { + type = PhRoot; + } else if (isdigit(*path)) { + Proc *p; + uint32_t lid = 0; for (; 0 < len && *path != '/'; path++, len--) { - char c = *path; - if (!isdigit(c)) { + if (!isdigit(*path)) { return NULL; } - cid = cid * 10 + *path - '0'; + lid = lid * 10 + *path - '0'; + } + if (len == 0) { + return NULL; } - if (len == 0) return NULL; assert(*path == '/'); path++, len--; - p = p->child; - if (!p) return NULL; - while (p->cid != cid) { - p = p->sibling; - if (!p) return NULL; + if (len == 0) { + type = PhDir; + } else if (len == 4 && memcmp(path, "intr", 4) == 0) { + type = PhIntr; + } else if (len == 3 && memcmp(path, "mem", 3) == 0) { + type = PhMem; + } else { + return NULL; } - } - /* parse the per-process part */ - if (len == 0) { - type = PhDir; - } else if (len == 4 && memcmp(path, "intr", 4) == 0) { - type = PhIntr; - } else if 
(len == 3 && memcmp(path, "mem", 3) == 0) { - type = PhMem; + p = proc_ns_byid(root, lid); + if (!p) { + return NULL; + } + gid = p->globalid; } else { return NULL; } h = kmalloc(sizeof *h); - h->gid = p->globalid; + h->gid = gid; h->type = type; return h; } @@ -87,36 +92,47 @@ procfs_accept(VfsReq *req) Proc *p; char buf[512]; assert(root); + assert(root->pns == root); + if (req->type == VFSOP_OPEN) { assert(req->input.kern); h = openpath(req->input.buf_kern, req->input.len, root); vfsreq_finish_short(req, h ? (long)h : -ENOENT); return; - } - assert(h); - p = findgid(h->gid, root); - if (!p) { - vfsreq_finish_short(req, -EGENERIC); + } else if (req->type == VFSOP_CLOSE) { + assert(h); + kfree(h); + vfsreq_finish_short(req, 0); return; + } else { + assert(h); } - if (req->type == VFSOP_READ && h->type == PhDir) { + if (h->type != PhRoot) { + p = findgid(h->gid, root); + if (!p) { + vfsreq_finish_short(req, -ENOENT); + return; + } + } + + if (req->type == VFSOP_READ && (h->type == PhDir || h->type == PhRoot)) { // TODO port dirbuild to kernel int pos = 0; if (req->offset != 0) { vfsreq_finish_short(req, -ENOSYS); return; } - pos += snprintf(buf + pos, 512 - pos, "intr")+1; - pos += snprintf(buf + pos, 512 - pos, "mem")+1; - for (Proc *iter = p->child; iter; iter = iter->sibling) { - assert(pos < 512); - // processes could possibly be identified by unique identifiers instead - // e.g. 
an encrypted gid, or just a randomly generated one - // con: would require bringing in a crypto library - pos += snprintf(buf + pos, 512 - pos, "%d/", iter->cid) + 1; - if (512 <= pos) { - vfsreq_finish_short(req, -1); + if (h->type == PhDir) { + pos += snprintf(buf + pos, 512 - pos, "intr")+1; + pos += snprintf(buf + pos, 512 - pos, "mem")+1; + } else { + for (Proc *it = root; it; it = proc_ns_next(root, it)) { + assert(pos < 512); + pos += snprintf(buf + pos, 512 - pos, "%d/", proc_ns_id(root, it)) + 1; + if (512 <= pos) { + vfsreq_finish_short(req, -EGENERIC); + } } } assert(0 <= pos && (size_t)pos <= sizeof buf); @@ -136,9 +152,6 @@ procfs_accept(VfsReq *req) } else if (req->type == VFSOP_WRITE && h->type == PhIntr) { proc_intr(p); vfsreq_finish_short(req, req->input.len); - } else if (req->type == VFSOP_CLOSE) { - kfree(h); - vfsreq_finish_short(req, 0); } else { vfsreq_finish_short(req, -ENOSYS); } diff --git a/src/shared/printf.c b/src/shared/printf.c index 514b73e..fffd801 100644 --- a/src/shared/printf.c +++ b/src/shared/printf.c @@ -187,6 +187,7 @@ int __printf_internal(const char *fmt, va_list argp, case 's': const char *s = va_arg(argp, char*); if (s == NULL) s = "(null)"; + // TODO can segfault even if precision caps the string len = strlen(s); if (len > m.precision && m.precision != 0) len = m.precision; diff --git a/src/user/app/ps/ps.c b/src/user/app/ps/ps.c index 78d4204..d2b9ded 100644 --- a/src/user/app/ps/ps.c +++ b/src/user/app/ps/ps.c @@ -5,69 +5,50 @@ #include <stdlib.h> #include <string.h> -/* returns a pointer that can be set to NUL to undo the strcat */ -static char * -strtcat(char *dst, const char *src) -{ - char *s = dst + strlen(dst); - strcpy(s, src); - return s; -} - -static void -do_proc(char *path) +int +main(void) { - const int bufl = 4096; - char *buf = malloc(bufl); - FILE *f; - - { /* read the psdata into buf */ - char *s = strtcat(path, "mem"); - f = fopen(path, "r"); - *s = '\0'; - if (!f) errx(1, "couldn't open '%s'", path); 
- fseek(f, (long)_libc_psdata, SEEK_SET); - if (fread(buf, 1, 128, f) <= 0) { - strcpy(buf, "(no psdata)"); - } - buf[128] = '\0'; - fclose(f); + char *readbuf = malloc(4096); + char *procbuf = malloc(4096); + FILE *f = fopen("/proc/", "r"); + if (!f) { + err(1, "couldn't open /proc/"); } - printf("%20s %s\n", path, buf); - - f = fopen(path, "r"); - if (!f) errx(1, "couldn't open '%s'", path); - // TODO library for iterating over directories for (;;) { - int len = fread(buf, 1, bufl, f); + int len = fread(readbuf, 1, 4096, f); if (len <= 0) break; for (int pos = 0; pos < len; ) { - const char *end = memchr(buf + pos, 0, len - pos); + char *end = memchr(readbuf + pos, 0, len - pos); if (!end) { errx(1, "unimplemented: buffer overflow"); } - size_t entryl = end - (buf + pos) + 1; - if (isdigit(buf[pos])) { - /* yup, no overflow check */ - char *s = strtcat(path, buf + pos); - do_proc(path); - *s = '\0'; + size_t entryl = end - (readbuf+pos) + 1; + if (isdigit(readbuf[pos])) { + FILE *g; + sprintf(procbuf, "/proc/%smem", readbuf+pos); + g = fopen(procbuf, "r"); + if (!g) { + warn("couldn't open \"%s\"", procbuf); + strcpy(procbuf, "(can't peek)"); + } else { + fseek(g, (long)_libc_psdata, SEEK_SET); + if (fread(procbuf, 1, 128, g) <= 0) { + strcpy(procbuf, "(no psdata)"); + } + procbuf[128] = '\0'; + fclose(g); + } + end[-1] = '\0'; /* remove trailing slash */ + printf("%s\t%s\n", readbuf+pos, procbuf); } pos += entryl; } } - free(buf); + free(readbuf); + free(procbuf); fclose(f); -} - -int -main(void) -{ - char *buf = malloc(4096); - strcpy(buf, "/proc/"); - do_proc(buf); return 0; } diff --git a/src/user/app/shell/shell.c b/src/user/app/shell/shell.c index d2d7e37..becc8d8 100644 --- a/src/user/app/shell/shell.c +++ b/src/user/app/shell/shell.c @@ -58,10 +58,14 @@ void run_args(int argc, char **argv, struct redir *redir) { return; } _sys_mount(HANDLE_PROCFS, argv[1], strlen(argv[1])); - if (!fork2_n_mount("/")) { - fs_dir_inject(argv[1]); - exit(1); + /* + if (!(3 
<= argc && !strcmp(argv[2], "raw"))) { + if (!fork2_n_mount("/")) { + fs_dir_inject(argv[1]); + exit(1); + } } + */ return; } else if (!strcmp(argv[0], "cd")) { if (chdir(argc > 1 ? argv[1] : "/") < 0) |