summaryrefslogtreecommitdiff
path: root/src/kernel
diff options
context:
space:
mode:
authordzwdz2023-06-04 20:43:51 +0200
committerdzwdz2023-06-04 20:43:51 +0200
commit78cb60b644538a33e0479f25393d6c861e3605f8 (patch)
tree15d310b2bba5cce086633c025080155ca36e7c43 /src/kernel
parent8fd4943b2721696f86783d22dd2e8d593a22a766 (diff)
kernel: rework /proc/ and process IDs
I have yet to write proper docs, but the TL;DR is: mounting /proc/ creates a new pid namespace. You're still visible in the old namespace with your old pid, but your children won't be. You see your own pid as 1. The current pids of your children are preserved, and new pids are allocated starting right after the highest one among your children.
Diffstat (limited to 'src/kernel')
-rw-r--r--src/kernel/proc.c119
-rw-r--r--src/kernel/proc.h18
-rw-r--r--src/kernel/syscalls.c2
-rw-r--r--src/kernel/vfs/procfs.c95
4 files changed, 185 insertions, 49 deletions
diff --git a/src/kernel/proc.c b/src/kernel/proc.c
index 65cbd81..108a006 100644
--- a/src/kernel/proc.c
+++ b/src/kernel/proc.c
@@ -30,10 +30,12 @@ Proc *proc_seed(void *data, size_t datalen) {
proc_first->pages = pagedir_new();
proc_first->mount = vfs_mount_seed();
proc_first->globalid = next_pid++;
- proc_first->cid = 1;
- proc_first->nextcid = 1;
proc_first->_handles = kzalloc(sizeof(Handle) * HANDLE_MAX);
+ proc_first->pns = proc_first;
+ proc_first->localid = 1;
+ proc_first->nextlid = 2;
+
// map .shared
extern char _shared_len;
for (size_t p = 0; p < (size_t)&_shared_len; p += PAGE_SIZE)
@@ -74,12 +76,18 @@ Proc *proc_fork(Proc *parent, int flags) {
child->parent = parent;
parent->child = child;
- if (parent->nextcid == 0)
+ if (next_pid == 0) {
panic_unimplemented();
- child->cid = parent->nextcid++;
- child->nextcid = 1;
+ }
child->globalid = next_pid++;
+ child->pns = parent->pns;
+ if (child->pns->nextlid == 0) {
+ panic_unimplemented();
+ }
+ child->localid = child->pns->nextlid++;
+
+
if ((flags & FORK_NEWFS) == 0 && parent->controlled) {
child->controlled = parent->controlled;
assert(child->controlled->provhcnt);
@@ -110,6 +118,106 @@ Proc *proc_fork(Proc *parent, int flags) {
return child;
}
+/** Reports whether proc counts as a member of the namespace owned by ns.
+ * That is the case when proc is ns itself, or when the ->pns of proc's parent
+ * is ns (so a namespace-owning child is still a member of its parent's
+ * namespace). */
+bool proc_ns_contains(Proc *ns, Proc *proc) {
+	if (proc == ns) {
+		return true;
+	}
+	/* a parentless process can only be contained as the owner itself */
+	return proc->parent != NULL && proc->parent->pns == ns;
+}
+
+/** Returns the id under which proc is seen from inside the namespace ns.
+ * ns itself is always seen as 1; every other process is seen as its ->localid.
+ * Asserts that proc is visible from ns: a process that owns a namespace must
+ * have a parent whose ->pns is ns, any other process must have ->pns == ns. */
+uint32_t proc_ns_id(Proc *ns, Proc *proc) {
+	if (proc == ns) {
+		return 1;
+	}
+	if (proc->pns != proc) {
+		assert(proc->pns == ns);
+	} else {
+		/* proc owns its own namespace, so check the one it was born into */
+		assert(proc->parent->pns == ns);
+	}
+	return proc->localid;
+}
+
+/** Looks up the process that the namespace ns sees under the given id.
+ * ns must own a namespace (asserted). Walks ns with proc_ns_next and returns
+ * the first process whose proc_ns_id matches, or NULL if there is none. */
+Proc *proc_ns_byid(Proc *ns, uint32_t id) {
+	assert(ns->pns == ns);
+	Proc *it = ns;
+	while (it != NULL && proc_ns_id(ns, it) != id) {
+		it = proc_ns_next(ns, it);
+	}
+	return it;
+}
+
+/** Returns the process following p in a walk over the namespace ns, or NULL
+ * when the walk is over (also when p is NULL).
+ * Order: p's first child if ns contains it; otherwise p's next sibling;
+ * otherwise the next sibling of the closest ancestor that has one. The walk
+ * never climbs past ns. Any non-NULL result is asserted to be in ns. */
+Proc *proc_ns_next(Proc *ns, Proc *p) {
+	Proc *ret = NULL;
+	/* see comments in proc_next */
+
+	if (p == NULL) {
+		return NULL;
+	}
+
+	if (p->child && proc_ns_contains(ns, p->child)) {
+		/* descend into children who own their own namespace, but no further */
+		ret = p->child;
+	} else if (p != ns) {
+		// TODO diverged from proc_next, integrate this fix into it
+		// also once you do that do regression tests - this behaviour is buggy
+		for (;;) {
+			if (p->sibling) {
+				ret = p->sibling;
+				break;
+			}
+			p = p->parent;
+			assert(p);
+			if (p == ns) {
+				/* don't escape the root */
+				break;
+			}
+		}
+	}
+
+	if (ret != NULL) {
+		assert(proc_ns_contains(ns, ret));
+	}
+	return ret;
+}
+
+/** Makes proc the owner of a new process namespace.
+ * Does nothing if proc already owns one (proc->pns == proc).
+ * Otherwise walks proc's descendants: each one that belonged to the old
+ * namespace is moved to the new one and keeps its ->localid; a descendant
+ * that owns a namespace of its own stays an owner and is not descended into.
+ * proc->nextlid ends up one past the highest ->localid seen during the walk,
+ * and at least 2. */
+void proc_ns_create(Proc *proc) {
+	// TODO test this - changing namespaces mid-walk is easy to get wrong
+	// TODO document process namespaces
+	Proc *old = proc->pns;
+	if (old == proc) {
+		return;
+	}
+	proc->pns = proc;
+	proc->nextlid = 2;
+	for (Proc *it = proc; it; ) {
+		if (it != proc) {
+			/* keep existing ids valid: allocate above the highest one */
+			if (proc->nextlid < it->localid + 1) {
+				proc->nextlid = it->localid + 1;
+			}
+			if (it->pns == old) {
+				it->pns = proc;
+			} else {
+				assert(it->pns == it);
+			}
+		}
+
+		/* analogous to proc_ns_next - which can't be used directly as it gets
+		 * confused by changing namespaces */
+
+		/* descend into children who own their own namespace, but no further */
+		if (it->child && (proc_ns_contains(proc, it->child) || proc_ns_contains(old, it->child))) {
+			it = it->child;
+			continue;
+		}
+		if (it == proc) {
+			break;
+		}
+		while (!it->sibling) {
+			it = it->parent;
+			assert(it);
+			if (it == proc) {
+				/* Climbed back to the root: the walk is complete. Leaving
+				 * only this inner loop would fall through to
+				 * `it = it->sibling` and continue with proc's own sibling,
+				 * i.e. outside of proc's subtree. */
+				return;
+			}
+		}
+		it = it->sibling;
+	}
+}
+
/* meant to be used with p->*_refcount */
static bool unref(uint64_t *refcount) {
if (!refcount) return true;
@@ -394,6 +502,7 @@ Handle *proc_handle_get(Proc *p, hid_t id) {
} else if (id == HANDLE_PROCFS) {
if (!p->specialh.procfs) {
Handle *h = kmalloc(sizeof *h);
+ proc_ns_create(p);
*h = (Handle){
.type = HANDLE_FS_FRONT,
.backend = procfs_backend(p),
diff --git a/src/kernel/proc.h b/src/kernel/proc.h
index 8a19d8f..dce99fb 100644
--- a/src/kernel/proc.h
+++ b/src/kernel/proc.h
@@ -69,12 +69,19 @@ struct Proc {
Handle *procfs;
} specialh;
- uint32_t cid; /* child id. unique amongst all of this process' siblings */
- uint32_t nextcid; /* the child id to assign to the next spawned child */
uint32_t globalid; /* only for internal use, don't expose to userland */
uint32_t refcount; /* non-owning. should always be 0 on kill */
bool noreap;
+ /* localid is unique in a process namespace.
+ * if pns == self: the process owns a namespace
+ * the lid it sees is 1
+ * the lid its parent sees is localid
+ * otherwise: nextlid is unused */
+ Proc *pns;
+ uint32_t localid;
+ uint32_t nextlid;
+
/* allocated once, the requests from WAITS4FS get stored here */
VfsReq *reqslot;
@@ -97,6 +104,13 @@ extern Proc *proc_cur;
Proc *proc_seed(void *data, size_t datalen);
Proc *proc_fork(Proc *parent, int flags);
+/** True if proc is ns itself, or if the ->pns of proc's parent is ns. */
+bool proc_ns_contains(Proc *ns, Proc *proc);
+/** The id of proc as seen from ns: 1 for ns itself, proc->localid otherwise.
+ * Asserts that proc is visible from ns. */
+uint32_t proc_ns_id(Proc *ns, Proc *proc);
+/** Finds the process seen as id from ns, or NULL. ns must own a namespace. */
+Proc *proc_ns_byid(Proc *ns, uint32_t id);
+/** Like proc_next, but stays in *ns */
+Proc *proc_ns_next(Proc *ns, Proc *p);
+/** Makes proc the owner of a new namespace; no-op if it already owns one. */
+void proc_ns_create(Proc *proc);
+
void proc_kill(Proc *proc, int ret);
/** Kills all descendants. */
void proc_filicide(Proc *proc, int ret);
diff --git a/src/kernel/syscalls.c b/src/kernel/syscalls.c
index 0810720..589098b 100644
--- a/src/kernel/syscalls.c
+++ b/src/kernel/syscalls.c
@@ -74,7 +74,7 @@ long _sys_fork(int flags, hid_t __user *fs_front) {
pcpy_to(proc_cur, fs_front, &hid, sizeof hid);
}
}
- SYSCALL_RETURN(child->cid);
+ SYSCALL_RETURN(proc_ns_id(proc_cur->pns, child));
}
hid_t _sys_open(const char __user *path, long len, int flags) {
diff --git a/src/kernel/vfs/procfs.c b/src/kernel/vfs/procfs.c
index 4f2bbd1..7669b78 100644
--- a/src/kernel/vfs/procfs.c
+++ b/src/kernel/vfs/procfs.c
@@ -7,6 +7,7 @@
#include <shared/mem.h>
enum phandle_type {
+ PhRoot,
PhDir,
PhIntr,
PhMem,
@@ -24,48 +25,52 @@ static void procfs_cleanup(VfsBackend *be);
static int isdigit(int c);
+/* Resolves a procfs path into a newly kmalloc'd handle, or NULL on failure.
+ * The first byte of path is skipped without being inspected (presumably the
+ * leading '/' - confirm against the caller). What follows must be one of:
+ *   ""          -> PhRoot
+ *   "<id>/"     -> PhDir
+ *   "<id>/intr" -> PhIntr
+ *   "<id>/mem"  -> PhMem
+ * <id> is a decimal number looked up with proc_ns_byid(root, id). The handle
+ * stores the globalid of the process found; for PhRoot it stores 0. */
static struct phandle *
-openpath(const char *path, size_t len, Proc *p)
+openpath(const char *path, size_t len, Proc *root)
{
struct phandle *h;
enum phandle_type type;
+ uint32_t gid = 0;
if (len == 0) return NULL;
path++, len--;
- while (len && isdigit(*path)) {
- /* parse numerical segment / "directory" name */
- uint32_t cid = 0;
+ if (len == 0) {
+ type = PhRoot;
+ } else if (isdigit(*path)) {
+ Proc *p;
+ uint32_t lid = 0;
for (; 0 < len && *path != '/'; path++, len--) {
- char c = *path;
- if (!isdigit(c)) {
+ if (!isdigit(*path)) {
return NULL;
}
- cid = cid * 10 + *path - '0';
+ /* NOTE(review): no overflow check - an over-long digit run wraps lid */
+ lid = lid * 10 + *path - '0';
+ }
+ if (len == 0) {
+ return NULL;
}
- if (len == 0) return NULL;
assert(*path == '/');
path++, len--;
- p = p->child;
- if (!p) return NULL;
- while (p->cid != cid) {
- p = p->sibling;
- if (!p) return NULL;
+ if (len == 0) {
+ type = PhDir;
+ } else if (len == 4 && memcmp(path, "intr", 4) == 0) {
+ type = PhIntr;
+ } else if (len == 3 && memcmp(path, "mem", 3) == 0) {
+ type = PhMem;
+ } else {
+ return NULL;
}
- }
- /* parse the per-process part */
- if (len == 0) {
- type = PhDir;
- } else if (len == 4 && memcmp(path, "intr", 4) == 0) {
- type = PhIntr;
- } else if (len == 3 && memcmp(path, "mem", 3) == 0) {
- type = PhMem;
+ /* the file name is validated first, the process is only looked up after */
+ p = proc_ns_byid(root, lid);
+ if (!p) {
+ return NULL;
+ }
+ gid = p->globalid;
} else {
return NULL;
}
h = kmalloc(sizeof *h);
- h->gid = p->globalid;
+ h->gid = gid;
h->type = type;
return h;
}
@@ -87,36 +92,47 @@ procfs_accept(VfsReq *req)
Proc *p;
char buf[512];
assert(root);
+ assert(root->pns == root);
+
if (req->type == VFSOP_OPEN) {
assert(req->input.kern);
h = openpath(req->input.buf_kern, req->input.len, root);
vfsreq_finish_short(req, h ? (long)h : -ENOENT);
return;
- }
- assert(h);
- p = findgid(h->gid, root);
- if (!p) {
- vfsreq_finish_short(req, -EGENERIC);
+ } else if (req->type == VFSOP_CLOSE) {
+ assert(h);
+ kfree(h);
+ vfsreq_finish_short(req, 0);
return;
+ } else {
+ assert(h);
}
- if (req->type == VFSOP_READ && h->type == PhDir) {
+ if (h->type != PhRoot) {
+ p = findgid(h->gid, root);
+ if (!p) {
+ vfsreq_finish_short(req, -ENOENT);
+ return;
+ }
+ }
+
+ if (req->type == VFSOP_READ && (h->type == PhDir || h->type == PhRoot)) {
// TODO port dirbuild to kernel
int pos = 0;
if (req->offset != 0) {
vfsreq_finish_short(req, -ENOSYS);
return;
}
- pos += snprintf(buf + pos, 512 - pos, "intr")+1;
- pos += snprintf(buf + pos, 512 - pos, "mem")+1;
- for (Proc *iter = p->child; iter; iter = iter->sibling) {
- assert(pos < 512);
- // processes could possibly be identified by unique identifiers instead
- // e.g. an encrypted gid, or just a randomly generated one
- // con: would require bringing in a crypto library
- pos += snprintf(buf + pos, 512 - pos, "%d/", iter->cid) + 1;
- if (512 <= pos) {
- vfsreq_finish_short(req, -1);
+ if (h->type == PhDir) {
+ pos += snprintf(buf + pos, 512 - pos, "intr")+1;
+ pos += snprintf(buf + pos, 512 - pos, "mem")+1;
+ } else {
+ for (Proc *it = root; it; it = proc_ns_next(root, it)) {
+ assert(pos < 512);
+ pos += snprintf(buf + pos, 512 - pos, "%d/", proc_ns_id(root, it)) + 1;
+ if (512 <= pos) {
+ vfsreq_finish_short(req, -EGENERIC);
+ }
}
}
assert(0 <= pos && (size_t)pos <= sizeof buf);
@@ -136,9 +152,6 @@ procfs_accept(VfsReq *req)
} else if (req->type == VFSOP_WRITE && h->type == PhIntr) {
proc_intr(p);
vfsreq_finish_short(req, req->input.len);
- } else if (req->type == VFSOP_CLOSE) {
- kfree(h);
- vfsreq_finish_short(req, 0);
} else {
vfsreq_finish_short(req, -ENOSYS);
}