summaryrefslogtreecommitdiff
path: root/src/kernel/malloc.c
diff options
context:
space:
mode:
authordzwdz2024-07-16 02:34:03 +0200
committerdzwdz2024-07-16 02:34:03 +0200
commite29f0e294ac841e2036fe514df4ed66f5d0ec46f (patch)
treed37c40bdfecb153ab580f6119bf71294f3d77e93 /src/kernel/malloc.c
parent4fbb7e3c43440bd7eb4bf081c5be51dfe6f6e088 (diff)
kernel: use a slab allocator for kmalloc
For a while during development it managed to be faster than the old allocator despite having more overhead, probably because it makes better use of the cache. It no longer is - not sure why. The code definitely could be optimized to hell, though, and it'd probably recover its original edge. Non-power-of-2 sizeclasses would be really useful, as I could then e.g. fit four processes into a page instead of three, but I think implementing them "well" would be too complicated for now. In hindsight arbitrary allocation descriptions were probably a bad idea too. I've kept them in for now, but I'll switch to an enum of allocation types soon. I think I can fit all the per-allocation state (size+type) in 2 bytes. There are so few allocations in the kernel that I no longer care about the backtraces, so I'm leaving them out. The "large" allocations could probably be tested with execbuf in userland, so I might actually implement them soon too.
Diffstat (limited to 'src/kernel/malloc.c')
-rw-r--r--src/kernel/malloc.c245
1 files changed, 154 insertions, 91 deletions
diff --git a/src/kernel/malloc.c b/src/kernel/malloc.c
index 44434ed..3c10108 100644
--- a/src/kernel/malloc.c
+++ b/src/kernel/malloc.c
@@ -6,127 +6,190 @@
#include <stdbool.h>
#include <stdint.h>
-#define MALLOC_MAGIC 0x616c6c6f63686472 /* "allochdr" */
#define DESCLEN 8
-typedef struct Allocation Allocation;
-struct Allocation {
- uint64_t magic;
- uint64_t len;
- Allocation *next, *prev;
- void *stacktrace[4];
- char desc[DESCLEN];
+#define SCMIN 6 /* 1<<6 == 64 */
+#define SCMAX 12 /* 1<<11 == 2048 */
+
+typedef struct Slab Slab;
+struct Slab {
+ /* The slab is divided up into 1<<sizeclass sized regions.
+ * SCMIN <= sizeclass < SCMAX */
+ int sizeclass;
+ /* Conveniently, 4096/64 = 64, so I can fit the entire region bitmap
+ * in a word. */
+ uint64_t bitmap;
+ Slab **slot, *next;
};
-static Allocation *malloc_last = NULL;
+/* The code assumes the header fits within the first region. */
+static_assert(sizeof(Slab) <= 1<<SCMIN);
-static size_t
-page_amt(size_t bytes)
+/* Yes, I'm wasting a bit of memory here. */
+static Slab *freeslabs[SCMAX] = {NULL};
+static Slab *fullslabs[SCMAX] = {NULL};
+static uint64_t curslabs = 0;
+
+static uint64_t
+findfreebit(uint64_t bitmap)
{
- return (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+ /* I'm hoping gcc is smart enough to optimize this into a BSF */
+ bitmap = ~bitmap;
+ for (uint64_t i = 0; i < 64; i++) {
+ if (bitmap & (1ull<<i)) return i;
+ }
+ return 64;
}
-void
-kmalloc_debugprint(void)
+static bool
+isfull(uint64_t bitmap, int sizeclass)
+{
+ return findfreebit(bitmap)<<sizeclass == PAGE_SIZE;
+}
+
+static uint64_t
+crawllist(Slab *al, int sizeclass, bool full)
{
- size_t count = 0, bytes = 0, pages = 0;
- kprintf("[kern] current allocations:\n");
- for (Allocation *iter = malloc_last; iter; iter = iter->prev) {
+ (void) full;
+ uint64_t tally = 0;
+ int regsize = 1ull << sizeclass;
+ for (; al; al = al->next) {
kprintf(
- "%08p %6dB %.8s ",
- ((void*)iter) + sizeof(Allocation),
- iter->len - sizeof(Allocation),
- iter->desc
+ "%08p %016llx %s %d\n",
+ al, al->bitmap, full ? "full" : "part", 1ull<<al->sizeclass
);
- for (size_t i = 0; i < 4; i++) {
- kprintf(" k/%08x", iter->stacktrace[i]);
+
+ assert(al->sizeclass == sizeclass);
+ assert(al->bitmap & 1);
+ assert(al->slot && *al->slot == al);
+ assert(isfull(al->bitmap, sizeclass) == full);
+ tally++;
+
+ for (int i = 1; i * regsize < PAGE_SIZE; i++) {
+ char *p = ((char*)al) + i * regsize;
+ if (al->bitmap & (1ull<<i)) {
+ kprintf("%08p %.8s\n", p, p + regsize - DESCLEN);
+ }
}
- kprintf("\n");
+ }
+ return tally;
+}
- count++;
- bytes += iter->len;
- pages += page_amt(iter->len);
+void
+kmalloc_debugprint(void)
+{
+ uint64_t tally = 0;
+ for (int i = 0; i < SCMAX; i++) {
+ tally += crawllist(freeslabs[i], i, false);
+ tally += crawllist(fullslabs[i], i, true);
+ }
+ assert(tally == curslabs);
+ // TODO count waste
+}
+
+static int
+getsizeclass(size_t len)
+{
+ int i;
+ for (i = SCMIN; i < SCMAX-1; i++) { /* last iteration can be skipped */
+ if (len <= (1ull<<i)) break;
}
- kprintf(
- "%d in total, %d bytes, %d pages = %dB used\n",
- count, bytes, pages, pages*PAGE_SIZE
- );
+ assert(len <= (1ull<<i));
+ return i;
}
-static void
-kmalloc_sanity(const void *addr)
+static Slab *
+getheader(void *addr)
{
- assert(addr);
- const Allocation *hdr = addr - sizeof(Allocation);
- assert(hdr->magic == MALLOC_MAGIC);
- if (hdr->next) assert(hdr->next->prev == hdr);
- if (hdr->prev) assert(hdr->prev->next == hdr);
+ Slab *hdr = (void*)((uintptr_t)addr & ~PAGE_MASK);
+ assert(SCMIN <= hdr->sizeclass && hdr->sizeclass < SCMAX);
+ assert(hdr->bitmap & 1);
+ return hdr;
}
void *
kmalloc(size_t len, const char *desc)
{
- Allocation *hdr;
- void *addr;
-
- if (KMALLOC_MAX < len) {
- panic_invalid_state();
+ Slab *al;
+ void *ret;
+ int sizeclass, regsize;
+ uint64_t idx;
+
+ assert(len <= KMALLOC_MAX);
+ len += DESCLEN;
+ sizeclass = getsizeclass(len);
+ regsize = 1ull << sizeclass;
+
+ if (freeslabs[sizeclass] == NULL) {
+ al = page_alloc(1);
+ al->sizeclass = sizeclass;
+ al->bitmap = 1; /* mark header as allocated */
+ al->slot = &freeslabs[sizeclass];
+ al->next = NULL;
+ freeslabs[sizeclass] = al;
+ curslabs++;
}
- len += sizeof(Allocation);
- assert(len <= PAGE_SIZE);
- hdr = page_alloc(page_amt(len));
- hdr->magic = MALLOC_MAGIC;
- hdr->len = len;
-
- memset(hdr->desc, ' ', DESCLEN);
- if (desc) {
- for (int i = 0; i < DESCLEN; i++) {
- if (desc[i] == '\0') break;
- hdr->desc[i] = desc[i];
- }
- }
-
- hdr->next = NULL;
- hdr->prev = malloc_last;
- if (hdr->prev) {
- assert(!hdr->prev->next);
- hdr->prev->next = hdr;
+ al = freeslabs[sizeclass];
+
+ idx = findfreebit(al->bitmap);
+ assert(1 <= idx && idx < 64);
+ assert((al->bitmap & (1ull<<idx)) == 0);
+ assert(regsize * idx < PAGE_SIZE);
+ ret = ((char*)al) + regsize * idx;
+ al->bitmap |= 1ull<<idx;
+
+ char *descloc = ret + regsize - DESCLEN;
+ memset(descloc, ' ', DESCLEN);
+ for (int i = 0; i < DESCLEN && desc[i]; i++) {
+ descloc[i] = desc[i];
}
- for (size_t i = 0; i < 4; i++)
- hdr->stacktrace[i] = debug_caller(i);
+ /* if this was the last free region, move to fullslabs */
+ if (isfull(al->bitmap, sizeclass)) {
+ *al->slot = al->next;
+ al->next->slot = al->slot;
- malloc_last = hdr;
+ al->next = fullslabs[sizeclass];
+ al->next->slot = &al->next;
+ fullslabs[sizeclass] = al;
+ al->slot = &fullslabs[sizeclass];
+ }
- addr = (void*)hdr + sizeof(Allocation);
-#ifndef NDEBUG
- memset(addr, 0xCC, len);
-#endif
- kmalloc_sanity(addr);
- return addr;
+ return ret;
}
void
kfree(void *ptr)
{
- Allocation *hdr;
- size_t len;
- if (ptr == NULL) return;
-
- kmalloc_sanity(ptr);
- hdr = ptr - sizeof(Allocation);
- len = hdr->len;
-
- if (hdr->next)
- hdr->next->prev = hdr->prev;
- if (hdr->prev)
- hdr->prev->next = hdr->next;
- if (malloc_last == hdr)
- malloc_last = hdr->prev;
-#ifndef NDEBUG
- memset(hdr, 0xC0, len);
-#else
- hdr->magic = ~MALLOC_MAGIC; /* (hopefully) detect double frees */
-#endif
- page_free(hdr, page_amt(len));
+ assert(ptr);
+ Slab *al = getheader(ptr);
+ int sizeclass = al->sizeclass;
+ int regsize = 1ull << sizeclass;
+
+ uint64_t off = ((uint64_t)ptr) & PAGE_MASK;
+ uint64_t idx = off >> sizeclass;
+ bool full = isfull(al->bitmap, sizeclass);
+
+ assert(((char*)al) + idx * regsize == ptr); /* aligned? */
+ assert(1 <= idx); /* not the header? */
+ assert(al->bitmap & (1ull<<idx)); /* actually allocated? */
+ al->bitmap &= ~(1ull<<idx);
+
+ memset(ptr, 0xCC, regsize);
+
+ if (al->bitmap == 1) { /* we're empty */
+ *al->slot = al->next;
+ al->next->slot = al->slot;
+ page_free(al, 1);
+ curslabs--;
+ } else if (full) { /* we're no longer full */
+ *al->slot = al->next;
+ al->next->slot = al->slot;
+
+ al->next = freeslabs[sizeclass];
+ al->next->slot = &al->next;
+ freeslabs[sizeclass] = al;
+ al->slot = &freeslabs[sizeclass];
+ assert(fullslabs[sizeclass] != al);
+ }
}