diff options
author | dzwdz | 2024-07-16 02:34:03 +0200 |
---|---|---|
committer | dzwdz | 2024-07-16 02:34:03 +0200 |
commit | e29f0e294ac841e2036fe514df4ed66f5d0ec46f (patch) | |
tree | d37c40bdfecb153ab580f6119bf71294f3d77e93 | |
parent | 4fbb7e3c43440bd7eb4bf081c5be51dfe6f6e088 (diff) |
kernel: use a slab allocator for kmalloc
For a while during development it managed to be faster than the old allocator
despite having more overhead, probably because it makes better use of the
cache. It no longer is - not sure why. The code definitely could be optimized
to hell, though, and it'd probably recover its original edge.
Non-power-of-2 sizeclasses would be really useful, as I could then e.g.
fit four processes into a page instead of three, but I think implementing them
"well" would be too complicated for now.
In hindsight arbitrary allocation descriptions were probably a bad idea too.
I've kept them in for now, but I'll switch to an enum of allocation types soon.
I think I can fit all the per-allocation state (size+type) in 2 bytes.
There are so few allocations in the kernel that I no longer care about the
backtraces, so I'm leaving them out.
The "large" allocations could probably be tested with execbuf in userland, so
I might actually implement them soon too.
-rw-r--r-- | src/kernel/malloc.c | 245 | ||||
-rw-r--r-- | src/kernel/malloc.h | 10 | ||||
-rw-r--r-- | src/libk/include/camellia/execbuf.h | 2 |
3 files changed, 161 insertions, 96 deletions
diff --git a/src/kernel/malloc.c b/src/kernel/malloc.c index 44434ed..3c10108 100644 --- a/src/kernel/malloc.c +++ b/src/kernel/malloc.c @@ -6,127 +6,190 @@ #include <stdbool.h> #include <stdint.h> -#define MALLOC_MAGIC 0x616c6c6f63686472 /* "allochdr" */ #define DESCLEN 8 -typedef struct Allocation Allocation; -struct Allocation { - uint64_t magic; - uint64_t len; - Allocation *next, *prev; - void *stacktrace[4]; - char desc[DESCLEN]; +#define SCMIN 6 /* 1<<6 == 64 */ +#define SCMAX 12 /* 1<<11 == 2048 */ + +typedef struct Slab Slab; +struct Slab { + /* The slab is divided up into 1<<sizeclass sized regions. + * SCMIN <= sizeclass < SCMAX */ + int sizeclass; + /* Conveniently, 4096/64 = 64, so I can fit the entire region bitmap + * in a word. */ + uint64_t bitmap; + Slab **slot, *next; }; -static Allocation *malloc_last = NULL; +/* The code assumes the header fits within the first region. */ +static_assert(sizeof(Slab) <= 1<<SCMIN); -static size_t -page_amt(size_t bytes) +/* Yes, I'm wasting a bit of memory here. */ +static Slab *freeslabs[SCMAX] = {NULL}; +static Slab *fullslabs[SCMAX] = {NULL}; +static uint64_t curslabs = 0; + +static uint64_t +findfreebit(uint64_t bitmap) { - return (bytes + PAGE_SIZE - 1) / PAGE_SIZE; + /* I'm hoping gcc is smart enough to optimize this into a BSF */ + bitmap = ~bitmap; + for (uint64_t i = 0; i < 64; i++) { + if (bitmap & (1ull<<i)) return i; + } + return 64; } -void -kmalloc_debugprint(void) +static bool +isfull(uint64_t bitmap, int sizeclass) +{ + return findfreebit(bitmap)<<sizeclass == PAGE_SIZE; +} + +static uint64_t +crawllist(Slab *al, int sizeclass, bool full) { - size_t count = 0, bytes = 0, pages = 0; - kprintf("[kern] current allocations:\n"); - for (Allocation *iter = malloc_last; iter; iter = iter->prev) { + (void) full; + uint64_t tally = 0; + int regsize = 1ull << sizeclass; + for (; al; al = al->next) { kprintf( - "%08p %6dB %.8s ", - ((void*)iter) + sizeof(Allocation), - iter->len - sizeof(Allocation), - iter->desc + "%08p %016llx %s %d\n", + al, al->bitmap, full ? "full" : "part", 1ull<<al->sizeclass ); - for (size_t i = 0; i < 4; i++) { - kprintf(" k/%08x", iter->stacktrace[i]); + + assert(al->sizeclass == sizeclass); + assert(al->bitmap & 1); + assert(al->slot && *al->slot == al); + assert(isfull(al->bitmap, sizeclass) == full); + tally++; + + for (int i = 1; i * regsize < PAGE_SIZE; i++) { + char *p = ((char*)al) + i * regsize; + if (al->bitmap & (1ull<<i)) { + kprintf("%08p %.8s\n", p, p + regsize - DESCLEN); + } } - kprintf("\n"); + } + return tally; +} - count++; - bytes += iter->len; - pages += page_amt(iter->len); +void +kmalloc_debugprint(void) +{ + uint64_t tally = 0; + for (int i = 0; i < SCMAX; i++) { + tally += crawllist(freeslabs[i], i, false); + tally += crawllist(fullslabs[i], i, true); + } + assert(tally == curslabs); + // TODO count waste +} + +static int +getsizeclass(size_t len) +{ + int i; + for (i = SCMIN; i < SCMAX-1; i++) { /* last iteration can be skipped */ + if (len <= (1ull<<i)) break; } - kprintf( - "%d in total, %d bytes, %d pages = %dB used\n", - count, bytes, pages, pages*PAGE_SIZE - ); + assert(len <= (1ull<<i)); + return i; } -static void -kmalloc_sanity(const void *addr) +static Slab * +getheader(void *addr) { - assert(addr); - const Allocation *hdr = addr - sizeof(Allocation); - assert(hdr->magic == MALLOC_MAGIC); - if (hdr->next) assert(hdr->next->prev == hdr); - if (hdr->prev) assert(hdr->prev->next == hdr); + Slab *hdr = (void*)((uintptr_t)addr & ~PAGE_MASK); + assert(SCMIN <= hdr->sizeclass && hdr->sizeclass < SCMAX); + assert(hdr->bitmap & 1); + return hdr; } void * kmalloc(size_t len, const char *desc) { - Allocation *hdr; - void *addr; - - if (KMALLOC_MAX < len) { - panic_invalid_state(); + Slab *al; + void *ret; + int sizeclass, regsize; + uint64_t idx; + + assert(len <= KMALLOC_MAX); + len += DESCLEN; + sizeclass = getsizeclass(len); + regsize = 1ull << sizeclass; + + if (freeslabs[sizeclass] == NULL) { + al = page_alloc(1); + al->sizeclass = sizeclass; + al->bitmap = 1; /* mark header as allocated */ + al->slot = &freeslabs[sizeclass]; + al->next = NULL; + freeslabs[sizeclass] = al; + curslabs++; } - len += sizeof(Allocation); - assert(len <= PAGE_SIZE); - hdr = page_alloc(page_amt(len)); - hdr->magic = MALLOC_MAGIC; - hdr->len = len; - - memset(hdr->desc, ' ', DESCLEN); - if (desc) { - for (int i = 0; i < DESCLEN; i++) { - if (desc[i] == '\0') break; - hdr->desc[i] = desc[i]; - } - } - - hdr->next = NULL; - hdr->prev = malloc_last; - if (hdr->prev) { - assert(!hdr->prev->next); - hdr->prev->next = hdr; + al = freeslabs[sizeclass]; + + idx = findfreebit(al->bitmap); + assert(1 <= idx && idx < 64); + assert((al->bitmap & (1ull<<idx)) == 0); + assert(regsize * idx < PAGE_SIZE); + ret = ((char*)al) + regsize * idx; + al->bitmap |= 1ull<<idx; + + char *descloc = ret + regsize - DESCLEN; + memset(descloc, ' ', DESCLEN); + for (int i = 0; i < DESCLEN && desc[i]; i++) { + descloc[i] = desc[i]; } - for (size_t i = 0; i < 4; i++) - hdr->stacktrace[i] = debug_caller(i); + /* if this was the last free region, move to fullslabs */ + if (isfull(al->bitmap, sizeclass)) { + *al->slot = al->next; + al->next->slot = al->slot; - malloc_last = hdr; + al->next = fullslabs[sizeclass]; + al->next->slot = &al->next; + fullslabs[sizeclass] = al; + al->slot = &fullslabs[sizeclass]; + } - addr = (void*)hdr + sizeof(Allocation); -#ifndef NDEBUG - memset(addr, 0xCC, len); -#endif - kmalloc_sanity(addr); - return addr; + return ret; } void kfree(void *ptr) { - Allocation *hdr; - size_t len; - if (ptr == NULL) return; - - kmalloc_sanity(ptr); - hdr = ptr - sizeof(Allocation); - len = hdr->len; - - if (hdr->next) - hdr->next->prev = hdr->prev; - if (hdr->prev) - hdr->prev->next = hdr->next; - if (malloc_last == hdr) - malloc_last = hdr->prev; -#ifndef NDEBUG - memset(hdr, 0xC0, len); -#else - hdr->magic = ~MALLOC_MAGIC; /* (hopefully) detect double frees */ -#endif - page_free(hdr, page_amt(len)); + assert(ptr); + Slab *al = getheader(ptr); + int sizeclass = al->sizeclass; + int regsize = 1ull << sizeclass; + + uint64_t off = ((uint64_t)ptr) & PAGE_MASK; + uint64_t idx = off >> sizeclass; + bool full = isfull(al->bitmap, sizeclass); + + assert(((char*)al) + idx * regsize == ptr); /* aligned? */ + assert(1 <= idx); /* not the header? */ + assert(al->bitmap & (1ull<<idx)); /* actually allocated? */ + al->bitmap &= ~(1ull<<idx); + + memset(ptr, 0xCC, regsize); + + if (al->bitmap == 1) { /* we're empty */ + *al->slot = al->next; + al->next->slot = al->slot; + page_free(al, 1); + curslabs--; + } else if (full) { /* we're no longer full */ + *al->slot = al->next; + al->next->slot = al->slot; + + al->next = freeslabs[sizeclass]; + al->next->slot = &al->next; + freeslabs[sizeclass] = al; + al->slot = &freeslabs[sizeclass]; + assert(fullslabs[sizeclass] != al); + } } diff --git a/src/kernel/malloc.h b/src/kernel/malloc.h index 436d899..bda2745 100644 --- a/src/kernel/malloc.h +++ b/src/kernel/malloc.h @@ -3,10 +3,12 @@ #include <shared/mem.h> #include <stddef.h> -/* This seems to be fine for now, and it means that i don't need to concern - * myself with allocating contiguous pages for now, which is way easier to do - * well. */ -#define KMALLOC_MAX 2048 +/* This seems to be fine for now. + * KMALLOC_MAX >= 2048 would require extending kmalloc with a new allocation + * type. KMALLOC_MAX >= 4096 would require a new (Buddy?) page allocator. + * Both are pretty easy, but I don't want to maintain what would currently be + * dead code. */ +#define KMALLOC_MAX 1024 void mem_init(void *memtop); void mem_reserve(void *addr, size_t len); diff --git a/src/libk/include/camellia/execbuf.h b/src/libk/include/camellia/execbuf.h index 8ffabad..a5704a2 100644 --- a/src/libk/include/camellia/execbuf.h +++ b/src/libk/include/camellia/execbuf.h @@ -1,7 +1,7 @@ #pragma once /* the instruction format is bound to change, atm it's extremely inefficient */ -#define EXECBUF_MAX_LEN 2048 +#define EXECBUF_MAX_LEN 1024 /* takes 6 arguments */ #define EXECBUF_SYSCALL 0xF0000001 |