diff options
Diffstat (limited to 'src/kernel/arch/amd64/32')
-rw-r--r-- | src/kernel/arch/amd64/32/32 | 130 | ||||
-rw-r--r-- | src/kernel/arch/amd64/32/boot.s | 86 | ||||
-rw-r--r-- | src/kernel/arch/amd64/32/farjump.s | 8 | ||||
-rw-r--r-- | src/kernel/arch/amd64/32/gdt.c | 118 | ||||
-rw-r--r-- | src/kernel/arch/amd64/32/gdt.h | 17 | ||||
-rw-r--r-- | src/kernel/arch/amd64/32/paging.c | 131 |
6 files changed, 490 insertions, 0 deletions
diff --git a/src/kernel/arch/amd64/32/32 b/src/kernel/arch/amd64/32/32
new file mode 100644
index 0000000..9bd97e6
--- /dev/null
+++ b/src/kernel/arch/amd64/32/32
@@ -0,0 +1,130 @@
+#include <kernel/arch/generic.h>
+#include <kernel/arch/amd64/gdt.h>
+#include <shared/mem.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+extern char _isr_mini_stack;
+
+struct gdt_entry {
+	uint64_t limit_low  : 16;
+	uint64_t base_low   : 24;
+	uint64_t accessed   :  1; // set by the processor
+	// CODE | DATA
+	uint64_t rw         :  1; // readable?   | writeable?
+	uint64_t conforming :  1; // conforming? | expands down?
+	uint64_t code       :  1; // 1           | 0
+
+	uint64_t codeordata :  1; // 1 for everything other than TSS and LDT
+	uint64_t ring       :  2;
+	uint64_t present    :  1; // always 1
+	uint64_t limit_high :  4;
+	uint64_t available  :  1; // ???
+	uint64_t long_mode  :  1;
+	uint64_t x32        :  1;
+	uint64_t gran       :  1; // 1 - 4kb, 0 - 1b
+	uint64_t base_high  :  8;
+} __attribute__((packed));
+
+struct tss_entry {
+	uint32_t _unused0;
+	uint32_t esp0; // kernel mode stack pointer
+	uint32_t ss0;  // kernel mode stack segment
+	uint8_t _unused1[0x5c];
+} __attribute__((packed));
+
+struct lgdt_arg {
+	uint16_t limit;
+	uint32_t base;
+} __attribute__((packed));
+
+__attribute__((section(".shared")))
+static struct gdt_entry GDT[SEG_end];
+__attribute__((section(".shared")))
+static struct tss_entry TSS;
+static struct lgdt_arg lgdt_arg; // probably doesn't need to be global
+
+static void gdt_fillout(struct gdt_entry* entry, uint8_t ring, bool code);
+static void gdt_prepare(void);
+static void gdt_load(void);
+
+
+static void gdt_fillout(struct gdt_entry* entry, uint8_t ring, bool code) { // flat 32-bit code/data descriptor for the given ring
+	*entry = (struct gdt_entry) {
+		// set up the identity mapping
+		.limit_low  = 0xFFFF,
+		.limit_high = 0xF,
+		.gran       = 1, // 4KB * 0xFFFFF = (almost) 4GB
+		.base_low   = 0,
+		.base_high  = 0,
+
+		.ring = ring,
+		.code = code,
+
+		.accessed   = 0,
+		.rw         = 1,
+		.conforming = 0,
+		.codeordata = 1,
+		.present    = 1,
+		.long_mode  = 0, // ???
+		.available  = 1, // ???
+		.x32        = 1,
+	};
+}
+
+static void gdt_prepare(void) { // fills GDT[] and TSS in memory; nothing is loaded into the CPU here
+	GDT[SEG_null].present = 0;
+
+	gdt_fillout(&GDT[SEG_r0code], 0, true);
+	gdt_fillout(&GDT[SEG_r0data], 0, false);
+	gdt_fillout(&GDT[SEG_r3code], 3, true);
+	gdt_fillout(&GDT[SEG_r3data], 3, false);
+
+	// tss
+	memset(&TSS, 0, sizeof(TSS));
+	TSS.ss0 = SEG_r0data << 3; // kernel data segment
+	TSS.esp0 = (uintptr_t) &_isr_mini_stack;
+
+	GDT[SEG_TSS] = (struct gdt_entry) {
+		.limit_low  = sizeof(TSS) - 1, // the limit is the last valid byte offset, hence - 1
+		.limit_high = (sizeof(TSS) - 1) >> 16,
+		.gran       = 0,
+		.base_low   = (uintptr_t) &TSS,
+		.base_high  = ((uintptr_t) &TSS) >> 24,
+
+		.accessed   = 1, // 1 for TSS
+		.rw         = 0, // 1 busy / 0 not busy
+		.conforming = 0, // 0 for TSS
+		.code       = 1, // 32bit
+		.codeordata = 0, // is a system entry
+		.ring       = 3,
+		.present    = 1,
+		.available  = 0, // 0 for TSS
+		.long_mode  = 0,
+		.x32        = 0, // idk
+	};
+}
+
+static void gdt_load(void) { // loads GDTR and TR, then reloads CS (far jump) and the data segment registers
+	lgdt_arg.limit = sizeof(GDT) - 1;
+	lgdt_arg.base = (uintptr_t) &GDT;
+	asm("lgdt (%0)"
+	    : : "r" (&lgdt_arg) : "memory");
+
+	asm("ltr %%ax"
+	    : : "a" (SEG_TSS << 3 | 3) : "memory");
+
+	// update all segment registers
+	gdt_farjump(SEG_r0code << 3);
+	asm("mov %0, %%ds;"
+	    "mov %0, %%ss;"
+	    "mov %0, %%es;"
+	    "mov %0, %%fs;"
+	    "mov %0, %%gs;"
+	    : : "r" (SEG_r0data << 3) : "memory");
+}
+
+void gdt_init(void) { // entry point: build the tables, then load them
+	gdt_prepare();
+	gdt_load();
+}
diff --git a/src/kernel/arch/amd64/32/boot.s b/src/kernel/arch/amd64/32/boot.s
new file mode 100644
index 0000000..eb33c28
--- /dev/null
+++ b/src/kernel/arch/amd64/32/boot.s
@@ -0,0 +1,86 @@
+.section .text
+.global _start
+.type _start, @function
+_start:
+	mov $_stack_top, %esp
+	push %ebx // save the address of the multiboot struct
+
+	mov $0x80000000, %eax // check CPUID extended functions
+	cpuid
+	cmp $0x80000001, %eax
+	jb panic_early
+
+	mov $0x80000001, %eax
+	cpuid
+	test $(1<<29), %edx // check long mode support
+	jz panic_early
+
+	mov %cr4, %eax
+	or $(1<<5), %eax // PAE
+	mov %eax, %cr4
+
+	call pml4_identity_init
+	mov $pml4_identity, %eax
+	mov %eax, %cr3
+
+	mov $0xC0000080, %ecx // EFER MSR
+	rdmsr
+	or $(1 | 1<<8 | 1<<11), %eax // syscall (SCE) | long mode (LME) | NX (NXE)
+	wrmsr
+
+	mov %cr0, %eax
+	or $0x80000000, %eax // CR0.PG - enable paging
+	mov %eax, %cr0
+
+	call gdt_init
+	lgdt (lgdt_arg)
+
+	pop %edi // the multiboot struct address pushed at the top of _start
+
+	// TODO import gdt.h for globals
+	mov $(2<<3), %eax
+	mov %eax, %ds // SEG_r0data
+	mov %eax, %ss
+	mov %eax, %es
+	mov %eax, %fs
+	mov %eax, %gs
+
+	ljmp $(1<<3), $boot64 // SEG_r0code
+
+panic_early:
+	// output a vga Fuck
+	movl $0x4F754F46, 0xB872A
+	movl $0x4F6B4F63, 0xB872E
+	jmp cpu_halt
+
+// TODO not part of anything yet
+	call sysenter_setup
+	// TODO will fail
+	push %ebx // address of the Multiboot struct
+	call kmain_early
+
+.global cpu_shutdown
+.type cpu_shutdown, @function
+cpu_shutdown:
+/* This quits QEMU. While I couldn't find this officially documented anywhere,
+ * it is used by QEMU in tests/tcg/i386/system/boot.S (as of commit 40d6ee), so
+ * I assume that this is safe-ish to use */
+	mov $0x604, %edx
+	mov $0x2000, %eax
+	outw %ax, %dx
+
+.global cpu_halt
+.type cpu_halt, @function
+cpu_halt:
+	cli
+1:	hlt
+	jmp 1b
+
+
+.global cpu_pause
+.type cpu_pause, @function
+cpu_pause:
+	sti
+	hlt
+	cli
+	ret
diff --git a/src/kernel/arch/amd64/32/farjump.s b/src/kernel/arch/amd64/32/farjump.s
new file mode 100644
index 0000000..2885d2b
--- /dev/null
+++ b/src/kernel/arch/amd64/32/farjump.s
@@ -0,0 +1,8 @@
+.section .text
+.global gdt_farjump
+.type gdt_farjump, @function
+gdt_farjump:
+	/* retf pops off the return address and code segment off the stack.
+	 * it turns out that in the i386 cdecl calling convention they're in
+	 * the correct place already. */
+	retf
diff --git a/src/kernel/arch/amd64/32/gdt.c b/src/kernel/arch/amd64/32/gdt.c
new file mode 100644
index 0000000..2229330
--- /dev/null
+++ b/src/kernel/arch/amd64/32/gdt.c
@@ -0,0 +1,118 @@
+#include <kernel/arch/amd64/32/gdt.h>
+#include <kernel/arch/generic.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+extern char _isr_mini_stack;
+
+struct gdt_entry {
+	uint64_t limit_low  : 16;
+	uint64_t base_low   : 24;
+	uint64_t accessed   :  1; // set by the processor
+	// CODE | DATA
+	uint64_t rw         :  1; // readable?   | writeable?
+	uint64_t conforming :  1; // conforming? | expands down?
+	uint64_t code       :  1; // 1           | 0
+
+	uint64_t codeordata :  1; // 1 for everything other than TSS and LDT
+	uint64_t ring       :  2;
+	uint64_t present    :  1; // always 1
+	uint64_t limit_high :  4;
+	uint64_t available  :  1; // ???
+	uint64_t long_mode  :  1;
+	uint64_t x32        :  1;
+	uint64_t gran       :  1; // 1 - 4kb, 0 - 1b
+	uint64_t base_high  :  8;
+} __attribute__((packed));
+
+struct tss_entry {
+	uint32_t reserved0;
+	uint64_t rsp[3];
+	uint64_t ist[8];
+	uint64_t reserved2;
+	uint16_t reserved3;
+	uint16_t iopb;
+} __attribute__((packed));
+
+struct lgdt_arg {
+	uint16_t limit;
+	uint32_t base;
+} __attribute__((packed));
+
+__attribute__((section(".shared")))
+struct gdt_entry GDT[SEG_end];
+__attribute__((section(".shared")))
+struct tss_entry TSS;
+
+struct lgdt_arg lgdt_arg;
+
+
+static void *memset32(void *s, int c, size_t n) { // file-local byte-wise fill; returns s
+	uint8_t *s2 = s;
+	for (size_t i = 0; i < n; i++)
+		s2[i] = c;
+	return s;
+}
+
+
+static void gdt_fillout(struct gdt_entry* entry, uint8_t ring, bool code) { // flat descriptor with long_mode = 1 for the given ring
+	*entry = (struct gdt_entry) {
+		// set up the identity mapping
+		.limit_low  = 0xFFFF,
+		.limit_high = 0xF,
+		.gran       = 1,
+		.base_low   = 0,
+		.base_high  = 0,
+
+		.ring = ring,
+		.code = code,
+
+		.accessed   = 0,
+		.rw         = 1,
+		.conforming = 0,
+		.codeordata = 1,
+		.present    = 1,
+		.long_mode  = 1,
+		.available  = 1,
+		.x32        = 0,
+	};
+}
+
+void gdt_init(void) { // fills GDT[], TSS and lgdt_arg; the lgdt itself is issued by the caller in boot.s
+	GDT[SEG_null].present = 0;
+
+	gdt_fillout(&GDT[SEG_r0code], 0, true);
+	gdt_fillout(&GDT[SEG_r0data], 0, false);
+	gdt_fillout(&GDT[SEG_r3code], 3, true);
+	gdt_fillout(&GDT[SEG_r3data], 3, false);
+
+	lgdt_arg.limit = sizeof(GDT) - 1;
+	lgdt_arg.base = (uintptr_t) &GDT; // uintptr_t, not uint64_t: no size-mismatched pointer cast
+
+
+	memset32(&TSS, 0, sizeof(TSS)); // NOTE(review): iopb stays 0, so the I/O bitmap base points at the TSS itself - confirm this is intended
+	for (int i = 0; i < 3; i++)
+		TSS.rsp[i] = (uintptr_t) &_isr_mini_stack;
+	TSS.ist[1] = (uintptr_t) &_isr_mini_stack;
+
+	uint64_t tss_addr = (uintptr_t) &TSS; // via uintptr_t: GCC sign-extends a narrower pointer cast straight to uint64_t
+	GDT[SEG_TSS] = (struct gdt_entry) {
+		.limit_low  = sizeof(TSS) - 1, // the limit is the last valid byte offset, hence - 1
+		.limit_high = (sizeof(TSS) - 1) >> 16,
+		.gran       = 0,
+		.base_low   = tss_addr,
+		.base_high  = tss_addr >> 24,
+
+		.accessed   = 1,
+		.rw         = 0,
+		.conforming = 0,
+		.code       = 1,
+		.codeordata = 0,
+		.ring       = 0, // was 3 pre-port
+		.present    = 1,
+		.available  = 1,
+		.long_mode  = 0,
+		.x32        = 0,
+	};
+	*((uint64_t*)&GDT[SEG_TSS2]) = (tss_addr >> 32); // upper half of the TSS base goes into the following GDT slot
+}
diff --git a/src/kernel/arch/amd64/32/gdt.h b/src/kernel/arch/amd64/32/gdt.h
new file mode 100644
index 0000000..fbaf681
--- /dev/null
+++ b/src/kernel/arch/amd64/32/gdt.h
@@ -0,0 +1,17 @@
+#pragma once
+
+enum {
+	SEG_null,
+	// order dictated by SYSENTER
+	SEG_r0code,
+	SEG_r0data,
+	SEG_r3code,
+	SEG_r3data,
+	SEG_TSS,
+	SEG_TSS2,
+
+	SEG_end
+};
+
+void gdt_init(void);
+void gdt_farjump(int segment);
diff --git a/src/kernel/arch/amd64/32/paging.c b/src/kernel/arch/amd64/32/paging.c
new file mode 100644
index 0000000..975dd98
--- /dev/null
+++ b/src/kernel/arch/amd64/32/paging.c
@@ -0,0 +1,131 @@
+#include <stddef.h>
+#include <stdint.h>
+
+struct pml4e {
+	uint64_t present   : 1;
+	uint64_t writeable : 1;
+	uint64_t user      : 1;
+	uint64_t writethru : 1;
+
+	uint64_t uncached  : 1;
+	uint64_t accessed  : 1;
+	uint64_t _unused1  : 1;
+	uint64_t reserved  : 1; // always 0
+
+	uint64_t _unused2  : 3;
+	uint64_t _unused3  : 1; // HLAT thing
+
+	uint64_t address   : 40;
+
+	uint64_t _unused4  : 11;
+	uint64_t noexec    : 1;
+} __attribute__((packed));
+
+struct pdpte { // page directory pointer table entry, 1gb page | 512 * pde
+	uint64_t present   : 1;
+	uint64_t writeable : 1;
+	uint64_t user      : 1;
+	uint64_t writethru : 1;
+
+	uint64_t uncached  : 1;
+	uint64_t accessed  : 1;
+	uint64_t _unused1  : 1;
+	uint64_t large     : 1; // 1gb page
+
+	uint64_t _unused2  : 3;
+	uint64_t _unused3  : 1; // HLAT
+
+	uint64_t address   : 40;
+
+	uint64_t _unused4  : 11;
+	uint64_t noexec    : 1;
+} __attribute__((packed));
+
+struct pde { // page directory entry, 2mb page | 512 * pte
+	uint64_t present   : 1;
+	uint64_t writeable : 1;
+	uint64_t user      : 1;
+	uint64_t writethru : 1;
+
+	uint64_t uncached  : 1;
+	uint64_t accessed  : 1;
+	uint64_t dirty     : 1; // only if large
+	uint64_t large     : 1; // 2mb
+
+	uint64_t global    : 1; // only if large ; TODO enable CR4.PGE
+	uint64_t _unused2  : 2;
+	uint64_t _unused3  : 1; // HLAT
+
+	uint64_t address   : 40; // highest bit - PAT
+
+	uint64_t _unused4  : 7;
+	uint64_t pke       : 4;
+	uint64_t noexec    : 1;
+} __attribute__((packed));
+
+struct pte { // page table entry, 4kb page
+	uint64_t present   : 1;
+	uint64_t writeable : 1;
+	uint64_t user      : 1;
+	uint64_t writethru : 1;
+
+	uint64_t uncached  : 1;
+	uint64_t accessed  : 1;
+	uint64_t dirty     : 1;
+	uint64_t pat       : 1;
+
+	uint64_t global    : 1; // TODO enable CR4.PGE
+	uint64_t _unused2  : 2;
+	uint64_t _unused3  : 1; // HLAT
+
+	uint64_t address   : 40;
+
+	uint64_t _unused4  : 7;
+	uint64_t pke       : 4;
+	uint64_t noexec    : 1;
+} __attribute__((packed));
+
+__attribute__((aligned(4096)))
+struct pml4e pml4_identity[512];
+
+__attribute__((aligned(4096)))
+struct pdpte pdpte_low[512]; // 0-512gb
+
+__attribute__((aligned(4096)))
+struct pde pde_low[512]; // 0-1gb
+
+
+static void *memset32(void *s, int c, size_t n) { // file-local byte-wise fill; returns s
+	uint8_t *s2 = s;
+	for (size_t i = 0; i < n; i++)
+		s2[i] = c;
+	return s;
+}
+
+
+void pml4_identity_init(void) { // identity-maps the first 1 GiB using 512 writeable 2 MiB pages
+	memset32(pml4_identity, 0, sizeof pml4_identity);
+	memset32(pdpte_low,     0, sizeof pdpte_low);
+	memset32(pde_low,       0, sizeof pde_low);
+
+	pml4_identity[0] = (struct pml4e) {
+		.present = 1,
+		.writeable = 1,
+		.address = ((uintptr_t)pdpte_low) >> 12,
+	};
+
+	pdpte_low[0] = (struct pdpte) {
+		.present = 1,
+		.writeable = 1,
+		.address = ((uintptr_t)pde_low) >> 12,
+	};
+
+	for (int i = 0; i < 512; i++) {
+		pde_low[i] = (struct pde) {
+			.present = 1,
+			.writeable = 1,
+			.large = 1,
+			.address = (i * 2 * 1024 * 1024) >> 12, // physical address i * 2 MiB, stored as a 4 KiB frame number
+		};
+	}
+}