summaryrefslogtreecommitdiff
path: root/src/kernel/arch/amd64/32
diff options
context:
space:
mode:
authordzwdz2022-07-16 13:33:00 +0200
committerdzwdz2022-07-16 13:33:00 +0200
commit912d2e3c7eb1baa71dda2c0a28aa5809eaa96f27 (patch)
tree4e27f3538466d5fd63a311d50916039a7a15a485 /src/kernel/arch/amd64/32
parent1eeb66af44ab335888410d716d604e569f20866e (diff)
amd64: barely boot into kernel code
Diffstat (limited to 'src/kernel/arch/amd64/32')
-rw-r--r--src/kernel/arch/amd64/32/32130
-rw-r--r--src/kernel/arch/amd64/32/boot.s86
-rw-r--r--src/kernel/arch/amd64/32/farjump.s8
-rw-r--r--src/kernel/arch/amd64/32/gdt.c118
-rw-r--r--src/kernel/arch/amd64/32/gdt.h17
-rw-r--r--src/kernel/arch/amd64/32/paging.c131
6 files changed, 490 insertions, 0 deletions
diff --git a/src/kernel/arch/amd64/32/32 b/src/kernel/arch/amd64/32/32
new file mode 100644
index 0000000..9bd97e6
--- /dev/null
+++ b/src/kernel/arch/amd64/32/32
@@ -0,0 +1,130 @@
+#include <kernel/arch/generic.h>
+#include <kernel/arch/amd64/gdt.h>
+#include <shared/mem.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+extern char _isr_mini_stack;
+
+struct gdt_entry { // one 8-byte segment descriptor, described field by field
+ uint64_t limit_low : 16;
+ uint64_t base_low : 24;
+ uint64_t accessed : 1; // set by the processor
+ // CODE | DATA
+ uint64_t rw : 1; // readable? | writeable?
+ uint64_t conforming : 1; // conforming? | expands down?
+ uint64_t code : 1; // 1 | 0
+
+ uint64_t codeordata : 1; // 1 for everything other than TSS and LDT
+ uint64_t ring : 2; // privilege level; this file only uses 0 and 3
+ uint64_t present : 1; // always 1
+ uint64_t limit_high : 4;
+ uint64_t available : 1; // AVL bit - NOTE(review): believed to be ignored by the CPU, confirm
+ uint64_t long_mode : 1;
+ uint64_t x32 : 1;
+ uint64_t gran : 1; // 1 - 4kb, 0 - 1b
+ uint64_t base_high : 8;
+} __attribute__((packed)); // the bit widths above add up to 64
+
+struct tss_entry { // 32-bit TSS; only esp0/ss0 are named, everything else is padding
+ uint32_t _unused0;
+ uint32_t esp0; // kernel mode stack pointer
+ uint32_t ss0; // kernel mode stack segment
+ uint8_t _unused1[0x5c]; // brings the struct to 0x68 bytes in total
+} __attribute__((packed));
+
+struct lgdt_arg { // memory operand that gdt_load() hands to the lgdt instruction
+ uint16_t limit; // set to sizeof(GDT) - 1
+ uint32_t base; // set to the address of GDT
+} __attribute__((packed));
+
+__attribute__((section(".shared"))) // emitted into the section named ".shared"
+static struct gdt_entry GDT[SEG_end]; // indexed by the SEG_* constants
+__attribute__((section(".shared")))
+static struct tss_entry TSS;
+static struct lgdt_arg lgdt_arg; // probably doesn't need to be global
+
+static void gdt_fillout(struct gdt_entry* entry, uint8_t ring, bool code); // build one flat descriptor
+static void gdt_prepare(void); // fill GDT[] and TSS in memory
+static void gdt_load(void); // lgdt + ltr + segment register reload
+
+
+static void gdt_fillout(struct gdt_entry* entry, uint8_t ring, bool code) { // overwrites *entry with a flat code or data descriptor
+ *entry = (struct gdt_entry) {
+ // set up the identity mapping
+ .limit_low = 0xFFFF,
+ .limit_high = 0xF,
+ .gran = 1, // limit 0xFFFFF counted in 4KB units - NOTE(review): that should span the full 4GB, confirm
+ .base_low = 0,
+ .base_high = 0,
+
+ .ring = ring,
+ .code = code, // true -> code segment, false -> data segment
+
+ .accessed = 0,
+ .rw = 1, // readable (code) / writeable (data)
+ .conforming = 0,
+ .codeordata = 1, // not a TSS/LDT entry
+ .present = 1,
+ .long_mode = 0, // left clear; x32 is set instead - NOTE(review): confirm both must not be 1 together
+ .available = 1, // AVL bit - NOTE(review): value looks arbitrary, confirm nothing reads it
+ .x32 = 1,
+ };
+}
+
+static void gdt_prepare(void) { // fills GDT[] and TSS in memory; nothing is loaded into the CPU here
+ GDT[SEG_null].present = 0; // slot 0 stays a non-present entry
+
+ gdt_fillout(&GDT[SEG_r0code], 0, true);
+ gdt_fillout(&GDT[SEG_r0data], 0, false);
+ gdt_fillout(&GDT[SEG_r3code], 3, true);
+ gdt_fillout(&GDT[SEG_r3data], 3, false);
+
+ // tss
+ memset(&TSS, 0, sizeof(TSS));
+ TSS.ss0 = SEG_r0data << 3; // kernel data segment
+ TSS.esp0 = (uintptr_t) &_isr_mini_stack; // symbol is declared extern, defined outside this file
+
+ GDT[SEG_TSS] = (struct gdt_entry) {
+ .limit_low = sizeof(TSS), // NOTE(review): descriptor limits are usually size - 1, confirm
+ .limit_high = sizeof(TSS) >> 16, // 0, because sizeof(TSS) is 0x68
+ .gran = 0, // limit counted in bytes
+ .base_low = (uintptr_t) &TSS, // truncated to the 24-bit field
+ .base_high = ((uintptr_t) &TSS) >> 24,
+
+ .accessed = 1, // 1 for TSS
+ .rw = 0, // 1 busy / 0 not busy
+ .conforming = 0, // 0 for TSS
+ .code = 1, // 32bit
+ .codeordata = 0, // is a system entry
+ .ring = 3,
+ .present = 1,
+ .available = 0, // 0 for TSS
+ .long_mode = 0,
+ .x32 = 0, // left clear; original author was unsure about this bit
+ };
+}
+
+static void gdt_load(void) { // loads GDT and TSS into the CPU, then reloads every segment register
+ lgdt_arg.limit = sizeof(GDT) - 1;
+ lgdt_arg.base = (uintptr_t) &GDT;
+ asm("lgdt (%0)"
+ : : "r" (&lgdt_arg) : "memory");
+
+ asm("ltr %%ax"
+ : : "a" (SEG_TSS << 3 | 3) : "memory"); // selector = table index << 3, low two bits set to 3
+
+ // update all segment registers
+ gdt_farjump(SEG_r0code << 3); // implemented with retf in farjump.s
+ asm("mov %0, %%ds;"
+ "mov %0, %%ss;"
+ "mov %0, %%es;"
+ "mov %0, %%fs;"
+ "mov %0, %%gs;"
+ : : "r" (SEG_r0data << 3) : "memory"); // every data segment register gets the ring 0 data selector
+}
+
+void gdt_init(void) { // public entry point: build the tables, then load them
+ gdt_prepare();
+ gdt_load();
+}
diff --git a/src/kernel/arch/amd64/32/boot.s b/src/kernel/arch/amd64/32/boot.s
new file mode 100644
index 0000000..eb33c28
--- /dev/null
+++ b/src/kernel/arch/amd64/32/boot.s
@@ -0,0 +1,86 @@
+.section .text
+.global _start
+.type _start, @function
+_start: // 32-bit entry point; ends with a far jump to boot64
+ mov $_stack_top, %esp
+ push %ebx // save the address of the multiboot struct
+
+ mov $0x80000000, %eax // check CPUID extended functions
+ cpuid
+ cmp $0x80000001, %eax // bail out if leaf 0x80000001 is not available
+ jb panic_early
+
+ mov $0x80000001, %eax
+ cpuid
+ test $(1<<29), %edx // check long mode support
+ jz panic_early
+
+ mov %cr4, %eax
+ or $(1<<5), %eax // PAE
+ mov %eax, %cr4
+
+ call pml4_identity_init // fills the page tables defined in paging.c
+ mov $pml4_identity, %eax
+ mov %eax, %cr3
+
+ mov $0xC0000080, %ecx // EFER MSR
+ rdmsr
+ or $(1 | 1<<8 | 1<<11), %eax // SCE (syscall/sysret, not sysenter) | long mode | NX
+ wrmsr
+
+ mov %cr0, %eax
+ or $0x80000000, %eax // bit 31 = CR0.PG, turns paging on
+ mov %eax, %cr0
+
+ call gdt_init
+ lgdt (lgdt_arg) // lgdt_arg is the global filled in by gdt_init (gdt.c)
+
+ pop %edi // the multiboot struct address pushed at the top
+
+ // TODO import gdt.h for globals
+ mov $(2<<3), %eax
+ mov %eax, %ds // SEG_r0data
+ mov %eax, %ss
+ mov %eax, %es
+ mov %eax, %fs
+ mov %eax, %gs
+
+ ljmp $(1<<3), $boot64 // SEG_r0code
+
+panic_early: // jumped to from _start when the CPUID checks fail
+ // output a vga Fuck
+ movl $0x4F754F46, 0xB872A // 'F','u' - each character byte is followed by attribute byte 0x4F
+ movl $0x4F6B4F63, 0xB872E // 'c','k'
+ jmp cpu_halt
+
+// TODO not part of anything yet
+ call sysenter_setup // unreachable: sits after an unconditional jmp and has no label
+ // TODO will fail
+ push %ebx // address of the Multiboot struct
+ call kmain_early
+
+.global cpu_shutdown
+.type cpu_shutdown, @function
+cpu_shutdown:
+/* This quits QEMU. While I couldn't find this officially documented anywhere,
+ * it is used by QEMU in tests/tcg/i386/system/boot.S (as of commit 40d6ee), so
+ * I assume that this is safe-ish to use */
+ mov $0x604, %edx
+ mov $0x2000, %eax
+ outw %ax, %dx // no ret here: execution continues into cpu_halt below
+
+.global cpu_halt
+.type cpu_halt, @function
+cpu_halt: // never returns
+ cli // interrupts off before halting
+1: hlt
+ jmp 1b // halt again if execution ever resumes
+
+
+.global cpu_pause
+.type cpu_pause, @function
+cpu_pause: // halts with interrupts enabled, returns with them disabled again
+ sti
+ hlt
+ cli
+ ret
diff --git a/src/kernel/arch/amd64/32/farjump.s b/src/kernel/arch/amd64/32/farjump.s
new file mode 100644
index 0000000..2885d2b
--- /dev/null
+++ b/src/kernel/arch/amd64/32/farjump.s
@@ -0,0 +1,8 @@
+.section .text
+.global gdt_farjump
+.type gdt_farjump, @function
+gdt_farjump:
+ /* retf pops the return address and the code segment off the stack.
+ * it turns out that in the i386 cdecl calling convention they're in
+ * the correct place already (return address first, then the argument). */
+ retf
diff --git a/src/kernel/arch/amd64/32/gdt.c b/src/kernel/arch/amd64/32/gdt.c
new file mode 100644
index 0000000..2229330
--- /dev/null
+++ b/src/kernel/arch/amd64/32/gdt.c
@@ -0,0 +1,118 @@
+#include <kernel/arch/amd64/32/gdt.h>
+#include <kernel/arch/generic.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+extern char _isr_mini_stack;
+
+struct gdt_entry { // one 8-byte segment descriptor, described field by field
+ uint64_t limit_low : 16;
+ uint64_t base_low : 24;
+ uint64_t accessed : 1; // set by the processor
+ // CODE | DATA
+ uint64_t rw : 1; // readable? | writeable?
+ uint64_t conforming : 1; // conforming? | expands down?
+ uint64_t code : 1; // 1 | 0
+
+ uint64_t codeordata : 1; // 1 for everything other than TSS and LDT
+ uint64_t ring : 2; // privilege level; this file only uses 0 and 3
+ uint64_t present : 1; // always 1
+ uint64_t limit_high : 4;
+ uint64_t available : 1; // AVL bit - NOTE(review): believed to be ignored by the CPU, confirm
+ uint64_t long_mode : 1;
+ uint64_t x32 : 1;
+ uint64_t gran : 1; // 1 - 4kb, 0 - 1b
+ uint64_t base_high : 8;
+} __attribute__((packed)); // the bit widths above add up to 64
+
+struct tss_entry { // 64-bit style TSS, 104 bytes in total when packed
+ uint32_t reserved0;
+ uint64_t rsp[3]; // gdt_init() points all three at _isr_mini_stack
+ uint64_t ist[8]; // gdt_init() only sets ist[1] - NOTE(review): ist[0] presumably overlays a reserved slot, confirm
+ uint64_t reserved2;
+ uint16_t reserved3;
+ uint16_t iopb; // left at 0 by gdt_init()
+} __attribute__((packed));
+
+struct lgdt_arg { // operand for the lgdt instruction that boot.s executes after gdt_init()
+ uint16_t limit; // set to sizeof(GDT) - 1
+ uint32_t base; // 32-bit field; gdt_init() assigns a uint64_t to it, so the value is narrowed
+} __attribute__((packed));
+
+__attribute__((section(".shared"))) // emitted into the section named ".shared"
+struct gdt_entry GDT[SEG_end]; // indexed by the SEG_* constants from gdt.h
+__attribute__((section(".shared")))
+struct tss_entry TSS;
+
+struct lgdt_arg lgdt_arg; // not static: boot.s refers to this symbol by name
+
+
+static void *memset32(void *s, int c, size_t n) { // sets n bytes at s to (uint8_t)c and returns s
+ uint8_t *s2 = s;
+ for (size_t i = 0; i < n; i++) // one byte per iteration, despite the "32" in the name
+ s2[i] = c;
+ return s;
+}
+
+
+static void gdt_fillout(struct gdt_entry* entry, uint8_t ring, bool code) { // overwrites *entry with a flat code or data descriptor
+ *entry = (struct gdt_entry) {
+ // set up the identity mapping
+ .limit_low = 0xFFFF,
+ .limit_high = 0xF,
+ .gran = 1, // limit 0xFFFFF is counted in 4kb units
+ .base_low = 0,
+ .base_high = 0,
+
+ .ring = ring,
+ .code = code, // true -> code segment, false -> data segment
+
+ .accessed = 0,
+ .rw = 1, // readable (code) / writeable (data)
+ .conforming = 0,
+ .codeordata = 1, // not a TSS/LDT entry
+ .present = 1,
+ .long_mode = 1, // set for data entries as well - NOTE(review): presumably only meaningful for code, confirm
+ .available = 1, // AVL bit - NOTE(review): value looks arbitrary, confirm nothing reads it
+ .x32 = 0, // kept clear while long_mode is set
+ };
+}
+
+void gdt_init(void) { // fills GDT[], TSS and lgdt_arg in memory; the lgdt itself is issued by boot.s
+ GDT[SEG_null].present = 0; // slot 0 stays a non-present entry
+
+ gdt_fillout(&GDT[SEG_r0code], 0, true);
+ gdt_fillout(&GDT[SEG_r0data], 0, false);
+ gdt_fillout(&GDT[SEG_r3code], 3, true);
+ gdt_fillout(&GDT[SEG_r3data], 3, false);
+
+ lgdt_arg.limit = sizeof(GDT) - 1;
+ lgdt_arg.base = (uint64_t) &GDT; // narrowed to the 32-bit base field on assignment
+
+
+ memset32(&TSS, 0, sizeof(TSS));
+ for (int i = 0; i < 3; i++) // rsp[0..2] all share the same stack
+ TSS.rsp[i] = (uint64_t) &_isr_mini_stack;
+ TSS.ist[1] = (uint64_t) &_isr_mini_stack;
+
+ uint64_t tss_addr = (uint64_t) &TSS;
+ GDT[SEG_TSS] = (struct gdt_entry) {
+ .limit_low = sizeof(TSS), // NOTE(review): descriptor limits are usually size - 1, confirm
+ .limit_high = sizeof(TSS) >> 16, // 0, sizeof(TSS) fits in 16 bits
+ .gran = 0, // limit counted in bytes
+ .base_low = tss_addr, // truncated to the 24-bit field
+ .base_high = tss_addr >> 24, // truncated to the 8-bit field
+
+ .accessed = 1,
+ .rw = 0,
+ .conforming = 0,
+ .code = 1,
+ .codeordata = 0, // system entry, not code/data
+ .ring = 0, // was 3 pre-port
+ .present = 1,
+ .available = 1,
+ .long_mode = 0,
+ .x32 = 0,
+ };
+ *((uint64_t*)&GDT[SEG_TSS2]) = (tss_addr >> 32); // the slot after SEG_TSS receives bits 63:32 of the TSS address
+}
diff --git a/src/kernel/arch/amd64/32/gdt.h b/src/kernel/arch/amd64/32/gdt.h
new file mode 100644
index 0000000..fbaf681
--- /dev/null
+++ b/src/kernel/arch/amd64/32/gdt.h
@@ -0,0 +1,17 @@
+#pragma once
+
+enum { // indices into GDT[]; users shift them left by 3 to form selectors
+ SEG_null,
+ // order dictated by SYSENTER
+ SEG_r0code, // 1 - boot.s hardcodes this as (1<<3)
+ SEG_r0data, // 2 - boot.s hardcodes this as (2<<3)
+ SEG_r3code,
+ SEG_r3data,
+ SEG_TSS,
+ SEG_TSS2, // second slot of the TSS entry; gdt.c stores tss_addr >> 32 here
+
+ SEG_end // number of entries, used as the GDT array length
+};
+
+void gdt_init(void); // defined in gdt.c, called from boot.s
+void gdt_farjump(int segment); // defined in farjump.s; segment is a selector value such as SEG_r0code << 3
diff --git a/src/kernel/arch/amd64/32/paging.c b/src/kernel/arch/amd64/32/paging.c
new file mode 100644
index 0000000..975dd98
--- /dev/null
+++ b/src/kernel/arch/amd64/32/paging.c
@@ -0,0 +1,131 @@
+#include <stddef.h>
+#include <stdint.h>
+
+struct pml4e { // entry of the top-level table (pml4_identity); bit widths add up to 64
+ uint64_t present : 1;
+ uint64_t writeable : 1;
+ uint64_t user : 1;
+ uint64_t writethru : 1;
+
+ uint64_t uncached : 1;
+ uint64_t accessed : 1;
+ uint64_t _unused1 : 1;
+ uint64_t reserved : 1; // always 0
+
+ uint64_t _unused2 : 3;
+ uint64_t _unused3 : 1; // HLAT thing
+
+ uint64_t address : 40; // physical address >> 12 of the next-level table
+
+ uint64_t _unused4 : 11;
+ uint64_t noexec : 1;
+} __attribute__((packed));
+
+struct pdpte { // page directory pointer table entry, 1gb page | 512 * pde
+ uint64_t present : 1;
+ uint64_t writeable : 1;
+ uint64_t user : 1;
+ uint64_t writethru : 1;
+
+ uint64_t uncached : 1;
+ uint64_t accessed : 1;
+ uint64_t _unused1 : 1;
+ uint64_t large : 1; // 1gb page
+
+ uint64_t _unused2 : 3;
+ uint64_t _unused3 : 1; // HLAT
+
+ uint64_t address : 40; // physical address >> 12 (of a pde table when large is 0)
+
+ uint64_t _unused4 : 11;
+ uint64_t noexec : 1;
+} __attribute__((packed));
+
+struct pde { // page directory entry, 2mb page | 512 * pte
+ uint64_t present : 1;
+ uint64_t writeable : 1;
+ uint64_t user : 1;
+ uint64_t writethru : 1;
+
+ uint64_t uncached : 1;
+ uint64_t accessed : 1;
+ uint64_t dirty : 1; // only if large
+ uint64_t large : 1; // 2mb
+
+ uint64_t global : 1; // only if large ; TODO enable CR4.PGE
+ uint64_t _unused2 : 2;
+ uint64_t _unused3 : 1; // HLAT
+
+ uint64_t address : 40; // NOTE(review): original said "highest bit - PAT"; for large pages PAT looks like the lowest bit of this field, confirm
+
+ uint64_t _unused4 : 7;
+ uint64_t pke : 4;
+ uint64_t noexec : 1;
+} __attribute__((packed));
+
+struct pte { // page table entry, 4kb page; not used by pml4_identity_init()
+ uint64_t present : 1;
+ uint64_t writeable : 1;
+ uint64_t user : 1;
+ uint64_t writethru : 1;
+
+ uint64_t uncached : 1;
+ uint64_t accessed : 1;
+ uint64_t dirty : 1;
+ uint64_t pat : 1;
+
+ uint64_t global : 1; // TODO enable CR4.PGE
+ uint64_t _unused2 : 2;
+ uint64_t _unused3 : 1; // HLAT
+
+ uint64_t address : 40; // physical address >> 12
+
+ uint64_t _unused4 : 7;
+ uint64_t pke : 4;
+ uint64_t noexec : 1;
+} __attribute__((packed));
+
+__attribute__((aligned(4096))) // page-aligned: entries store this table's address >> 12
+struct pml4e pml4_identity[512]; // boot.s loads this symbol's address into %cr3
+
+__attribute__((aligned(4096)))
+struct pdpte pdpte_low[512]; // 0-512gb
+
+__attribute__((aligned(4096)))
+struct pde pde_low[512]; // 0-1gb
+
+
+static void *memset32(void *s, int c, size_t n) { // sets n bytes at s to (uint8_t)c and returns s
+ uint8_t *s2 = s;
+ for (size_t i = 0; i < n; i++) // one byte per iteration, despite the "32" in the name
+ s2[i] = c;
+ return s;
+}
+
+
+void pml4_identity_init(void) { // builds an identity mapping of the first 1gb out of 2mb pages
+ memset32(pml4_identity, 0, sizeof pml4_identity);
+ memset32(pdpte_low, 0, sizeof pdpte_low);
+ memset32(pde_low, 0, sizeof pde_low);
+
+ pml4_identity[0] = (struct pml4e) { // only entry 0 is populated; the other 511 stay zeroed
+ .present = 1,
+ .writeable = 1,
+ .address = ((uintptr_t)pdpte_low) >> 12,
+ };
+
+ pdpte_low[0] = (struct pdpte) { // likewise only the first 1gb slot gets a page directory
+ .present = 1,
+ .writeable = 1,
+ .address = ((uintptr_t)pde_low) >> 12,
+ };
+
+ for (int i = 0; i < 512; i++) { // 512 entries * 2mb = 1gb
+ pde_low[i] = (struct pde) {
+ .present = 1,
+ .writeable = 1,
+ .large = 1, // entry maps a 2mb page directly, no pte table
+ .address = (i * 2 * 1024 * 1024) >> 12, // physical == virtual; largest product is 511 * 2mb, still within int range
+ };
+ }
+}