MIT 6.S081 | 0x0a Mmap
You should implement enough mmap
and munmap
functionality to make the mmaptest
test program work. If mmaptest
doesn't use a mmap
feature, you don't need to implement that feature.
本节实验严重参考了 Millgon's blog 的做法
分析
之前都没怎么接触过这俩函数,于是问了下通义千问。
函数解释
mmap()
和 munmap()
是在Unix和类Unix操作系统(如Linux)中用于内存映射的系统调用。这些函数允许用户空间程序直接访问内核管理的内存或文件,提供了一种高效的数据访问机制。
mmap()
mmap()
函数的主要作用是将一个文件或其它对象映射到进程的地址空间中,这样文件的内容就可以像访问普通内存一样进行读写。当一个文件被映射后,对映射区域的任何修改都会反映到原始文件上,除非使用了某种特殊的映射标志来改变这种行为。
mmap()
的基本原型如下:
#include <sys/mman.h>
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
参数解释:
addr
:建议的映射起始地址,通常设为NULL
让内核选择合适的地址。length
:要映射的字节数。prot
:页面保护标志,指定映射区域的权限,例如读、写等。flags
:映射类型和特殊标志,例如MAP_SHARED
(共享映射)或MAP_PRIVATE
(私有拷贝映射)。fd
:文件描述符,通常是通过open()
打开的文件。offset
:文件的偏移量,映射的起点位置。
munmap()
munmap()
函数的作用是取消之前通过mmap()
创建的内存映射。一旦调用munmap()
,对原来映射区域的访问将不再有效,且对该区域的任何引用都会导致段错误(Segmentation fault)。
munmap()
的基本原型如下:
#include <sys/mman.h>
int munmap(void *addr, size_t length);
参数解释:
addr
:映射区域的起始地址,这是mmap()
返回的地址。length
:映射区域的长度,与mmap()
中的length
相同。
这两个函数提供了非常强大的功能,可以用于实现高效的文件I/O、进程间的共享内存通信以及动态链接库的加载等场景。然而,由于它们直接操作内核资源,因此在使用时需要特别小心,确保正确的地址和长度被传递给函数,避免内存泄漏或其他潜在的错误。
实现思路
mmap()
是将一个文件映射到进程的地址空间,看一下进程在内存中的布局,这个映射的起始地址可以选为 TRAPEFRAME
之前的空闲区域。内存的映射和懒加载有点像 COW 那节实验做的事,mmap()
时只需要记录一下映射的信息,然后在缺页异常时再去加载文件内容。进程数据也需要记录一下这个映射信息,方便后续的各种操作。总结一下,需要做以下几个事情:
mmap()
:在proc
结构体中增加一个vma
数组,用于存储虚拟内存区域的信息。在sysfile.c
中实现sys_mmap()
函数,用于处理mmap()
系统调用。在trap.c
中处理缺页异常,实现懒加载。munmap()
:在sysfile.c
中实现sys_munmap()
函数,用于处理munmap()
系统调用。- 在
allocproc()
和freeproc()
中初始化和释放vma
。在fork()
中复制vma
。
代码
diff --git a/Makefile b/Makefile
index 9b83591..5f6af9c 100644
--- a/Makefile
+++ b/Makefile
@@ -188,6 +188,7 @@ UPROGS=\
$U/_grind\
$U/_wc\
$U/_zombie\
+ $U/_mmaptest\
diff --git a/kernel/defs.h b/kernel/defs.h
index a3c962b..14605aa 100644
--- a/kernel/defs.h
+++ b/kernel/defs.h
@@ -8,6 +8,7 @@ struct spinlock;
struct sleeplock;
struct stat;
struct superblock;
+struct vma;
// bio.c
void binit(void);
@@ -141,6 +142,10 @@ int fetchstr(uint64, char*, int);
int fetchaddr(uint64, uint64*);
void syscall();
+// sysfile.c
+int vmatryalloc(uint64 va);
+void vmaunmap(pagetable_t pagetable, uint64 va, uint64 sz, struct vma *v);
+
// trap.c
extern uint ticks;
void trapinit(void);
diff --git a/kernel/memlayout.h b/kernel/memlayout.h
index 776f98c..61c030c 100644
--- a/kernel/memlayout.h
+++ b/kernel/memlayout.h
@@ -65,3 +65,6 @@
// TRAPFRAME (p->trapframe, used by the trampoline)
// TRAMPOLINE (the same page as in the kernel)
#define TRAPFRAME (TRAMPOLINE - PGSIZE)
+
+// Used for virtual memory areas
+#define VMAEND TRAPFRAME
diff --git a/kernel/param.h b/kernel/param.h
index 6624bff..3b99e82 100644
--- a/kernel/param.h
+++ b/kernel/param.h
@@ -1,5 +1,6 @@
#define NPROC 64 // maximum number of processes
#define NCPU 8 // maximum number of CPUs
+#define NVMA 16 // maximum number of virtual memory areas
#define NOFILE 16 // open files per process
#define NFILE 100 // open files per system
#define NINODE 50 // maximum number of active i-nodes
diff --git a/kernel/proc.c b/kernel/proc.c
index 959b778..efd03c7 100644
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -146,6 +146,10 @@ found:
p->context.ra = (uint64)forkret;
p->context.sp = p->kstack + PGSIZE;
+ for (int i = 0; i < NVMA; i++) {
+ p->vma[i].valid = 0;
+ }
+
return p;
}
@@ -158,6 +162,10 @@ freeproc(struct proc *p)
if(p->trapframe)
kfree((void*)p->trapframe);
p->trapframe = 0;
+ for (int i = 0; i < NVMA; i++) {
+ struct vma *v = &p->vma[i];
+ vmaunmap(p->pagetable, v->addr, v->sz, v);
+ }
if(p->pagetable)
proc_freepagetable(p->pagetable, p->sz);
p->pagetable = 0;
@@ -308,6 +316,14 @@ fork(void)
np->ofile[i] = filedup(p->ofile[i]);
np->cwd = idup(p->cwd);
+ for (i = 0; i < NVMA; i++) {
+ struct vma *v = &p->vma[i];
+ if (v->valid) {
+ np->vma[i] = *v;
+ filedup(v->f);
+ }
+ }
+
safestrcpy(np->name, p->name, sizeof(p->name));
pid = np->pid;
diff --git a/kernel/proc.h b/kernel/proc.h
index d021857..e1ff755 100644
--- a/kernel/proc.h
+++ b/kernel/proc.h
@@ -79,6 +79,18 @@ struct trapframe {
/* 280 */ uint64 t6;
};
+// Virtual memory areas
+struct vma {
+ int valid;
+
+ uint64 addr;
+ int sz;
+ struct file *f;
+ int prot;
+ int flags;
+ int offset;
+};
+
enum procstate { UNUSED, USED, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
// Per-process state
@@ -104,4 +116,5 @@ struct proc {
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
+ struct vma vma[NVMA]; // Virual memory areas
};
diff --git a/kernel/riscv.h b/kernel/riscv.h
index 20a01db..3b14dca 100644
--- a/kernel/riscv.h
+++ b/kernel/riscv.h
@@ -343,6 +343,9 @@ typedef uint64 *pagetable_t; // 512 PTEs
#define PTE_W (1L << 2)
#define PTE_X (1L << 3)
#define PTE_U (1L << 4) // user can access
+#define PTE_G (1L << 5) // global mapping
+#define PTE_A (1L << 6) // accessed
+#define PTE_D (1L << 7) // dirty
// shift a physical address to the right place for a PTE.
#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
diff --git a/kernel/syscall.c b/kernel/syscall.c
index ed65409..4fb9baa 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -101,6 +101,8 @@ extern uint64 sys_unlink(void);
extern uint64 sys_link(void);
extern uint64 sys_mkdir(void);
extern uint64 sys_close(void);
+extern uint64 sys_mmap(void);
+extern uint64 sys_munmap(void);
// An array mapping syscall numbers from syscall.h
// to the function that handles the system call.
@@ -126,6 +128,8 @@ static uint64 (*syscalls[])(void) = {
[SYS_link] sys_link,
[SYS_mkdir] sys_mkdir,
[SYS_close] sys_close,
+[SYS_mmap] sys_mmap,
+[SYS_munmap] sys_munmap,
};
void
diff --git a/kernel/syscall.h b/kernel/syscall.h
index bc5f356..e7b18d6 100644
--- a/kernel/syscall.h
+++ b/kernel/syscall.h
@@ -20,3 +20,5 @@
#define SYS_link 19
#define SYS_mkdir 20
#define SYS_close 21
+#define SYS_mmap 22
+#define SYS_munmap 23
diff --git a/kernel/sysfile.c b/kernel/sysfile.c
index 16b668c..849bdd2 100644
--- a/kernel/sysfile.c
+++ b/kernel/sysfile.c
@@ -15,6 +15,7 @@
#include "sleeplock.h"
#include "file.h"
#include "fcntl.h"
+#include "memlayout.h"
// Fetch the nth word-sized system call argument as a file descriptor
// and return both the descriptor and the corresponding struct file.
@@ -503,3 +504,180 @@ sys_pipe(void)
}
return 0;
}
+
+uint64
+sys_mmap(void)
+{
+ uint64 addr;
+ int sz, prot, flags, fd, offset;
+ struct file *f;
+ struct proc *p = myproc();
+
+ argaddr(0, &addr);
+ argint(1, &sz);
+ argint(2, &prot);
+ argint(3, &flags);
+ if (argfd(4, &fd, &f) < 0)
+ return -1;
+ argint(5, &offset);
+
+ if ((!f->readable && (prot & PROT_READ)) || (!f->writable && (prot & PROT_WRITE) && !(flags & MAP_PRIVATE)))
+ return -1;
+
+ sz = PGROUNDUP(sz);
+
+ struct vma* v = 0;
+
+ uint64 vmaend = VMAEND;
+
+ for (int i = 0; i < NVMA; i++) {
+ if (!p->vma[i].valid) {
+ v = &p->vma[i];
+ v->valid = 1;
+ break;
+ }
+ if (p->vma[i].addr < vmaend) {
+ vmaend = PGROUNDDOWN(p->vma[i].addr);
+ }
+ }
+
+ if (v == 0)
+ panic("mmap: no free space.");
+
+
+ v->addr = vmaend - sz;
+ v->sz = sz;
+ v->prot = prot;
+ v->flags = flags;
+ v->f = f;
+ v->offset = offset;
+
+ filedup(f);
+
+ return v->addr;
+}
+
+uint64
+sys_munmap(void)
+{
+ uint64 addr;
+ int sz;
+
+ argaddr(0, &addr);
+ argint(1, &sz);
+
+ struct proc *p = myproc();
+ struct vma *v = 0;
+ for (int i = 0; i < NVMA; i++) {
+ if (p->vma[i].valid && p->vma[i].addr <= addr && addr < p->vma[i].addr + p->vma[i].sz) {
+ v = &p->vma[i];
+ break;
+ }
+ }
+
+ if (v == 0) {
+ return -1;
+ }
+
+ if (addr > v->addr && addr + sz < v->addr + v->sz) {
+ return -1;
+ }
+
+ uint64 aaddr = addr;
+ if (addr > v->addr) {
+ aaddr = PGROUNDUP(addr);
+ }
+
+ int nbytes = sz - (aaddr - addr); // bytes to unmap
+ if (nbytes < 0) {
+ nbytes = 0;
+ }
+
+ vmaunmap(p->pagetable, aaddr, nbytes, v);
+
+ if (addr <= v->addr && addr + sz > v->addr) {
+ v->offset += addr + sz - v->addr;
+ v->addr = addr + sz;
+ }
+ v->sz -= sz;
+
+ if (v->sz <= 0) {
+ fileclose(v->f);
+ v->valid = 0;
+ }
+
+ return 0;
+}
+
+int
+vmatryalloc(uint64 va) {
+ struct proc *p = myproc();
+ struct vma *v = 0;
+
+ for (int i = 0; i < NVMA; i++) {
+ if (p->vma[i].valid && p->vma[i].addr <= va && va < p->vma[i].addr + p->vma[i].sz) {
+ v = &p->vma[i];
+ break;
+ }
+ }
+
+ if (v == 0) {
+ printf("vmatryalloc(): cannot find vma contain specific addr %p\n", va);
+ return -1;
+ }
+
+ void *pa = kalloc();
+ if (pa == 0)
+ panic("vmatryalloc(): kalloc");
+ memset(pa, 0, PGSIZE);
+
+ begin_op();
+ ilock(v->f->ip);
+ readi(v->f->ip, 0, (uint64)pa, v->offset + PGROUNDDOWN(va - v->addr), PGSIZE);
+ iunlock(v->f->ip);
+ end_op();
+
+ if (mappages(p->pagetable, va, PGSIZE, (uint64)pa, PTE_R | PTE_W | PTE_U) < 0)
+ panic("vmatryalloc(): mappages");
+
+ return 0;
+}
+
+void
+vmaunmap(pagetable_t pagetable, uint64 va, uint64 nbytes, struct vma *v)
+{
+ uint64 a;
+ pte_t *pte;
+
+ if((va % PGSIZE) != 0)
+ panic("vmaunmap: not aligned");
+
+ for(a = va; a < va + nbytes; a += PGSIZE){
+ if((pte = walk(pagetable, a, 0)) == 0)
+ continue;
+ if(PTE_FLAGS(*pte) == PTE_V)
+ panic("uvmunmap: not a leaf");
+ if(*pte & PTE_V) {
+ uint64 pa = PTE2PA(*pte);
+ if ((*pte & PTE_D) && (v->flags & MAP_SHARED)) {
+ begin_op();
+ ilock(v->f->ip);
+ uint64 off = a - v->addr;
+ if (off < 0) {
+ // the first page is not full
+ writei(v->f->ip, 0, pa + (-off), v->offset, PGSIZE + off);
+ } else if (off + PGSIZE > v->sz) {
+ // the last page is not full
+ writei(v->f->ip, 0, pa, v->offset + off, v->sz - off);
+ } else {
+ // full page
+ writei(v->f->ip, 0, pa, v->offset + off, PGSIZE);
+ }
+ iunlock(v->f->ip);
+ end_op();
+ }
+ kfree((void*)pa);
+ *pte = 0;
+ }
+ }
+}
\ No newline at end of file
diff --git a/kernel/trap.c b/kernel/trap.c
index 512c850..263726e 100644
--- a/kernel/trap.c
+++ b/kernel/trap.c
@@ -67,6 +67,11 @@ usertrap(void)
syscall();
} else if((which_dev = devintr()) != 0){
// ok
+ } else if(r_scause() == 13 || r_scause() == 15) {
+ if (vmatryalloc(r_stval()) != 0) {
+ printf("usertrap(): vma lazy allocation error!\n");
+ setkilled(p);
+ }
} else {
printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
printf(" sepc=%p stval=%p\n", r_sepc(), r_stval());
diff --git a/user/user.h b/user/user.h
index 4d398d5..eaf7e4e 100644
--- a/user/user.h
+++ b/user/user.h
@@ -22,6 +22,8 @@ int getpid(void);
char* sbrk(int);
int sleep(int);
int uptime(void);
+void* mmap(const void*, int, int, int, int, int);
+int munmap(const void*, int);
// ulib.c
int stat(const char*, struct stat*);
diff --git a/user/usys.pl b/user/usys.pl
index 01e426e..d23b9cc 100755
--- a/user/usys.pl
+++ b/user/usys.pl
@@ -36,3 +36,5 @@ entry("getpid");
entry("sbrk");
entry("sleep");
entry("uptime");
+entry("mmap");
+entry("munmap");