文章

MIT 6.S081 | 0x03 Page tables

MIT 6.S081 | 0x03 Page tables

Before you start coding, read Chapter 3 of the xv6 book, and related files:

  • kernel/memlayout.h, which captures the layout of memory.
  • kernel/vm.c, which contains most virtual memory (VM) code.
  • kernel/kalloc.c, which contains code for allocating and freeing physical memory.

It may also help to consult the RISC-V privileged architecture manual.

🟩 Speed up system calls

When each process is created, map one read-only page at USYSCALL (a VA defined in memlayout.h). At the start of this page, store a struct usyscall (also defined in memlayout.h), and initialize it to store the PID of the current process. For this lab, ugetpid() has been provided on the userspace side and will automatically use the USYSCALL mapping. You will receive full credit for this part of the lab if the ugetpid test case passes when running pgtbltest.

分析

这个任务实际上是让我们为每个进程额外映射一个页,用来保存 usyscall 信息。通过完成这部分的内容,能够让我们知道给一个进程添加一个页面映射(页表项)需要哪些步骤:

  1. 首先,struct proc 中为 usyscall 维护一个指针
  2. allocproc 时用 kalloc() 为该指针分配一个物理页,并把 pid 写进去
  3. 在 proc_pagetable 中用 mappages() 把 USYSCALL 映射到这个物理页,权限为 PTE_R | PTE_U(用户态只读)
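  4. freeproc 时注意要释放这个页,同时 proc_freepagetable 里要 uvmunmap 掉 USYSCALL 的映射。注意:释放后最好像 trapframe 那样把 p->us 置 0(下面的 diff 没有做这一步);否则该进程槽位被复用、allocproc 在分配 p->us 之前就失败并调用 freeproc 时,旧指针会被再次 kfree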

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
diff --git a/kernel/proc.c b/kernel/proc.c
index 959b778..c76a405 100644
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -132,6 +132,14 @@ found:
     return 0;
   }
 
+  if((p->us = (struct usyscall *)kalloc()) == 0){
+    freeproc(p);
+    release(&p->lock);
+    return 0;
+  }
+
+  p->us->pid = p->pid;
+
   // An empty user page table.
   p->pagetable = proc_pagetable(p);
   if(p->pagetable == 0){
@@ -160,6 +168,8 @@ freeproc(struct proc *p)
   p->trapframe = 0;
   if(p->pagetable)
     proc_freepagetable(p->pagetable, p->sz);
+  if(p->us)
+    kfree((void*)p->us);
   p->pagetable = 0;
   p->sz = 0;
   p->pid = 0;
@@ -202,6 +212,14 @@ proc_pagetable(struct proc *p)
     return 0;
   }
 
+  if(mappages(pagetable, USYSCALL, PGSIZE,
+              (uint64)(p->us), PTE_R | PTE_U) < 0) {
+    uvmunmap(pagetable, TRAMPOLINE, 1, 0);
+    uvmunmap(pagetable, TRAPFRAME, 1, 0);
+    uvmfree(pagetable, 0);
+    return 0;
+  }
+
   return pagetable;
 }
 
@@ -212,6 +230,7 @@ proc_freepagetable(pagetable_t pagetable, uint64 sz)
 {
   uvmunmap(pagetable, TRAMPOLINE, 1, 0);
   uvmunmap(pagetable, TRAPFRAME, 1, 0);
+  uvmunmap(pagetable, USYSCALL, 1, 0);
   uvmfree(pagetable, sz);
 }
 
diff --git a/kernel/proc.h b/kernel/proc.h
index d021857..adc2781 100644
--- a/kernel/proc.h
+++ b/kernel/proc.h
@@ -100,6 +100,7 @@ struct proc {
   uint64 sz;                   // Size of process memory (bytes)
   pagetable_t pagetable;       // User page table
   struct trapframe *trapframe; // data page for trampoline.S
+  struct usyscall *us;
   struct context context;      // swtch() here to run process
   struct file *ofile[NOFILE];  // Open files
   struct inode *cwd;           // Current directory

🟩 Print a page table

Define a function called vmprint(). It should take a pagetable_t argument, and print that pagetable in the format described below. Insert if(p->pid==1) vmprint(p->pagetable) in exec.c just before the return argc, to print the first process’s page table. You receive full credit for this assignment if you pass the pte printout test of make grade.

分析

这一节的主要任务是实现 vmprint(),咱们狠狠地参考下 walk() 和 freewalk() 这两个函数就可以了。因为页表是一个三级结构,这里就使用了 3 层嵌套的 for 循环

代码

我这里还是写得麻烦了 😂,xv6 的 printf 可以直接用 %p 以 16 进制打印 64 位的值,不用我再写一个 print_uint64() 来打印。可以看到下个任务的 diff 里我已经改成 %p 了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
diff --git a/kernel/defs.h b/kernel/defs.h
index a3c962b..487f4f2 100644
--- a/kernel/defs.h
+++ b/kernel/defs.h
@@ -173,6 +173,7 @@ uint64          walkaddr(pagetable_t, uint64);
 int             copyout(pagetable_t, uint64, char *, uint64);
 int             copyin(pagetable_t, char *, uint64, uint64);
 int             copyinstr(pagetable_t, char *, uint64, uint64);
+void            vmprint(pagetable_t);
 
 // plic.c
 void            plicinit(void);
diff --git a/kernel/exec.c b/kernel/exec.c
index e18bbb6..f62e22d 100644
--- a/kernel/exec.c
+++ b/kernel/exec.c
@@ -128,6 +128,8 @@ exec(char *path, char **argv)
   p->trapframe->sp = sp; // initial stack pointer
   proc_freepagetable(oldpagetable, oldsz);
 
+  if (p->pid == 1) vmprint(p->pagetable);
+
   return argc; // this ends up in a0, the first argument to main(argc, argv)
 
  bad:
diff --git a/kernel/vm.c b/kernel/vm.c
index 9f69783..dc8a8e1 100644
--- a/kernel/vm.c
+++ b/kernel/vm.c
@@ -437,3 +437,57 @@ copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
     return -1;
   }
 }
+
+void
+print_uint64(uint64 value) {
+  char buffer[17];
+  const char *hex_digits = "0123456789abcdef";
+  for (int i = 15; i >= 0; --i) {
+    buffer[i] = hex_digits[value & 0xf];
+    value >>= 4;
+  }
+  buffer[16] = '\0';
+  printf("%s", buffer);
+}
+
+void
+vmprintmeta(int level, uint64 va, pte_t *pte)
+{
+  for (int i = 2; i > level; --i) {
+    printf(".. ");
+  }
+  printf("..%d: pte 0x", va);
+  print_uint64(*pte);
+  printf(" pa 0x");
+  print_uint64(PTE2PA(*pte));
+  printf("\n");
+}
+
+void
+vmprint(pagetable_t pagetable)
+{
+  printf("page table 0x");
+  print_uint64((uint64)pagetable);
+  printf("\n");
+
+  for (uint64 va = 0; va < 512; ++va) {
+    pte_t *pte = &pagetable[PX(2, (va << 30))];
+    if (*pte & PTE_V) {
+      vmprintmeta(2, va, pte);
+      pagetable_t pg1 = (pagetable_t)PTE2PA(*pte);
+      for (uint64 va1 = 0; va1 < 512; ++va1) {
+        pte_t *pte1 = &pg1[PX(1, (va1 << 21))];
+        if (*pte1 & PTE_V) {
+          vmprintmeta(1, va1, pte1);
+          pagetable_t pg0 = (pagetable_t)PTE2PA(*pte1);
+          for (uint64 va0 = 0; va0 < 512; ++va0) {
+            pte_t *pte0 = &pg0[PX(0, (va0 << 12))];
+            if (*pte0 & PTE_V) {
+              vmprintmeta(0, va0, pte0);
+            }
+          }
+        }
+      }
+    }
+  }
+}

🟥 Detecting which pages have been accessed

Your job is to implement pgaccess(), a system call that reports which pages have been accessed. The system call takes three arguments. First, it takes the starting virtual address of the first user page to check. Second, it takes the number of pages to check. Finally, it takes a user address to a buffer to store the results into a bitmask (a datastructure that uses one bit per page and where the first page corresponds to the least significant bit). You will receive full credit for this part of the lab if the pgaccess test case passes when running pgtbltest.

分析

这个题目虽然标红,但是感觉也不是很难,主要目的还是让我们理解并感受页表中 Flags 的作用以及使用。PTE_A 位于页表项 Flags 的第 6 位(从 0 开始计数,即 1L << 6),在查询时使用函数 walk() 拿到对应的 pagetable entry,判断该位是否被置位,然后把它清零(否则之后每次查询都会报告该页被访问过)就 ok 了

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
diff --git a/kernel/riscv.h b/kernel/riscv.h
index 20a01db..100895e 100644
--- a/kernel/riscv.h
+++ b/kernel/riscv.h
@@ -343,6 +343,7 @@ typedef uint64 *pagetable_t; // 512 PTEs
 #define PTE_W (1L << 2)
 #define PTE_X (1L << 3)
 #define PTE_U (1L << 4) // user can access
+#define PTE_A (1L << 6)
 
 // shift a physical address to the right place for a PTE.
 #define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
diff --git a/kernel/sysproc.c b/kernel/sysproc.c
index 88644b2..28cad51 100644
--- a/kernel/sysproc.c
+++ b/kernel/sysproc.c
@@ -75,6 +75,48 @@ int
 sys_pgaccess(void)
 {
   // lab pgtbl: your code here.
+  // Report which of the npages pages starting at base have PTE_A set, as a
+  // bitmask copied out to maskaddr (page 0 -> least significant bit).
+  uint64 base;              // user VA of the first page to inspect
+  int npages;               // number of pages to inspect
+  uint64 maskaddr;          // user VA that receives the result bitmask
+  unsigned int abits = 0;   // bit i set <=> page i was accessed
+  pte_t *pte;
+  struct proc *p = myproc();
+
+  argaddr(0, &base);
+  argint(1, &npages);
+  argaddr(2, &maskaddr);
+
+  // abits holds one bit per page, so cap the count at its width. This also
+  // keeps the shift below in range and rejects negative counts.
+  if (npages < 0 || npages > (int)(sizeof(abits) * 8))
+    return -1;
+
+  for (int i = 0; i < npages; ++i) {
+    uint64 va = base + (uint64)i * PGSIZE;
+
+    // Stock xv6 walk() panics on va >= MAXVA; fail the call instead.
+    if (va >= MAXVA)
+      return -1;
+
+    // walk() yields 0 when an intermediate table is missing, and the leaf
+    // entry may be invalid; such pages are reported as not accessed.
+    pte = walk(p->pagetable, va, 0);
+    if (pte == 0 || (*pte & PTE_V) == 0)
+      continue;
+
+    if (*pte & PTE_A) {
+      abits |= (1U << i);
+      // Clear the bit so the next call reports only new accesses.
+      *pte &= ~PTE_A;
+    }
+  }
+
+  if (copyout(p->pagetable, maskaddr, (char *)&abits, sizeof(abits)) < 0) {
+    return -1;
+  }
+
   return 0;
 }
 #endif
diff --git a/kernel/vm.c b/kernel/vm.c
index dc8a8e1..9d7ddb7 100644
--- a/kernel/vm.c
+++ b/kernel/vm.c
@@ -456,11 +456,7 @@ vmprintmeta(int level, uint64 va, pte_t *pte)
   for (int i = 2; i > level; --i) {
     printf(".. ");
   }
-  printf("..%d: pte 0x", va);
-  print_uint64(*pte);
-  printf(" pa 0x");
-  print_uint64(PTE2PA(*pte));
-  printf("\n");
+  printf("..%d: pte %p pa %p\n", va, *pte, PTE2PA(*pte));
 }
 
 void
本文由作者按照 CC BY 4.0 进行授权