MIT 6.S081 | 0x02 System calls

2023-12-02

Before you start coding, read Chapter 2 of the xv6 book, and Sections 4.3 and 4.4 of Chapter 4, and related source files:

  • The user-space code for system calls is in user/user.h and user/usys.pl.
  • The kernel-space code is kernel/syscall.h, kernel/syscall.c.
  • The process-related code is kernel/proc.h and kernel/proc.c.

🟦 System call tracing

In this assignment you will add a system call tracing feature that may help you when debugging later labs. You'll create a new trace system call that will control tracing. It should take one argument, an integer "mask", whose bits specify which system calls to trace. For example, to trace the fork system call, a program calls trace(1 << SYS_fork), where SYS_fork is a syscall number from kernel/syscall.h. You have to modify the xv6 kernel to print out a line when each system call is about to return, if the system call's number is set in the mask. The line should contain the process id, the name of the system call and the return value; you don't need to print the system call arguments. The trace system call should enable tracing for the process that calls it and any children that it subsequently forks, but should not affect other processes.

分析

这个任务的主要目的是让我们学习如何创建一个系统调用,同时通过实现 trace 这个调用让我们理解系统调用的整体流程。代码量不大,但也足够精巧。根据实验提示和简单的代码阅读,我们可以知道在系统调用时操作系统大体做了如下几件事:

  1. 触发中断,执行 syscall() 函数(kernel/trap.c:67)

  2. syscall() 函数(kernel/syscall.c:162)根据系统调用号(保存在 trapframe 的 a7 中)从系统调用表(kernel/syscall.c:109)中找到对应的函数入口,执行对应的系统调用

    // Dispatch the system call requested by the current process.
    // The call number is read from the saved a7 in the trapframe; the
    // handler's return value (or -1 for an unknown number) is written to
    // the saved a0.
    void
    syscall(void)
    {
      int num;
      struct proc *p = myproc();
    
      num = p->trapframe->a7;
      // Accept only numbers that index inside the table and have a
      // non-null handler entry; index 0 is never treated as valid.
      if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
        // Use num to lookup the system call function for num, call it,
        // and store its return value in p->trapframe->a0
        p->trapframe->a0 = syscalls[num]();
      } else {
        // Unknown call number: report it and store -1 as the result.
        printf("%d %s: unknown sys call %d\n",
                p->pid, p->name, num);
        p->trapframe->a0 = -1;
      }
    }
    

接下来的思路就很直接,要想 trace 系统调用,只需要在执行 syscall 时输出相应的调用名即可。现在剩下几个步骤/小问题:

  1. 题目中给了提示,要我们在 struct proc 中维护一个 mask 来使用 trace 功能。这个 mask 在创建进程时初始化,在 syscall 时如果为零则不打印相关信息,否则判断当前的系统调用号是否在 mask 中来判断是否打印信息
  2. fork 后的子进程是否进行 trace 由其父进程决定。所以在创建新进程时,只需将父进程的 mask 赋值给子进程
  3. 这个 mask 是不需要重新设为 0 的,因为 trace 只对目标进程设置 mask,该进程结束时也不会对其他进程产生影响;并且 allocproc 在每次分配进程时都会把 mask 初始化为 0(因此下面 diff 中 sys_exit 里的清零其实是多余的)

代码

diff --git a/Makefile b/Makefile
index ded5bc2..97e2954 100644
--- a/Makefile
+++ b/Makefile
@@ -184,6 +184,7 @@ UPROGS=\
  $U/_rm\
  $U/_sh\
  $U/_stressfs\
+ $U/_trace\
  $U/_usertests\
  $U/_grind\
  $U/_wc\
diff --git a/kernel/proc.c b/kernel/proc.c
index 959b778..d7cefdb 100644
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -124,6 +124,7 @@ allocproc(void)
 found:
   p->pid = allocpid();
   p->state = USED;
+  p->tracemask = 0;
 
   // Allocate a trapframe page.
   if((p->trapframe = (struct trapframe *)kalloc()) == 0){
@@ -312,6 +313,8 @@ fork(void)
 
   pid = np->pid;
 
+  np->tracemask = p->tracemask;
+
   release(&np->lock);
 
   acquire(&wait_lock);
diff --git a/kernel/proc.h b/kernel/proc.h
index d021857..aeb6cf0 100644
--- a/kernel/proc.h
+++ b/kernel/proc.h
@@ -91,6 +91,7 @@ struct proc {
   int killed;                  // If non-zero, have been killed
   int xstate;                  // Exit status to be returned to parent's wait
   int pid;                     // Process ID
+  int tracemask;
 
   // wait_lock must be held when using this:
   struct proc *parent;         // Parent process
diff --git a/kernel/syscall.c b/kernel/syscall.c
index ed65409..873793a 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -101,6 +101,7 @@ extern uint64 sys_unlink(void);
 extern uint64 sys_link(void);
 extern uint64 sys_mkdir(void);
 extern uint64 sys_close(void);
+extern uint64 sys_trace(void);
 
 // An array mapping syscall numbers from syscall.h
 // to the function that handles the system call.
@@ -126,6 +127,32 @@ static uint64 (*syscalls[])(void) = {
 [SYS_link]    sys_link,
 [SYS_mkdir]   sys_mkdir,
 [SYS_close]   sys_close,
+[SYS_trace]   sys_trace,
+};
+
+static const char* sysnames[] = {
+[SYS_fork]    "fork",
+[SYS_exit]    "exit",
+[SYS_wait]    "wait",
+[SYS_pipe]    "pipe",
+[SYS_read]    "read",
+[SYS_kill]    "kill",
+[SYS_exec]    "exec",
+[SYS_fstat]   "fstat",
+[SYS_chdir]   "chdir",
+[SYS_dup]     "dup",
+[SYS_getpid]  "getpid",
+[SYS_sbrk]    "sbrk",
+[SYS_sleep]   "sleep",
+[SYS_uptime]  "uptime",
+[SYS_open]    "open",
+[SYS_write]   "write",
+[SYS_mknod]   "mknod",
+[SYS_unlink]  "unlink",
+[SYS_link]    "link",
+[SYS_mkdir]   "mkdir",
+[SYS_close]   "close",
+[SYS_trace]   "trace",
 };
 
 void
@@ -139,6 +166,11 @@ syscall(void)
     // Use num to lookup the system call function for num, call it,
     // and store its return value in p->trapframe->a0
     p->trapframe->a0 = syscalls[num]();
+    if ((1 << num) & p->tracemask) {
+      int pid = p->pid;
+      int ret = p->trapframe->a0;
+      printf("%d: syscall %s -> %d\n", pid, sysnames[num], ret);
+    }
   } else {
     printf("%d %s: unknown sys call %d\n",
             p->pid, p->name, num);
diff --git a/kernel/syscall.h b/kernel/syscall.h
index bc5f356..cc112b9 100644
--- a/kernel/syscall.h
+++ b/kernel/syscall.h
@@ -20,3 +20,4 @@
 #define SYS_link   19
 #define SYS_mkdir  20
 #define SYS_close  21
+#define SYS_trace  22
diff --git a/kernel/sysproc.c b/kernel/sysproc.c
index 3b4d5bd..ce17950 100644
--- a/kernel/sysproc.c
+++ b/kernel/sysproc.c
@@ -11,6 +11,7 @@ sys_exit(void)
 {
   int n;
   argint(0, &n);
+  myproc()->tracemask = 0;
   exit(n);
   return 0;  // not reached
 }
@@ -91,3 +92,16 @@ sys_uptime(void)
   release(&tickslock);
   return xticks;
 }
+
+uint64
+sys_trace(void)
+{
+  int mask;
+
+  argint(0, &mask);
+
+  // TODO
+  myproc()->tracemask = mask;
+
+  return 0;
+}
diff --git a/user/user.h b/user/user.h
index 4d398d5..0bf4333 100644
--- a/user/user.h
+++ b/user/user.h
@@ -22,6 +22,7 @@ int getpid(void);
 char* sbrk(int);
 int sleep(int);
 int uptime(void);
+int trace(int);
 
 // ulib.c
 int stat(const char*, struct stat*);
diff --git a/user/usys.pl b/user/usys.pl
index 01e426e..9c97b05 100755
--- a/user/usys.pl
+++ b/user/usys.pl
@@ -36,3 +36,4 @@ entry("getpid");
 entry("sbrk");
 entry("sleep");
 entry("uptime");
+entry("trace");

🟦 Sysinfo

In this assignment you will add a system call, sysinfo, that collects information about the running system. The system call takes one argument: a pointer to a struct sysinfo (see kernel/sysinfo.h). The kernel should fill out the fields of this struct: the freemem field should be set to the number of bytes of free memory, and the nproc field should be set to the number of processes whose state is not UNUSED. We provide a test program sysinfotest; you pass this assignment if it prints "sysinfotest: OK".

分析

简单来说,这个问题就是让我们在调用 sysinfo 时从 kernel mode 向 user mode 复制相关的系统信息:freemem 以及 nproc

  • sysinfo 这个系统调用和前面的 trace 一样,先添加系统调用号、再往调用表中添加函数入口 balabala。函数体 sys_sysinfo 的位置我放到 kernel/sysfile.c 里面了,同时将 getfreemem、getnproc 这两个函数签名在这个文件用 extern 标识了一下。sys_sysinfo 的执行倒是简单,使用 copyout 将两个 getxxx 得到的值复制到用户空间即可

    // sysinfo system call: fill a struct sysinfo with the values reported by
    // getnproc() and getfreemem(), then copy it to the user address passed
    // as argument 0.
    // Returns 0 on success, -1 if copyout to the user address fails.
    uint64
    sys_sysinfo(void)
    {
      // User-space destination address, fetched from syscall argument 0.
      uint64 pinfo;
      argaddr(0, &pinfo);
    
      // Build the result in a kernel-side struct first.
      struct sysinfo info;
      info.nproc = getnproc();
      info.freemem = getfreemem();
    
      // Copy through the calling process's page table; a negative result
      // from copyout is reported to the caller as -1.
      if (copyout(myproc()->pagetable, pinfo, (char*)&info, sizeof(info)) < 0) {
        return -1;
      }
    
      return 0;
    }
    
  • getfreemem 添加到 kernel/kalloc.c 中。为了记录空闲空间,需要添加一个全局变量 freemem 记录当前的空闲空间大小。简单分析一遍相关代码,其实可以看到 kernel/kalloc.c 这个文件中,分配和释放空间的函数分别只有 kalloc 和 kfree,所以 freemem 这个变量只需要在这两个函数中进行修改

    这里只有一个坑,kalloc 只有在确实拿到空闲页(r 非空,也就是会执行 “fill with junk” 的那个分支)时才能修改 freemem 的值。另外要注意,下面的实现是在 release(&kmem.lock) 之后才修改 freemem 的,多核下存在竞争;更稳妥的做法是在持有 kmem.lock 时完成更新

  • getnproc 和 getfreemem 差不多,只需要在 allocproc 和 freeproc 时修改 nproc 的值即可

代码

才发现我把 4096 写死了 😂,这里建议用 PGSIZE

diff --git a/Makefile b/Makefile
index 97e2954..eb6f06a 100644
--- a/Makefile
+++ b/Makefile
@@ -184,6 +184,7 @@ UPROGS=\
  $U/_rm\
  $U/_sh\
  $U/_stressfs\
+ $U/_sysinfotest\
  $U/_trace\
  $U/_usertests\
  $U/_grind\
diff --git a/answers-syscall.txt b/answers-syscall.txt
new file mode 100644
index 0000000..62a5ecc
--- /dev/null
+++ b/answers-syscall.txt
@@ -0,0 +1 @@
+There is nothing
\ No newline at end of file
diff --git a/kernel/kalloc.c b/kernel/kalloc.c
index 0699e7e..da31ea7 100644
--- a/kernel/kalloc.c
+++ b/kernel/kalloc.c
@@ -23,6 +23,8 @@ struct {
   struct run *freelist;
 } kmem;
 
+uint64 freemem = 0;
+
 void
 kinit()
 {
@@ -59,6 +61,7 @@ kfree(void *pa)
   acquire(&kmem.lock);
   r->next = kmem.freelist;
   kmem.freelist = r;
+  freemem += 4096;
   release(&kmem.lock);
 }
 
@@ -76,7 +79,15 @@ kalloc(void)
     kmem.freelist = r->next;
   release(&kmem.lock);
 
-  if(r)
+  if(r) {
     memset((char*)r, 5, PGSIZE); // fill with junk
+    freemem -= 4096;
+  }
   return (void*)r;
 }
+
+uint64
+getfreemem()
+{
+  return freemem;
+}
diff --git a/kernel/proc.c b/kernel/proc.c
index d7cefdb..227ccce 100644
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -12,6 +12,8 @@ struct proc proc[NPROC];
 
 struct proc *initproc;
 
+int nproc = 0;
+
 int nextpid = 1;
 struct spinlock pid_lock;
 
@@ -89,6 +91,12 @@ myproc(void)
   return p;
 }
 
+int
+getnproc(void)
+{
+  return nproc;
+}
+
 int
 allocpid()
 {
@@ -147,6 +155,8 @@ found:
   p->context.ra = (uint64)forkret;
   p->context.sp = p->kstack + PGSIZE;
 
+  nproc++;
+
   return p;
 }
 
@@ -170,6 +180,7 @@ freeproc(struct proc *p)
   p->killed = 0;
   p->xstate = 0;
   p->state = UNUSED;
+  nproc--;
 }
 
 // Create a user page table for a given process, with no user memory,
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 873793a..1ab2b3a 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -102,6 +102,7 @@ extern uint64 sys_link(void);
 extern uint64 sys_mkdir(void);
 extern uint64 sys_close(void);
 extern uint64 sys_trace(void);
+extern uint64 sys_sysinfo(void);
 
 // An array mapping syscall numbers from syscall.h
 // to the function that handles the system call.
@@ -128,6 +129,7 @@ static uint64 (*syscalls[])(void) = {
 [SYS_mkdir]   sys_mkdir,
 [SYS_close]   sys_close,
 [SYS_trace]   sys_trace,
+[SYS_sysinfo] sys_sysinfo,
 };
 
 static const char* sysnames[] = {
@@ -153,6 +155,7 @@ static const char* sysnames[] = {
 [SYS_mkdir]   "mkdir",
 [SYS_close]   "close",
 [SYS_trace]   "trace",
+[SYS_sysinfo] "sysinfo",
 };
 
 void
diff --git a/kernel/syscall.h b/kernel/syscall.h
index cc112b9..0dfedc7 100644
--- a/kernel/syscall.h
+++ b/kernel/syscall.h
@@ -21,3 +21,4 @@
 #define SYS_mkdir  20
 #define SYS_close  21
 #define SYS_trace  22
+#define SYS_sysinfo 23
diff --git a/kernel/sysfile.c b/kernel/sysfile.c
index 16b668c..b675a82 100644
--- a/kernel/sysfile.c
+++ b/kernel/sysfile.c
@@ -15,6 +15,10 @@
 #include "sleeplock.h"
 #include "file.h"
 #include "fcntl.h"
+#include "sysinfo.h"
+
+extern uint64 getnproc();
+extern uint64 getfreemem();
 
 // Fetch the nth word-sized system call argument as a file descriptor
 // and return both the descriptor and the corresponding struct file.
@@ -119,6 +123,23 @@ sys_fstat(void)
   return filestat(f, st);
 }
 
+uint64
+sys_sysinfo(void)
+{
+  uint64 pinfo;
+  argaddr(0, &pinfo);
+
+  struct sysinfo info;
+  info.nproc = getnproc();
+  info.freemem = getfreemem();
+
+  if (copyout(myproc()->pagetable, pinfo, (char*)&info, sizeof(info)) < 0) {
+    return -1;
+  }
+
+  return 0;
+}
+
 // Create the path new as a link to the same inode as old.
 uint64
 sys_link(void)
diff --git a/time.txt b/time.txt
new file mode 100644
index 0000000..e440e5c
--- /dev/null
+++ b/time.txt
@@ -0,0 +1 @@
+3
\ No newline at end of file
diff --git a/user/user.h b/user/user.h
index 0bf4333..14f69e5 100644
--- a/user/user.h
+++ b/user/user.h
@@ -1,4 +1,5 @@
 struct stat;
+struct sysinfo;
 
 // system calls
 int fork(void);
@@ -23,6 +24,7 @@ char* sbrk(int);
 int sleep(int);
 int uptime(void);
 int trace(int);
+int sysinfo(struct sysinfo*);
 
 // ulib.c
 int stat(const char*, struct stat*);
diff --git a/user/usys.pl b/user/usys.pl
index 9c97b05..bc109fd 100755
--- a/user/usys.pl
+++ b/user/usys.pl
@@ -37,3 +37,4 @@ entry("sbrk");
 entry("sleep");
 entry("uptime");
 entry("trace");
+entry("sysinfo");