走~走走~走走走走​🚶‍♂️🚶‍♂️🚶‍♂️

MIT 6.S081 Lab4-6 trap & Lazy & COW


这下大鸽子了,隔了一个月才写了这点。但不会完全鸽的。尽量在寒假前完成绝大部分lab。

或许我也想稍微摆点烂,因此把2021删掉的lab lazy也拿来做做拖时间

Lab Trap(2021)

课程

基本概念

  1. 用户态中,RISC-V有32个用户寄存器以及SATP寄存器(User page table),trap需要将这些寄存器保存,并切换SATP到kernel page table。
  2. 内核态的切换中将会使用以下寄存器
  • stvec:trap handler的位置
  • sepc:保存用户的pc寄存器,sret等需要用到。
  • scause:reason for the trap
  • sstatus:其中,SIE bit表示device interrupts的enable;而SPP表示trap来自哪个模式(user还是supervisor)。
  1. 基本的trap过程:
    trap
    + start executing at the new pc.
  2. supervisor mode:实际上,supervisor mode可做的事很少。其相比于User mode,可以读写控制寄存器(也就是satp,stvec这些),以及使用PTE_U标志位为0的PTE,除此之外便无法进行,读取地址也只能依靠kernel page table。

uservec以及之前

该trap为用户态系统调用等。
例如,用户态中通过shell调用write的时候会有:

write riscv

其便会在a7寄存器寄存值,调用系统调用;且在其他寄存器配置write的基本参数,ecall进入uservec(trampoline)中。此时,ecall做了三件事:

  1. user mode-> supervisor mode
  2. pc-> sepc
  3. jump to stvec(此处为trampoline的起始地址,也就是uservec).

此时,需要uservec做以下事:

  1. 保存32个用户寄存器
  2. 切换kernel page table
  3. 创建或找到kernel stack,改变sp。
  4. jump to kernel C code(usertrap)

以上步骤均通过uservec的汇编代码执行。需要注意的是,trampoline和trapframe(用户寄存器以及kernel寄存器保存处)在user 和kernel page table中都是固定的,这防止了因ecall导致的可能的页表错误。

汇编代码中的sscratch用于暂存a0(通过csrrw将a0与sscratch的值交换),让接下来的保存寄存器等操作可以通过a0实现。

usertrap

该函数如下所示:

//
// handle an interrupt, exception, or system call from user space.
// Panics if the trap did not come from user mode, redirects further
// traps to kernelvec, saves the user pc, dispatches on scause, and
// finally returns to user space through usertrapret().
//
void
usertrap(void)
{
  int which_dev = 0;
  // SPP set means the trap was taken from supervisor mode.
  if((r_sstatus() & SSTATUS_SPP) != 0)
    panic("usertrap: not from user mode");

  // send interrupts and exceptions to kerneltrap(),
  // since we're now in the kernel.
  w_stvec((uint64)kernelvec);

  struct proc *p = myproc();
  
  // save user program counter.
  p->trapframe->epc = r_sepc();
  
  if(r_scause() == 8){
    // system call

    if(p->killed)
      exit(-1);

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->trapframe->epc += 4;

    // an interrupt will change sstatus &c registers,
    // so don't enable until done with those registers.
    intr_on();

    syscall();
  } else if((which_dev = devintr()) != 0){
    // ok
  } else {
    // neither a system call nor a recognized device interrupt:
    // report it and mark the process to be killed.
    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }

  if(p->killed)
    exit(-1);

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2)
    yield();

  usertrapret();
}

感觉还是很好理解的。其做了以下事情:

  1. 检查SPP等。
  2. 在内核进行任何操作之前,写入kernelvec,使得若发生kernel trap(一般而言就是exception)跳转到kernelvec。
  3. 保存用户程序计数器(防止切换改变SEPC)
  4. 根据不同情况,进行不同的处理。

usertrapret

//
// return to user space.
// Points stvec back at the trampoline's uservec, records in the
// trapframe what uservec needs on the next trap, prepares sstatus
// and sepc, then jumps to userret in the trampoline.
//
void
usertrapret(void)
{
  struct proc *p = myproc();

  // we're about to switch the destination of traps from
  // kerneltrap() to usertrap(), so turn off interrupts until
  // we're back in user space, where usertrap() is correct.
  intr_off();

  // send syscalls, interrupts, and exceptions to trampoline.S
  w_stvec(TRAMPOLINE + (uservec - trampoline));

  // set up trapframe values that uservec will need when
  // the process next re-enters the kernel.
  p->trapframe->kernel_satp = r_satp();         // kernel page table
  p->trapframe->kernel_sp = p->kstack + PGSIZE; // process's kernel stack
  p->trapframe->kernel_trap = (uint64)usertrap;
  p->trapframe->kernel_hartid = r_tp();         // hartid for cpuid()

  // set up the registers that trampoline.S's sret will use
  // to get to user space.
  
  // set S Previous Privilege mode to User.
  unsigned long x = r_sstatus();
  x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
  x |= SSTATUS_SPIE; // enable interrupts in user mode
  w_sstatus(x);

  // set S Exception Program Counter to the saved user pc.
  w_sepc(p->trapframe->epc);

  // tell trampoline.S the user page table to switch to.
  uint64 satp = MAKE_SATP(p->pagetable);

  // jump to trampoline.S at the top of memory, which 
  // switches to the user page table, restores user registers,
  // and switches to user mode with sret.
  // fn is userret's address as seen through the TRAMPOLINE mapping.
  uint64 fn = TRAMPOLINE + (userret - trampoline);
  ((void (*)(uint64,uint64))fn)(TRAPFRAME, satp);
}

嗯哼,感觉注释写得也很明白(
这里可以说的就是,trampoline中的SEPC实际上可以不用在usertrap里保存,这里的保存顺序是没有必然联系的。

userret

汇编代码,其做了以下事:

  1. 切换 page table。
  2. restore user register。
  3. a0改为return的返回值(sscratch)
  4. sret

sret执行以下部分:

  1. 切换回user mode
  2. 将SEPC的值写入PC寄存器
  3. 重新打开中断

以上过程中,xv6尽可能地使系统调用看上去像是函数,但实际上是需要一个层级的调用过程。

kernel trap

该部分。。其实和user trap差不多,但简单一点。

kernelvec只在栈里保存和恢复寄存器,而kerneltrap处理interrupts(time)和exception。

RISC-V assembly (easy)

这部分是简单的对着代码进行分析,让学生熟悉RISC-V的。但xv6的RISC-V并不用完整写一个程序,所以我这种完全不会RISC-V的也可以上手。

摆一张图:

RISC-V registers

用别人图床不也挺好吗

分析的代码部分:

user/_call:     file format elf64-littleriscv

Disassembly of section .text:

0000000000000000 <g>:
#include "kernel/param.h"
#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int g(int x) {
   0:	1141                	addi	sp,sp,-16
   2:	e422                	sd	s0,8(sp)
   4:	0800                	addi	s0,sp,16
  return x+3;
}
   6:	250d                	addiw	a0,a0,3
   8:	6422                	ld	s0,8(sp)
   a:	0141                	addi	sp,sp,16
   c:	8082                	ret

000000000000000e <f>:

int f(int x) {
   e:	1141                	addi	sp,sp,-16
  10:	e422                	sd	s0,8(sp)
  12:	0800                	addi	s0,sp,16
  return g(x);
}
  14:	250d                	addiw	a0,a0,3
  16:	6422                	ld	s0,8(sp)
  18:	0141                	addi	sp,sp,16
  1a:	8082                	ret

000000000000001c <main>:

void main(void) {
  1c:	1141                	addi	sp,sp,-16
  1e:	e406                	sd	ra,8(sp)
  20:	e022                	sd	s0,0(sp)
  22:	0800                	addi	s0,sp,16
  printf("%d %d\n", f(8)+1, 13);
  24:	4635                	li	a2,13
  26:	45b1                	li	a1,12
  28:	00000517          	auipc	a0,0x0
  2c:	7c050513          	addi	a0,a0,1984 # 7e8 <malloc+0xea>
  30:	00000097          	auipc	ra,0x0
  34:	610080e7          	jalr	1552(ra) # 640 <printf>
  exit(0);
  38:	4501                	li	a0,0
  3a:	00000097          	auipc	ra,0x0
  3e:	27e080e7          	jalr	638(ra) # 2b8 <exit>

这里是解答部分:

Question 1

Which registers contain arguments to functions? For example, which register holds 13 in main's call to printf?

a0------a7. 13 存储在a2.

Question 2

Where is the call to function f in the assembly code for main? Where is the call to g? (Hint: the compiler may inline functions.)

编译器优化直接内联展开了。

Question 3

At what address is the function printf located?

0000000000000640 <printf>(与上面反汇编中jalr处的注释 # 640 <printf> 一致)

Question 4

What value is in the register ra just after the jalr to printf in main?

ra保存的是jalr之后下一条指令的地址,即main中的0x38(jalr位于0x34,指令长4字节)。

Question 5

Run the following code. What is the output? The output depends on that fact that the RISC-V is little-endian. If the RISC-V were instead big-endian what would you set i to in order to yield the same output? Would you need to change 57616 to a different value?

unsigned int i = 0x00646c72;
printf("H%x Wo%s", 57616, &i);

大端小端的问题。。。计组都搞过了,不回答😤😤😤

Question 6

In the following code, what is going to be printed after 'y='? (note: the answer is not a specific value.) Why does this happen?

printf("x=%d y=%d", 3);

未定义行为,当前的a2寄存器不知道是什么值,因此输出也是未知的。

Backtrace (moderate)

实现一个回溯的Backtrace。

还是很简单的一道题,用指针就行了,我还在想用什么riscv指令来访问物理内存。自己做这些题还是容易舍近求远。

本题目需要理解下图:

xv6 Stack

在每一层的堆栈中,return address位于frame pointer的offset(-8)位置,而上一个frame pointer位于当前frame pointer的offset(-16)位置,因此若要存取上一个堆栈信息,可以直接用指针的加减来获得。

首先在kernel/defs.h等地方加上必要的配置,例如函数声明等。

其次在kernel/riscv.h里增加以下函数声明:

// Read register s0 and return its value. The surrounding text uses
// s0 as the frame pointer of the currently executing function.
static inline uint64
r_fp()
{
  uint64 x;
  // copy s0 into whichever register the compiler picks for x
  asm volatile("mv %0, s0" : "=r" (x) );
  return x;
}

该函数可以获取当前帧指针s0的值。

kernel/printf.c里增加以下内容:

// Print the saved return address of every frame on the current stack,
// innermost call first. Each frame keeps its return address at fp-8
// and the caller's frame pointer at fp-16.
void backtrace(){
  printf("backtrace:\n");
  uint64 fp = r_fp();
  // start/end are the top and bottom of the page holding the stack;
  // they are computed once from the initial frame pointer.
  uint64 start = PGROUNDUP(fp), end = PGROUNDDOWN(fp);
  // Stop when fp reaches the top of the page: no caller frame lies
  // beyond it, so the slot at fp-8 is not a meaningful return address.
  // Also require 16 bytes below fp inside the page so that neither
  // load can touch memory under the stack page.
  while(end + 16 <= fp && fp < start){
    printf("%p\n", *(uint64*)(fp-8));
    fp = *(uint64*)(fp-16);
  }
}

其中的start和end变量是为了防止fp的无限回溯超出界限。xv6会将每一个stack都分配一个page,因此用PGROUNDUP等宏即可获取上下限。

Alarm (hard)

按照他的步骤来就算简单。

本题要求增加两个系统调用:sigalarm和sigreturn。

除了增加系统调用所必要的修改外,其他修改如下:

由于跳转函数会破坏寄存器的值,因此需要在proc结构里保存寄存器的值。这里的实现是有些偷懒的,只根据alarmtest.asm里的所需值增加了所需要的寄存器:sp,pc,s0-s1, ra以及a0-a7,但实际上无从知道编译器会用哪些用户寄存器,因此在实际使用的时候应该将32个寄存器全部存储。

struct proc {
  //...
  uint64 ticks; // alarm interval
  uint64 func; // address of the handler function to run
  uint64 count; // how much time has elapsed so far
  uint64 alarm_epc; // saved pc register
  uint64 in_handler; // set when the handler is entered, cleared by sigreturn

  // saved registers (copied from / back to the trapframe)
  uint64 sp;
  uint64 s0;
  uint64 s1;
  uint64 ra;
  uint64 a0;
  uint64 a1;
  uint64 a2;
  uint64 a3;
  uint64 a4;
  uint64 a5;
  uint64 a6;
  uint64 a7;
};

kernel/proc.c的allocproc函数里也增加初始化内容:

// Excerpt of allocproc(): only the alarm-related initialisation added
// for this lab is shown; the rest of the function is elided.
static struct proc*
allocproc(void)
{

found:
    //.....
  // start with no alarm armed and no handler in progress
  p->count = 0;
  p->ticks = 0;
  p->func = 0;
  p->alarm_epc = 0;
  p->in_handler = 0;
  // clear the register save area used while a handler runs
  p->a0 = p->a1 = p->a2 = p->a3 = p->a4 = p->a5 = p->a6 = p->a7 =0;
  p->sp = p->ra = p->s0 = p->s1 = 0;
  return p;
}

两系统调用:

// sigalarm(ticks, handler): arm a periodic alarm for the calling
// process. Stores the interval and the handler address in the proc
// and restarts the elapsed-tick counter. sigalarm(0, 0) stores zeros,
// which the timer path in usertrap() treats as "no alarm".
// Returns 0 on success, -1 if an argument cannot be fetched or the
// interval is negative.
uint64
sys_sigalarm(void){
  int ticks;
  uint64 func;
  // Fail if EITHER argument is bad. With && the second fetch was
  // short-circuited away whenever the first one succeeded, leaving
  // func unset.
  if(argint(0, &ticks) < 0 || argaddr(1, &func) < 0){
    return -1;
  }
  // A negative interval would turn into a huge unsigned tick count.
  if(ticks < 0){
    return -1;
  }

  struct proc *p = myproc();
  p->ticks = ticks;
  p->func = func;
  p->count = 0;

  return 0;
}
// sigreturn(): called by the alarm handler when it is done. Restores
// the pc and the registers that usertrap() saved before diverting the
// process into the handler, and re-arms the alarm counter.
// Returns the restored a0 so the interrupted code sees its own a0
// again: xv6's syscall dispatcher writes a system call's return value
// into trapframe->a0, so returning a constant would clobber it.
// Returns 0 when no handler is in progress.
uint64
sys_sigreturn(void){
  struct proc *p = myproc();
  if (!p->in_handler){
    return 0;
  }

  p->trapframe->epc  = p->alarm_epc;
  p->count = 0;
  p->in_handler = 0;

  p->trapframe->a0 = p->a0;
  p->trapframe->a1 = p->a1;
  p->trapframe->a2 = p->a2;
  p->trapframe->a3 = p->a3;
  p->trapframe->a4 = p->a4;
  p->trapframe->a5 = p->a5;
  p->trapframe->a6 = p->a6;
  p->trapframe->a7 = p->a7;

  p->trapframe->sp = p->sp;
  p->trapframe->s0 = p->s0;
  p->trapframe->s1 = p->s1;
  p->trapframe->ra = p->ra;

  return p->trapframe->a0;
}

kernel/trap.c:

// Excerpt of usertrap(): only the timer-interrupt tail with the alarm
// logic is shown; the earlier part of the function is elided.
void
usertrap(void)
{   
  // ...
  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2){
    // an alarm is armed when either the handler or the interval is non-zero
    if (p->func||p->ticks){
      p->count++;
      // fires only on exact equality; count is reset in sys_sigreturn
      if (p->count==p->ticks){
        p->in_handler = 1;
        // remember where to resume, then resume in the handler instead
        p->alarm_epc = p->trapframe->epc;
        p->trapframe->epc = p->func;

        // save the registers that sys_sigreturn restores
        p->a0 = p->trapframe->a0;
        p->a1 = p->trapframe->a1;
        p->a2 = p->trapframe->a2;
        p->a3 = p->trapframe->a3;
        p->a4 = p->trapframe->a4;
        p->a5 = p->trapframe->a5;
        p->a6 = p->trapframe->a6;
        p->a7 = p->trapframe->a7;
        p->sp = p->trapframe->sp;
        p->s0 = p->trapframe->s0;
        p->s1 = p->trapframe->s1;
        p->ra = p->trapframe->ra;
      }
    }
    yield();
  }
  usertrapret();
}

Option

Optional Challenge是要在backtrace里打印出name而不是地址,这部分。。。网上找不到,我也不会。大概的思路就是像gdb一样从可执行文件里读symbol然后输出,可问题是怎么获得呢?不会捏。

Lab Lazy(2020)

2021课程把这个lab删了。可能是因为课堂上已经把lab的至少一半的内容给泄完了。

但其实剩下的也是重量级,谁能想得到把panic删了的。

课程

课程关于代码的解释都在lab里面,所以这里只稍微提一下原理。

lazy lab本身需要利用虚拟内存的Page faults来加载page,RV里,Page fault的原因存储在SCAUSE,虚拟内存中的地址存储在STVAL寄存器中:

RISC-V SCAUSE value

以及在TRAPFRAME中存储的用户程序寄存器值。

因此,我们可以根据以上信息来进行lazy page allocation,替代eager allocation以提高性能。

Eliminate allocation from sbrk() (easy)

只要改一下sys_sbrk就行。

sysproc.c:

// sbrk(n), first step of the lazy lab: adjust the recorded process
// size by n without allocating any memory.
// Returns the process size from before the adjustment, or -1 if the
// argument cannot be fetched.
uint64
sys_sbrk(void)
{
  int addr;
  int n;
  struct proc* p;
  if(argint(0, &n) < 0)
    return -1;
  p =  myproc();
  // Capture the old size first: it is the return value. It was
  // previously returned without ever being assigned.
  addr = p->sz;
  p->sz = p->sz+n;
  // growproc(n) is intentionally not called here.
  return addr;
}

运行结果大致如下:

init: starting sh
$ echo hi
usertrap(): unexpected scause 0x000000000000000f pid=3
            sepc=0x0000000000001258 stval=0x0000000000004008
va=0x0000000000004000 pte=0x0000000000000000
panic: uvmunmap: not mapped

可以看到uvmunmap出现了panic,因为这里只改了heap的位置,没有分配真正的内存,因此也没有真正的页表,无法进行unmap操作。

Lazy allocation (moderate)

lazy allocation的思路是引发page fault(这里只处理load和save类型的page fault)时进行分配。由于物理内存和逻辑内存是全相联的,所以简单的分配给一页就行。

具体代码见下面整体任务。

Lazytests and Usertests (moderate)

trap.c:处理Page Fault的分配页表。

// Excerpt of usertrap() for the lazy-allocation lab: a new branch
// handles scause 13 and 15 (the load and store page faults described
// in the surrounding text) by allocating the faulting page on demand.
// The start and end of the function are elided.
void
usertrap(void)
{
  //......
  if(r_scause() == 8){
    // system call

    if(p->killed)
      exit(-1);

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->trapframe->epc += 4;

    // an interrupt will change sstatus &c registers,
    // so don't enable until done with those registers.
    intr_on();

    syscall();
  } else if (r_scause() == 13 || r_scause() == 15){
    // stval holds the faulting virtual address
    uint64 va = r_stval();
    // kill the process if the address is at or beyond the process
    // size, or at or below the saved user stack pointer
    if(va >= p->sz||va<=p->trapframe->sp){
      p->killed = 1;
    } else {
      uint64 ka = (uint64)kalloc();
      if (ka==0){
        // out of physical memory
        p->killed = 1;
      } else {
        // hand out a zeroed page, mapped at the page containing va
        memset((void*)ka, 0, PGSIZE);
        va = PGROUNDDOWN(va);
        if(mappages(p->pagetable, va, PGSIZE, ka, PTE_W|PTE_U|PTE_R)!=0){
          // mapping failed: give the page back and kill the process
          kfree((void*)ka);
          p->killed = 1;
        }
      }      
    }
  } else if((which_dev = devintr()) != 0){
    // ok
  }
  //....
}

上述代码在发生load或store page fault时进入,首先进行合法性检查,之后开始分配内存。若无内存,就kill进程,否则将内存置0,mappages将其映射到目标用户的页表中。

sysproc.c: 修改sbrk。

uint64
sys_sbrk(void)
{
  int addr;
  int n;
  struct proc* p;
  if(argint(0, &n) < 0)
    return -1;
  p =  myproc();
  addr = p->sz;
  p->sz = p->sz+n;
  if (n<0) {
    uvmdealloc(p->pagetable, p->sz-n, p->sz);
  }

  //if(growproc(n) < 0)
  //  return -1;
  return addr;
}

增加了负数的处理。

vm.c 处理fork,copyin(write)和copyout(read)

// 要用到proc结构,引入头文件。
#include "spinlock.h"
#include "proc.h"

//该部分的改动实际上就是注释掉两个panic,这谁想得到,或者敢这样做?
//哎,以后得多试了,这里卡了好久。
// Remove npages of mappings starting from va. va must be
// page-aligned. Pages in the range that have no mapping are
// skipped rather than treated as an error, since with lazy
// allocation part of a process's range may never have been mapped.
// Optionally free the physical memory.
void
uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free)
{
  uint64 a;
  pte_t *pte;

  if((va % PGSIZE) != 0)
    panic("uvmunmap: not aligned");

  for(a = va; a < va + npages*PGSIZE; a += PGSIZE){
    if((pte = walk(pagetable, a, 0)) == 0){
      // formerly panic("uvmunmap: walk"); no PTE for this address, skip it
      continue;
    }

    if((*pte & PTE_V) == 0)
      continue;
      // formerly panic("uvmunmap: not mapped"); an invalid PTE is skipped
    if(PTE_FLAGS(*pte) == PTE_V)
      panic("uvmunmap: not a leaf");
    if(do_free){
      uint64 pa = PTE2PA(*pte);
      kfree((void*)pa);
    }
    *pte = 0;
  }
}

// As in uvmunmap, the two panics are replaced by skips. Used by fork.
// Given a parent process's page table, copy
// its memory into a child's page table.
// Copies both the page table and the
// physical memory. Pages the parent has not mapped are skipped.
// returns 0 on success, -1 on failure.
// frees any allocated pages on failure.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;
  char *mem;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      // formerly panic("uvmcopy: pte should exist")
      continue;
    if((*pte & PTE_V) == 0)
      // formerly panic("uvmcopy: page not present")
      continue;
    pa = PTE2PA(*pte);
    flags = PTE_FLAGS(*pte);
    if((mem = kalloc()) == 0)
      goto err;
    memmove(mem, (char*)pa, PGSIZE);
    if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){
      kfree(mem);
      goto err;
    }
  }
  return 0;

 err:
  // undo the pages copied so far, freeing their memory
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}

// Copy from kernel to user.
// Copy len bytes from src to virtual address dstva in a given page table.
// A destination page that has no usable mapping is allocated on
// demand (zero-filled), provided the page table is the calling
// process's own and the page lies below the process size and above
// the saved user stack pointer.
// Return 0 on success, -1 on error.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0;
  struct proc* p = myproc();
  while(len > 0){
    va0 = PGROUNDDOWN(dstva);
    pa0 = walkaddr(pagetable, va0);

    if(pa0 == 0) {
      // p->sz and p->trapframe->sp describe p's own address space
      // only, so demand allocation is limited to p->pagetable; any
      // other page table keeps the plain "not mapped" error.
      if(pagetable != p->pagetable ||
         va0 >= p->sz || va0 <= p->trapframe->sp){
        return -1;
      }
      pa0 = (uint64)kalloc();
      if(pa0 == 0){
        // out of physical memory
        p->killed = 1;
        return -1;
      }
      memset((void*)pa0, 0, PGSIZE);
      // Map into the same table that was walked above, so the page
      // written below is the one the caller asked about.
      if(mappages(pagetable, va0, PGSIZE, pa0, PTE_W|PTE_U|PTE_R) != 0){
        kfree((void*)pa0);
        p->killed = 1;
        return -1;
      }
    }
    // copy at most up to the end of the current page
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}

// Copy from user to kernel.
// Copy len bytes to dst from virtual address srcva in a given page table.
// A source page that has no usable mapping is allocated on demand
// (zero-filled), provided the page table is the calling process's own
// and the page lies below the process size and above the saved user
// stack pointer.
// Return 0 on success, -1 on error.
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
  uint64 n, va0, pa0;
  struct proc* p = myproc();
  while(len > 0){
    va0 = PGROUNDDOWN(srcva);
    pa0 = walkaddr(pagetable, va0);

    if(pa0 == 0) {
      // p->sz and p->trapframe->sp describe p's own address space
      // only, so demand allocation is limited to p->pagetable; any
      // other page table keeps the plain "not mapped" error.
      if(pagetable != p->pagetable ||
         va0 >= p->sz || va0 <= p->trapframe->sp){
        return -1;
      }
      pa0 = (uint64)kalloc();
      if(pa0 == 0){
        // out of physical memory
        p->killed = 1;
        return -1;
      }
      memset((void*)pa0, 0, PGSIZE);
      // Map into the same table that was walked above, so the page
      // read below is the one the caller asked about.
      if(mappages(pagetable, va0, PGSIZE, pa0, PTE_W|PTE_U|PTE_R) != 0){
        kfree((void*)pa0);
        p->killed = 1;
        return -1;
      }
    }
    // copy at most up to the end of the current page
    n = PGSIZE - (srcva - va0);
    if(n > len)
      n = len;
    memmove(dst, (void *)(pa0 + (srcva - va0)), n);

    len -= n;
    dst += n;
    srcva = va0 + PGSIZE;
  }
  return 0;
}

哎,自己好菜。

Lab Copy-On-Write(2021)

课程

其实课程内容很少。

Xv6以及其他类POSIX 系统,若要使用fork-exec方式来生成新的进程,fork便会带来不必要的复制:fork的意义是为exec带来新的pid以及进程资源,原进程的复制只会带来不必要的IO。

于是,大部分操作系统都实现了Copy-On-Write(COW)fork,按需进行复制。

具体过程

可以将fork后的父子进程共享同一份物理内存(page),这样,若不发生写入,则只需要修改一下子进程的pagetable即可。而为了满足这一点,需要将PTE标志位设为只读,且明确标明这是COW的结果(可在RSW位中设置),方便和出错情况区分。而当需要写入的时候,按需对写入页进行复制。

写时复制带来了性能的提升,但相对也带来了维护的复杂性:什么时候释放页呢?这里就需要引入引用计数,而引用计数的相关知识。。略。感觉都会罢。

Implement copy-on write (hard)

该部分确实和Lazy重合度大,重点在PTE标志位的设定,以及kalloc.c中引用数据结构的维护。

不用最开始就考虑复杂情况。由于xv6本身较为简单,因此标志位不会设定的太复杂,不需要考虑R和W位的组合问题,这里钻了牛角尖了。

riscv.h:

#define PTE_COW (1L << 8) // 1 -> Copy-On-Write PAGE

kalloc.c:

#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "riscv.h"
#include "defs.h"

// Index of a physical address's page in the reference-count array:
// the address shifted right by 12 bits. The argument is parenthesized
// so that an expression argument is cast as a whole.
#define PA2CNT_INDEX(pa) (((uint64)(pa))>>12)

void freerange(void *pa_start, void *pa_end);

// Per-page reference counts used by the copy-on-write code.
// The array has one slot for every page number below PHYSTOP and is
// indexed with PA2CNT_INDEX(pa); lock guards updates to the counts.
struct cnt {
  struct spinlock lock;
  uint cnt[PHYSTOP>>PGSHIFT];
} kcnt;



extern char end[]; // first address after kernel.
                   // defined by kernel.ld.

struct run {
  struct run *next;
};

struct {
  struct spinlock lock;
  struct run *freelist;
} kmem;

// Initialise the allocator: set up the free-list lock and the
// reference-count lock, then hand every page between the end of the
// kernel image and PHYSTOP to the free list.
void
kinit()
{
  initlock(&kmem.lock, "kmem");
  initlock(&kcnt.lock, "kcnt");
  // both locks must exist first: freerange() calls kfree(), which takes them
  freerange(end, (void*)PHYSTOP);
}

// Put every whole page inside [pa_start, pa_end) on the free list.
// The first page starts at pa_start rounded up to a page boundary.
// Each page's reference count is zeroed before it is passed to kfree().
void
freerange(void *pa_start, void *pa_end)
{
  char *limit = (char*)pa_end;
  char *page = (char*)PGROUNDUP((uint64)pa_start);

  while(page + PGSIZE <= limit){
    kcnt.cnt[PA2CNT_INDEX(page)] = 0;
    kfree(page);
    page += PGSIZE;
  }
}

// Free the page of physical memory pointed at by v,
// which normally should have been returned by a
// call to kalloc().  (The exception is when
// initializing the allocator; see kinit above.)
void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  acquire(&kcnt.lock);
  if(kcnt.cnt[PA2CNT_INDEX(pa)] != 0){
    --kcnt.cnt[PA2CNT_INDEX(pa)];
  }
  release(&kcnt.lock);
  if (kcnt.cnt[PA2CNT_INDEX(pa)] != 0){
    return;
  }

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);

}

// Take one 4096-byte page of physical memory off the free list.
// The page's reference count is set to 1 and its contents are
// overwritten with junk before it is returned.
// Returns a pointer the kernel can use, or 0 when no page is free.
void *
kalloc(void)
{
  struct run *page;

  acquire(&kmem.lock);
  page = kmem.freelist;
  if(page == 0){
    // free list is empty
    release(&kmem.lock);
    return (void*)page;
  }

  kmem.freelist = page->next;
  // the count is set while the free-list lock is still held
  acquire(&kcnt.lock);
  kcnt.cnt[PA2CNT_INDEX(page)] = 1;
  release(&kcnt.lock);
  release(&kmem.lock);

  memset((char*)page, 5, PGSIZE); // fill with junk
  return (void*)page;
}

// Add one reference to the physical page containing pa.
// The update is done under kcnt.lock.
void kincrease(uint64 pa){
  uint64 slot = PA2CNT_INDEX(pa);

  acquire(&kcnt.lock);
  kcnt.cnt[slot]++;
  release(&kcnt.lock);
}

vm.c:

// Copy-on-write version of uvmcopy (excerpt; the err: path is elided).
// Instead of copying memory, map each of the parent's pages into the
// child's page table and add a reference to the shared physical page.
// A writable page becomes read-only and is tagged PTE_COW in both
// page tables; a page that was not writable is shared with its flags
// unchanged, so a later COW fault cannot make it writable.
// Returns 0 on success; jumps to err when a mapping cannot be installed.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");
    pa = PTE2PA(*pte);

    // Flags for the child's mapping.
    flags = PTE_FLAGS(*pte);
    if(flags & PTE_W){
      flags = (flags & ~PTE_W) | PTE_COW;
    }

    if(mappages(new, i, PGSIZE, (uint64)pa, flags) != 0){
      // Nothing was changed for this page yet: no reference taken and
      // the parent's PTE untouched, so there is nothing to undo here.
      goto err;
    }

    // The child's mapping exists; only now touch the parent's PTE and
    // count the extra reference.
    if(*pte & PTE_W){
      *pte = (*pte & ~PTE_W) | PTE_COW;
    }
    kincrease(pa);
  }
  return 0;
  //...
}


// Excerpt of copyout() for the copy-on-write lab: before writing to a
// user page, a page whose PTE is not writable is passed to cow().
// The copy loop body and the final return are elided.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0;
  pte_t* pte0;
  while(len > 0){
    va0 = PGROUNDDOWN(dstva);
    pa0 = walkaddr(pagetable, va0);
    if(pa0 == 0)
      return -1;

    pte0 = walk(pagetable, va0, 0);
    // not writable: let cow() give this page table a writable page,
    // and fail the copy if it cannot
    if((*pte0 & PTE_W) == 0){
      if(cow(pagetable, va0)<0){
        return -1;
      }
    }
    // re-read the physical address: cow() may have changed the PTE
    pa0 = PTE2PA(*pte0);
    //.....
  }
}

// Resolve a write to a copy-on-write page.
// Allocates a fresh page, copies the contents of the page currently
// mapped at va into it, points the PTE at the copy with PTE_W set and
// PTE_COW cleared, and drops one reference to the old page.
// Returns 0 on success. Returns -1 when va is out of range, has no
// PTE, the PTE is not a valid user copy-on-write mapping, or no
// memory is available.
int cow(pagetable_t pagetable, uint64 va)
{
  if(va>=MAXVA){
    return -1;
  }
  pte_t* pte;
  // A missing PTE is an ordinary bad address, not a kernel bug: a
  // store to an unmapped address reaches this function from
  // usertrap(), so report failure instead of panicking.
  if((pte = walk(pagetable, va, 0)) == 0){
    return -1;
  }
  if((*pte & PTE_V) == 0){
    return -1;
  }
  if((*pte & PTE_COW) == 0){
    return -1;
  }
  if((*pte & PTE_U) == 0){
    return -1;
  }

  uint64 ka = (uint64)kalloc(), pa = PTE2PA(*pte);
  uint flags = PTE_FLAGS(*pte);
  if (ka==0){
    return -1;
  }

  memmove((void*)ka, (char*)pa, PGSIZE);
  flags &= ~PTE_COW;
  flags |= PTE_W;
  *pte = PA2PTE(ka) | flags;
  // this page table no longer refers to the old page
  kfree((void*)pa);
  return 0;
}

trap.c:

// Excerpt of usertrap() for the copy-on-write lab: scause 15 is
// passed to cow(); the process is killed when cow() fails.
// The rest of the function is elided.
void
usertrap(void)
{
  //...
  if(r_scause() == 8){
    // system call
    // ...
    syscall();
  } else if (r_scause() == 15){
    // stval holds the faulting virtual address
    uint64 va = r_stval();
    if(cow(p->pagetable, va)<0){
      p->killed = 1;
    }
  } //...
}

再修改一下defs.h即可。


Author: ZzzRemake
Reprint policy: Unless otherwise stated, all articles in this blog are licensed under CC BY 4.0. If reproduced, please credit the source ZzzRemake!
Comment
  TOC