6.S081:COW-fork


COW-fork

Implement copy-on-write fork(hard)

Your task is to implement copy-on-write fork in the xv6 kernel. You are done if your modified kernel executes both the cowtest and 'usertests -q' programs successfully.

To help you test your implementation, we've provided an xv6 program called cowtest (source in user/cowtest.c). cowtest runs various tests, but even the first will fail on unmodified xv6. Thus, initially, you will see:

$ cowtest
simple: fork() failed
$ 

The "simple" test allocates more than half of available physical memory, and then fork()s. The fork fails because there is not enough free physical memory to give the child a complete copy of the parent's memory.

When you are done, your kernel should pass all the tests in both cowtest and usertests -q. That is:

$ cowtest
simple: ok
simple: ok
three: zombie!
ok
three: zombie!
ok
three: zombie!
ok
file: ok
ALL COW TESTS PASSED
$ usertests -q
...
ALL TESTS PASSED
$

Here's a reasonable plan of attack.

  1. Modify uvmcopy() to map the parent's physical pages into the child, instead of allocating new pages. Clear PTE_W in the PTEs of both child and parent for pages that have PTE_W set.
  2. Modify usertrap() to recognize page faults. When a write page-fault occurs on a COW page that was originally writeable, allocate a new page with kalloc(), copy the old page to the new page, and install the new page in the PTE with PTE_W set. Pages that were originally read-only (not mapped PTE_W, like pages in the text segment) should remain read-only and shared between parent and child; a process that tries to write such a page should be killed.
  3. Ensure that each physical page is freed when the last PTE reference to it goes away -- but not before. A good way to do this is to keep, for each physical page, a "reference count" of the number of user page tables that refer to that page. Set a page's reference count to one when kalloc() allocates it. Increment a page's reference count when fork causes a child to share the page, and decrement a page's count each time any process drops the page from its page table. kfree() should only place a page back on the free list if its reference count is zero. It's OK to to keep these counts in a fixed-size array of integers. You'll have to work out a scheme for how to index the array and how to choose its size. For example, you could index the array with the page's physical address divided by 4096, and give the array a number of elements equal to highest physical address of any page placed on the free list by kinit() in kalloc.c. Feel free to modify kalloc.c (e.g., kalloc() and kfree()) to maintain the reference counts.
  4. Modify copyout() to use the same scheme as page faults when it encounters a COW page.

Some hints:

  • It may be useful to have a way to record, for each PTE, whether it is a COW mapping. You can use the RSW (reserved for software) bits in the RISC-V PTE for this.
  • usertests -q explores scenarios that cowtest does not test, so don't forget to check that all tests pass for both.
  • Some helpful macros and definitions for page table flags are at the end of kernel/riscv.h.
  • If a COW page fault occurs and there's no free memory, the process should be killed.

Solution

设置一个全局数组来记录物理页的引用数量,注意访问时需要上锁

#define IDX(p) ((uint64)p) / PGSIZE

struct {
  struct spinlock lock;
  uint cnt[IDX(PHYSTOP)];
} ref;

定义对ref对象的操作

void ref_lock(){
  acquire(&ref.lock);
}

void ref_unlock(){
  release(&ref.lock);
}

uint increase_cnt(uint64 pa){
  return ++ref.cnt[IDX(pa)];
}

uint decrease_cnt(uint64 pa){
  return --ref.cnt[IDX(pa)];
}

设置PTE_C表明是一个cow page

#define PTE_C (1L << 8) // cow bit

修改uvmcopy(),不分配物理页,而是映射同一块物理页,同时物理页的引用数加1,这里有一个细节,我们只对有PTE_W置位的页面标记其为PTE_C页面,而对于不可写的页面只做映射

int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");
    pa = PTE2PA(*pte);
    flags = PTE_FLAGS(*pte);
    // if PTE_W then clear PTE_W and set PTE_C
    if(*pte & PTE_W){
      flags = (PTE_FLAGS(*pte) | PTE_C) & ~PTE_W;
      *pte = PA2PTE(pa) | flags;
    }
    if(mappages(new, i, PGSIZE, pa, flags) != 0){
      goto err;
    }
    // increase cnt
    ref_lock();
    increase_cnt(pa);
    ref_unlock();
  }
  return 0;

 err:
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}

当出现写操作时,会陷入usertrap()中,定义一个处理cow的函数,作用是新分配一块物理页,恢复PTE_W,清除PTE_C,更新PTE,其中开头有一个判断,va >= MAXVA为了防止越界,PGROUNDDOWN(va) == 0防止写入0地址,最后需要free掉原始物理页

// cow handlerl
int cow_handler(pagetable_t pagetable, uint64 va){
  if(va >= MAXVA || PGROUNDDOWN(va) == 0) return -1;
  pte_t *pte = walk(pagetable, va, 0);
  if(pte == 0) return -1;
  if((*pte & PTE_V) == 0) return -1;
  uint64 pa = PTE2PA(*pte);
  if(pa == 0) return -1;
  if(*pte & PTE_C){
    uint64 ka = (uint64)kalloc();
    if(ka == 0) return -1;
    uint64 flags = (PTE_FLAGS(*pte) | PTE_W) & ~PTE_C;
    memmove((void *)ka, (void *)pa, PGSIZE);
    kfree((void *)pa);
    *pte = PA2PTE(ka) | flags;
    return 0;
  }
  return 0;
}

uesrtrap()新增判断,需要判断va是否在guard_page区域,p->killed = 1置位后,在usertrap()后半部分有判断是否killed的语句

else if(r_scause() == 13 || r_scause() == 15) {
    uint64 va = r_stval();
    // printf("page falut %p\n", va);
    if((va <= PGROUNDDOWN(p->trapframe->sp) && va >= PGROUNDDOWN(p->trapframe->sp) - PGSIZE) || (cow_handler(p->pagetable, va) == -1)) p->killed = 1;
  }

还需要修改原本的kalloc()和kfree()

  • kalloc将物理页的引用置1
  • kfree时需要判断引用若为0,则释放页面,否则引用减1
void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  acquire(&ref.lock);
  if(decrease_cnt((uint64)pa) > 0){
    release(&ref.lock);
    return ;
  }
  release(&ref.lock);

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}

void *
kalloc(void)
{
  struct run *r;

  acquire(&kmem.lock);
  r = kmem.freelist;
  if(r)
    kmem.freelist = r->next;
  release(&kmem.lock);

  if(r){
    memset((char*)r, 5, PGSIZE); // fill with junk
    acquire(&ref.lock);
    ref.cnt[IDX(r)] = 1;
    release(&ref.lock);    
  }
  return (void*)r;
}

值得注意的是,在xv6初始化后,在内核态会调用freerange()函数,该函数会把物理页都free一遍,故在开始时,让全部物理页的引用数都初始化为1,并在freerange前调用,别忘了初始化锁

void init_cnt(){
  for(int i = 0; i < IDX(PHYSTOP); i++)
    ref.cnt[i] = 1;
}

void
kinit()
{
  initlock(&kmem.lock, "kmem");
  initlock(&ref.lock, "ref");
  init_cnt();
  freerange(end, (void*)PHYSTOP);
}

因为涉及写操作,所以还需要修改copyout函数,类似于usertrap(),调用cow_handler即可

int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0;

  while(len > 0){
    va0 = PGROUNDDOWN(dstva);

    if(cow_handler(pagetable, va0) == -1) return -1;

    pa0 = walkaddr(pagetable, va0);
    if(pa0 == 0)
      return -1;
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}

Author: Paranoid
Reprint policy: All articles in this blog are used except for special statements CC BY 4.0 reprint policy. If reproduced, please indicate source Paranoid !
评论
  TOC