走~走走~走走走走​🚶‍♂️🚶‍♂️🚶‍♂️

MIT 6.S081 Lab7-9 Thread & Network Driver & File System


Lab Thread

哈哈,贼简单,但怎么有人记错了栈的增长方向的,消息了。

课程

课程内容其实就帮你把Lab做一遍了。

后面有机会了补罢。

Uthread: switching between threads (moderate)

首先建个上下文context结构体,用结构体存必要的callee寄存器以及ra,sp, 用汇编进行switch即可。

uthread.c:

struct ucontext
{
  uint64 ra;
  uint64 sp;

  // callee-saved
  uint64 s0;
  uint64 s1;
  uint64 s2;
  uint64 s3;
  uint64 s4;
  uint64 s5;
  uint64 s6;
  uint64 s7;
  uint64 s8;
  uint64 s9;
  uint64 s10;
  uint64 s11;
};

struct thread {
  char       stack[STACK_SIZE]; /* the thread's stack */
  int        state;             /* FREE, RUNNING, RUNNABLE */
  struct ucontext context;
};
struct thread all_thread[MAX_THREAD];
struct thread *current_thread;

void 
thread_schedule(void)
{   
    //....
  if (current_thread != next_thread) {         /* switch threads?  */
    next_thread->state = RUNNING;
    t = current_thread;
    current_thread = next_thread;
    /* YOUR CODE HERE
     * Invoke thread_switch to switch from t to next_thread:
     * thread_switch(??, ??);
     */
    thread_switch((uint64)(&t->context), (uint64)(&next_thread->context));
  } else
    next_thread = 0;
}

void 
thread_create(void (*func)())
{
  struct thread *t;

  for (t = all_thread; t < all_thread + MAX_THREAD; t++) {
    if (t->state == FREE) break;
  }
  t->state = RUNNABLE;
  // YOUR CODE HERE
  memset(&t->context, 0, sizeof(t->context));
  t->context.ra = (uint64)func;
  t->context.sp = (uint64)(t->stack+STACK_SIZE-1);
  // 坑: sp是反向增长。。。。令人感叹。
}

而汇编的内容。。。其实直接可以把kernel/swtch.S抄过来。

Using threads (moderate)

pthread的简单应用。但这里有一个优化是可以单独说的。

原题要求将一个单线程安全的哈希表改成多线程安全,若每个桶都用相同的锁,那么多线程将退化为串行执行,不能起到并行加速的效果;但若对每个桶单独加个锁,便可以做到并行加速。

因此,这部分有ph_safeph_fast两个测试,分别测试正确性和优化后的并行加速。

ph.c:

pthread_mutex_t entry_mutex[NBUCKET];

static 
void put(int key, int value)
{
  int i = key % NBUCKET;

  // is the key already present?
  struct entry *e = 0;
  pthread_mutex_lock(&entry_mutex[i]);
  for (e = table[i]; e != 0; e = e->next) {
    if (e->key == key)
      break;
  }
  if(e){
    // update the existing key.
    e->value = value;
  } else {
    // the new is new.
    insert(key, value, &table[i], table[i]);
  }
  pthread_mutex_unlock(&entry_mutex[i]);
}

再加个锁的初始化即可。

Barrier(moderate)

pthread中的信号量简单使用。

Barrier的作用是指定所有线程执行完后才能进行下一步,相当于一个整合的作用。

barrier.c:

static void 
barrier()
{
  // YOUR CODE HERE
  //
  // Block until all threads have called barrier() and
  // then increment bstate.round.
  //
  pthread_mutex_lock(&bstate.barrier_mutex);
  bstate.nthread++;
  if(bstate.nthread == nthread){
    pthread_mutex_unlock(&bstate.barrier_mutex);
    bstate.nthread = 0;
    bstate.round++;
    pthread_cond_broadcast(&bstate.barrier_cond);
  } else {
    pthread_cond_wait(&bstate.barrier_cond, &bstate.barrier_mutex);
  } 
}

Lab Network Driver

课程

课程主要讲了UART设备和console如何联系、如何读写。该部分调用关系的复杂导致理解上会出现困难性,下面是简略部分:

可能的寄存器:

  1. SIE(各类中断)
  2. SSTATUS(控制中断)
  3. SIP 查看中断类型
  4. SCAUSE 说明当前状态原因位中断。
  5. STVEC usertrap等返回位置

其中,STVEC和SCAUSE已经在trap里知道用处了,而其他寄存器在设备驱动中起作用。

如何让UART设备与xv6正常协同工作:

start(m mode)设置supervisor mode 中断,以及定时器初始化
-> console初始化,配置UART芯片
-> PLIC 初始化
-> SCHEDULER CPU接收中断。

读写的过程可以分为top(系统调用到buffer)和bottom(buffer到下层寄存器读写代码),这两部分代码组合便是驱动的代码。

write(console)

Top:shell-> putc-> write->filewrite->识别FD_DEVICE,consolewrite->获得char,uartputc->若满则sleep,否则写入buffer,调uartstart函数->写入THR

Bottom:送到后,收到中断-> PLIC-> devintr->plic_clai声明某个CPU获得中断(中断号)->uartintr-> consoleintr(由于接受寄存器为0,这里跳过)->uartstart,

read(console)

Top:shell-> read->fileread->consoleread(buffer),若空则sleep,否则读取键盘写入的buffer。

Bottom:键盘读入:中断-> plic, cpu, devintr(同上)->uartgetc->consoleintr->consputc.

Code(Hard)

lab的提示疑似有些太详尽了,这不是直接对着写完就行吗

lab只需要查部分文档即可写完,且用到的也就3个寄存器和4个标志位而已。
之后去查了一下6.828(6.S081的前身),发现量是真的多,相当于从头开始这个lab(虽然网络协议栈给了,但提示几乎没给,只能按照逻辑顺序重新实现e1000驱动。

int
e1000_transmit(struct mbuf *m)
{
  //
  // Your code here.
  //
  // the mbuf contains an ethernet frame; program it into
  // the TX descriptor ring so that the e1000 sends it. Stash
  // a pointer so that it can be freed after sending.
  //

  // transmit是单个frame的传送,因此多进程的时候需要acquire。
  // 实际上transmit的加锁与否不会影响太多,可能这就是锁罢。但recv的锁似乎无法加,具体得看一下net.c里怎么实现的。
  acquire(&e1000_lock);

  int index = regs[E1000_TDT];
  struct tx_desc* current_tx = tx_ring + index;

  // check if the ring is overflowing
  if((current_tx->status & E1000_TXD_STAT_DD) == 0){
    return -1;
  }
  // free the last mbuf that was transmitted from the descriptor
  if(tx_mbufs[index]){
    mbuffree(tx_mbufs[index]);
    tx_mbufs[index] = 0;
  }
  // fill in the descriptor
  current_tx->addr = (uint64)m->head;
  current_tx->length = m->len;

  current_tx->cmd = E1000_TXD_CMD_RS | E1000_TXD_CMD_EOP;
  // update E1000_TDT to wake up DMA.
  regs[E1000_TDT] = (regs[E1000_TDT] + 1) % TX_RING_SIZE;
  tx_mbufs[index] = m;

  release(&e1000_lock);

  return 0;
}

static void
e1000_recv(void)
{
  //
  // Your code here.
  //
  // Check for packets that have arrived from the e1000
  // Create and deliver an mbuf for each packet (using net_rx()).
  //
  
  while(1){
    // 同时刻多个进程进行read,因此需要while。
    // ask the ring index
    int index = (regs[E1000_RDT] + 1) % RX_RING_SIZE;
    struct rx_desc *current_rx = rx_ring + index;
    // check if available
    if((current_rx->status & E1000_RXD_STAT_DD) == 0){
      return;
    }
    // update mbuf len
    struct mbuf* current_mbuf = rx_mbufs[index];
    current_mbuf->len = current_rx->length;
    net_rx(current_mbuf);

    // new mbuf
    struct mbuf* next_mbuf = mbufalloc(MBUF_DEFAULT_HEADROOM);
    rx_mbufs[index] = next_mbuf;
    current_rx->addr = (uint64)next_mbuf->head;
    current_rx->status = 0;
    
    // update E1000_RDT
    regs[E1000_RDT] = (regs[E1000_RDT] + 1) % RX_RING_SIZE;
  }
}

去看了下optional challenge,很有趣,但我没时间写了呜呜呜。

Lab File System

课程

这部分还是有些小晕,主要是锁的部分,有的地方不用加有的地方要加。后面应该要整理一份梳理一下。

buffer

这部分主要为bread,bwrite和brelse。

bread:调用的bget其中bcache.lock 保证了bcache和得到的buffer初始化的原子性,从而让bget可以在bcache.lock release后再获取b->lock.
而bwrite, brelse反而无关紧要,bwrite必须写一个带锁的block,而brelse将锁去除并将block放回bcache。

logging

主要流程为:

begin_op();
//...
bp = bread();
bp->data[...] = ...;
log_write(bp);
//...
end_op();

begin_op完成对log这一全局变量的commit状态检查以及预留空间(log写入的空间不能超过LOGSIZE)的检查。
log_write 则在完成必要的检查后,pin了一个block(防止其被evict)并在上面进行多次写。多次写相比于一次写立刻提交有了性能提升。
end_op()完成写入log、commit log(通过写入head),install_trans(将log写入文件系统),清空log(log.lh.n = 0),最后再写入head来擦除transaction。

若commit后发生崩溃,则recover_from_log会在系统启动中由fsinit-initlog调用,写入log。

Large files (moderate)

实现双重间接块。

改下定义:

//fs.h
#define NDIRECT 11
#define NINDIRECT_DOUBLE (NINDIRECT * NINDIRECT)
#define MAXFILE (NDIRECT + NINDIRECT + NINDIRECT_DOUBLE)

struct dinode {
  short type;           // File type
  short major;          // Major device number (T_DEVICE only)
  short minor;          // Minor device number (T_DEVICE only)
  short nlink;          // Number of links to inode in file system
  uint size;            // Size of file (bytes)
  uint addrs[NDIRECT+1+1];   // Data block addresses
};

//file.h
struct inode {
  uint dev;           // Device number
  uint inum;          // Inode number
  int ref;            // Reference count
  struct sleeplock lock; // protects everything below here
  int valid;          // inode has been read from disk?

  short type;         // copy of disk inode
  short major;
  short minor;
  short nlink;
  uint size;
  uint addrs[NDIRECT+1+1];
};

bmap.c:

static uint
bmap(struct inode *ip, uint bn)
{
  uint addr, *a;
  struct buf *bp;
  if(bn < NDIRECT){
    if((addr = ip->addrs[bn]) == 0)
      ip->addrs[bn] = addr = balloc(ip->dev);
    return addr;
  }
  bn -= NDIRECT;

  if(bn < NINDIRECT){
    // Load indirect block, allocating if necessary.
    if((addr = ip->addrs[NDIRECT]) == 0)
      ip->addrs[NDIRECT] = addr = balloc(ip->dev);
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[bn]) == 0){
      a[bn] = addr = balloc(ip->dev);
      log_write(bp);
    }
    brelse(bp);
    return addr;
  }
  // begin
  bn -= NINDIRECT;

  int first_index = bn/NINDIRECT, second_index = bn%NINDIRECT;
  if(bn < NINDIRECT_DOUBLE){
    if((addr = ip->addrs[NDIRECT+1]) == 0)
      ip->addrs[NDIRECT+1] = addr = balloc(ip->dev);
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[first_index]) == 0){
      a[first_index] = addr = balloc(ip->dev);
      log_write(bp);
    }
    brelse(bp);
    // second : to data
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[second_index]) == 0){
      a[second_index] = addr = balloc(ip->dev);
      log_write(bp);
    }
    brelse(bp);
    return addr;
  }
  // end
  panic("bmap: out of range");
}

void
itrunc(struct inode *ip)
{
  int i, j;
  struct buf *bp;
  uint *a;

  for(i = 0; i < NDIRECT; i++){
    if(ip->addrs[i]){
      bfree(ip->dev, ip->addrs[i]);
      ip->addrs[i] = 0;
    }
  }

  if(ip->addrs[NDIRECT]){
    bp = bread(ip->dev, ip->addrs[NDIRECT]);
    a = (uint*)bp->data;
    for(j = 0; j < NINDIRECT; j++){
      if(a[j])
        bfree(ip->dev, a[j]);
    }
    brelse(bp);
    bfree(ip->dev, ip->addrs[NDIRECT]);
    ip->addrs[NDIRECT] = 0;
  }


  // begin
  struct buf *bp_index;
  uint *a_index;

  if(ip->addrs[NDIRECT+1]){
    bp = bread(ip->dev, ip->addrs[NDIRECT+1]);
    a = (uint*)bp->data;
    for(i = 0; i<NINDIRECT;i++){
      if(a[i]){
        bp_index = bread(ip->dev, a[i]);
        a_index = (uint*)bp_index->data;
        for(j = 0; j<NINDIRECT;j++){
          if(a_index[j])
            bfree(ip->dev, a_index[j]);
        }
        brelse(bp_index);
        bfree(ip->dev, a[i]);
        a[i] = 0;
      }
    }
    brelse(bp);
    bfree(ip->dev, ip->addrs[NINDIRECT+1]);
    ip->addrs[NDIRECT+1] = 0;
  }
  // end
  ip->size = 0;
  iupdate(ip);
}

增加系统调用的部分略。

定义:

//stat.h
#define T_SYMLINK 4   // Symlink
//fcntl.h
#define O_NOFOLLOW 0x100

sysfile.c:

uint64
sys_open(void)
{
  char path[MAXPATH];
  int fd, omode;
  struct file *f;
  struct inode *ip;
  int n;

  if((n = argstr(0, path, MAXPATH)) < 0 || argint(1, &omode) < 0)
    return -1;

  begin_op();

  if(omode & O_CREATE){
    ip = create(path, T_FILE, 0, 0);
    if(ip == 0){
      end_op();
      return -1;
    }
  } else {
    int cnt = 0;
    for(cnt = 0;cnt<10;++cnt){
      //printf("cnt: %d\n", cnt);
      if((ip = namei(path)) == 0){
        end_op();
        return -1;
      }
      ilock(ip);
      if(ip->type == T_SYMLINK && ((omode & O_NOFOLLOW) == 0)){
        if(readi(ip, 0, (uint64)path, 0, MAXPATH) != MAXPATH){
          iunlockput(ip);
          end_op();
          return -1;
        }
        iunlockput(ip);
      } else {
        break;
      }
    }
    if(cnt==10){
      iunlockput(ip);
      end_op();
      return -1;
    }

    if(ip->type == T_DIR && omode != O_RDONLY){
      iunlockput(ip);
      end_op();
      return -1;
    }
  }
  //...

}

uint64
sys_symlink(void){
  char target[MAXPATH], path[MAXPATH];
  if(argstr(0, target, MAXPATH) < 0 || argstr(1, path, MAXPATH) < 0){
    return -1;
  }

  int len = strlen(target);

  struct inode* ip;
  begin_op();
  if((ip = create(path, T_SYMLINK, 0, 0)) == 0){
    end_op();
    return -1;
  }
  //create 就是加锁的ip,这里铸币了
  //ilock(ip);
  if(writei(ip, 0, (uint64)target, 0, len) != len){
    end_op();
    return -1;
  }
  iunlockput(ip);
  end_op();
  return 0;
}

几个错误点:

  1. 文档里的api不记得了,甚至还想自己实现一遍writei/readi,幸好悬崖勒马了。
  2. 另外对open的使用也不太熟悉,导致我还以为要在symlink的系统调用里头用O_TAG,我寻思也没有file层的调用啊。
  3. 最后就是ilock…这部分到现在还是不太了解,等之后细细梳理了。不过在这里和lock的原理无关,单纯就是自己会忘记加iunlockput而已。

碎碎念

一拖再拖到最后剩一个mmap lab没完成,只能期待后面有时间写了。接下来没有lab了,课程还会继续看,但博客不再更新了,令人感叹。

写lab确实是有用的,满足了我写代码的需求,并且我也确实熟悉了关于操作系统的知识。但到后面就会发现,我这种三天打鱼两天晒网式学习严重影响了学6.S081 的过程,这也是我没时间写mmap lab的原因。

希望自己以后学这些确实能学到知识的时候,不会再去用这种态度去糟蹋知识吧。


Author: ZzzRemake
Reprint policy: All articles in this blog are used except for special statements CC BY 4.0 reprint policy. If reproduced, please indicate source ZzzRemake !
Comment
  TOC