Asterinas Process Startup and Switching
Process Startup
Process creation:
 Rust
pub fn spawn_user_process(
    executable_path: &str,
    argv: Vec<CString>,
    envp: Vec<CString>,
) -> Result<Arc<Process>> {
    // spawn user process should give an absolute path
    debug_assert!(executable_path.starts_with('/'));
    let process = Process::create_user_process(executable_path, argv, envp)?;
    open_ntty_as_controlling_terminal(&process)?;
    process.run();
    Ok(process)
}
Rust
fn create_user_process(
    executable_path: &str,
    argv: Vec<CString>,
    envp: Vec<CString>,
) -> Result<Arc<Process>> {
    let process_builder = {
        let pid = allocate_tid();
        let parent = Weak::new();
        let credentials = Credentials::new_root();
        let mut builder = ProcessBuilder::new(pid, executable_path, parent);
        builder.argv(argv).envp(envp).credentials(credentials);
        builder
    };

    let process = process_builder.build()?;

    // Lock order: session table -> group table -> process table -> group of process
    // -> group inner -> session inner
    let mut session_table_mut = process_table::session_table_mut();
    let mut group_table_mut = process_table::group_table_mut();
    let mut process_table_mut = process_table::process_table_mut();

    // Creates new group
    let group = ProcessGroup::new(process.clone());
    *process.process_group.lock() = Arc::downgrade(&group);
    group_table_mut.insert(group.pgid(), group.clone());

    // Creates new session
    let session = Session::new(group.clone());
    group.inner.lock().session = Arc::downgrade(&session);
    session.inner.lock().leader = Some(process.clone());
    session_table_mut.insert(session.sid(), session);

    process_table_mut.insert(process.pid(), process.clone());
    Ok(process)
}
Creating the thread:
 Rust
pub fn build(self) -> Result<Arc<Process>> {
 self.check_build()?;
 let Self {
 pid,
 executable_path,
 parent,
 main_thread_builder,
 argv,
 envp,
 process_vm,
 file_table,
 fs,
 umask,
 resource_limits,
 sig_dispositions,
 credentials,
 } = self;
    let process_vm = process_vm.or_else(|| Some(ProcessVm::alloc())).unwrap();
    let file_table = file_table
        .or_else(|| Some(Arc::new(Mutex::new(FileTable::new_with_stdio()))))
        .unwrap();
    let fs = fs
        .or_else(|| Some(Arc::new(RwMutex::new(FsResolver::new()))))
        .unwrap();
    let umask = umask
        .or_else(|| Some(Arc::new(RwLock::new(FileCreationMask::default()))))
        .unwrap();
    let resource_limits = resource_limits
        .or_else(|| Some(ResourceLimits::default()))
        .unwrap();
    let sig_dispositions = sig_dispositions
        .or_else(|| Some(Arc::new(Mutex::new(SigDispositions::new()))))
        .unwrap();

    let process = {
        let threads = Vec::new();
        Arc::new(Process::new(
            pid,
            parent,
            threads,
            executable_path.to_string(),
            process_vm,
            file_table,
            fs,
            umask,
            sig_dispositions,
            resource_limits,
        ))
    };

    let thread = if let Some(thread_builder) = main_thread_builder {
        let builder = thread_builder.process(Arc::downgrade(&process));
        builder.build()
    } else {
        Thread::new_posix_thread_from_executable(
            pid,
            credentials.unwrap(),
            process.vm(),
            &process.fs().read(),
            executable_path,
            Arc::downgrade(&process),
            argv.unwrap(),
            envp.unwrap(),
        )?
    };

    process.threads().lock().push(thread);
    process.set_runnable();

    Ok(process)
}
Rust
 impl PosixThreadExt for Thread {
 /// This function should only be called when launch shell()
 fn new_posix_thread_from_executable(
 tid: Tid,
 credentials: Credentials,
 process_vm: &ProcessVm,
 fs_resolver: &FsResolver,
 executable_path: &str,
 process: Weak<Process>,
 argv: Vec<CString>,
 envp: Vec<CString>,
 ) -> Result<Arc<Self>> {
 let elf_file = {
 let fs_path = FsPath::new(AT_FDCWD, executable_path)?;
 fs_resolver.lookup(&fs_path)?
 };
 let (_, elf_load_info) =
 load_program_to_vm(process_vm, elf_file, argv, envp, fs_resolver, 1)?;
    let vm_space = process_vm.root_vmar().vm_space().clone();
    let mut cpu_ctx = UserContext::default();
    cpu_ctx.set_rip(elf_load_info.entry_point() as _);
    cpu_ctx.set_rsp(elf_load_info.user_stack_top() as _);
    let user_space = Arc::new(UserSpace::new(vm_space, cpu_ctx));
    let thread_name = Some(ThreadName::new_from_executable_path(executable_path)?);
    let thread_builder = PosixThreadBuilder::new(tid, user_space, credentials)
        .thread_name(thread_name)
        .process(process);
    Ok(thread_builder.build())
}
The ELF file is parsed and loaded into memory, the ELF entry address is located, and the corresponding stack is set up:
 Rust
 cpu_ctx.set_rip(elf_load_info.entry_point() as _);
 cpu_ctx.set_rsp(elf_load_info.user_stack_top() as _);
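The entry address itself comes from the ELF header. As a standalone illustration (this is not the Asterinas loader, and the file path below is only an example), the 8-byte little-endian e_entry field of an ELF64 file sits at offset 24 of the header; the loader's entry point is derived from this field (adjusted by the load base for position-independent executables):
Rust
use std::fs::File;
use std::io::Read;

// ELF64 header layout: 16-byte e_ident, e_type (2), e_machine (2), e_version (4),
// then the 8-byte little-endian e_entry at offset 24.
fn elf64_entry_point(path: &str) -> std::io::Result<u64> {
    let mut header = [0u8; 64];
    File::open(path)?.read_exact(&mut header)?;
    assert_eq!(&header[0..4], b"\x7fELF", "not an ELF file");
    assert_eq!(header[4], 2, "not a 64-bit ELF");
    Ok(u64::from_le_bytes(header[24..32].try_into().unwrap()))
}

fn main() -> std::io::Result<()> {
    // Example path; replace with any ELF64 executable.
    println!("entry point: {:#x}", elf64_entry_point("/bin/ls")?);
    Ok(())
}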
Both vm_space and cpu_ctx are packed into user_space.
 Rust
 let user_space = Arc::new(UserSpace::new(vm_space, cpu_ctx));
Creating the new thread:
 Rust
pub fn build(self) -> Arc<Thread> {
 let Self {
 tid,
 user_space,
 process,
 credentials,
 thread_name,
 set_child_tid,
 clear_child_tid,
 sig_mask,
 sig_queues,
 is_main_thread,
 } = self;
 let thread = Arc::new_cyclic(|thread_ref| {
 let task = create_new_user_task(user_space, thread_ref.clone());
 let status = ThreadStatus::Init;
 let posix_thread = PosixThread {
 process,
 is_main_thread,
 name: Mutex::new(thread_name),
 set_child_tid: Mutex::new(set_child_tid),
 clear_child_tid: Mutex::new(clear_child_tid),
 credentials,
 sig_mask: Mutex::new(sig_mask),
 sig_queues: Mutex::new(sig_queues),
 sig_context: Mutex::new(None),
 sig_stack: Mutex::new(None),
 robust_list: Mutex::new(None),
 };
        Thread::new(tid, task, posix_thread, status)
    });
    thread_table::add_thread(thread.clone());
    thread
}
The core step here is creating the task, which is then placed into the thread:
 Rust
 let task = create_new_user_task(user_space, thread_ref.clone());
 let status = ThreadStatus::Init;
 let posix_thread = PosixThread {
 process,
 is_main_thread,
 name: Mutex::new(thread_name),
 set_child_tid: Mutex::new(set_child_tid),
 clear_child_tid: Mutex::new(clear_child_tid),
 credentials,
 sig_mask: Mutex::new(sig_mask),
 sig_queues: Mutex::new(sig_queues),
 sig_context: Mutex::new(None),
 sig_stack: Mutex::new(None),
 robust_list: Mutex::new(None),
 };
        Thread::new(tid, task, posix_thread, status)
The implementation of create_new_user_task:
 Rust
pub fn create_new_user_task(user_space: Arc<UserSpace>, thread_ref: Weak<Thread>) -> Arc<Task> {
 fn user_task_entry() {
 let cur = Task::current();
 let user_space = cur.user_space().expect("user task should have user space");
 let mut user_mode = UserMode::new(user_space);
 debug!(
     "[Task entry] rip = 0x{:x}",
     user_mode.context().instruction_pointer()
 );
 debug!(
     "[Task entry] rsp = 0x{:x}",
     user_mode.context().stack_pointer()
 );
 debug!(
     "[Task entry] rax = 0x{:x}",
     user_mode.context().syscall_ret()
 );
 loop {
 let user_event: UserEvent = user_mode.execute();
 let context = user_mode.context_mut();
 // handle user event:
 handle_user_event(user_event, context);
 let current_thread = current_thread!();
 // should be do this comparison before handle signal?
 if current_thread.status().lock().is_exited() {
 break;
 }
 handle_pending_signal(context).unwrap();
 if current_thread.status().lock().is_exited() {
 debug!("exit due to signal");
 break;
 }
 // If current is suspended, wait for a signal to wake up self
 while current_thread.status().lock().is_stopped() {
 Thread::yield_now();
 debug!("{} is suspended.", current_thread.tid());
 handle_pending_signal(context).unwrap();
 }
 // a preemption point after handling user event.
 preempt();
 }
 debug!("exit user loop");
 // FIXME: This is a work around: exit in kernel task entry may be not called. Why this will happen?
 Task::current().exit();
 }
    TaskOptions::new(user_task_entry)
        .data(thread_ref)
        .user_space(Some(user_space))
        .build()
        .expect("spawn task failed")
}
user_mode is created from user_space:
 Rust
 let mut user_mode = UserMode::new(user_space);
This essentially hands the context stored in user_space over to user_mode:
 Rust
pub fn new(user_space: &'a Arc<UserSpace>) -> Self {
 Self {
 current: Task::current(),
 user_space,
 context: user_space.init_ctx,
 }
 }
user_task_entry serves as the user-side entry of the current task. At the same time, a kernel-side entry, kernel_task_entry, is built for the task and installed into the task's TaskContext:
 Rust
pub fn build(self) -> Result<Arc<Task>> {
 /// all task will entering this function
 /// this function is mean to executing the task_fn in Task
 fn kernel_task_entry() {
 let current_task = current_task()
 .expect("no current task, it should have current task in kernel task entry");
 current_task.func.call(());
 current_task.exit();
 }
 let result = Task {
 func: self.func.unwrap(),
 data: self.data.unwrap(),
 user_space: self.user_space,
 task_inner: Mutex::new(TaskInner {
 task_status: TaskStatus::Runnable,
 ctx: TaskContext::default(),
 }),
 exit_code: 0,
 kstack: KernelStack::new_with_guard_page()?,
 link: LinkedListAtomicLink::new(),
 priority: self.priority,
 cpu_affinity: self.cpu_affinity,
 };
    result.task_inner.lock().task_status = TaskStatus::Runnable;
    result.task_inner.lock().ctx.rip = kernel_task_entry as usize;
    result.task_inner.lock().ctx.regs.rsp =
        (crate::vm::paddr_to_vaddr(result.kstack.end_paddr())) as u64;

    Ok(Arc::new(result))
}
The flow above is clear: from creating the process, to creating the thread, to creating the task.
Task Switching
With the task created by the flow above, let's now look at how tasks are switched and executed.
 Rust
 process.run();
Once the process has been created, it starts to run, which in turn runs its thread:
 Rust
 pub fn run(&self) {
 let threads = self.threads.lock();
 // when run the process, the process should has only one thread
 debug_assert!(threads.len() == 1);
 debug_assert!(self.is_runnable());
 let thread = threads[0].clone();
 // should not hold the lock when run thread
 drop(threads);
 thread.run();
 }
The thread's task then starts executing:
 Rust
 pub fn run(&self) {
 self.status.lock().set_running();
 self.task.run();
 }
The current task is put into the system's task queue:
 Rust
pub fn add_task(task: Arc<Task>) {
 GLOBAL_SCHEDULER.lock_irq_disabled().enqueue(task);
 }
Scheduling:
 Rust
 pub fn schedule() {
 if let Some(task) = fetch_task() {
 switch_to_task(task);
 }
 }
Task switching:
 Rust
fn switch_to_task(next_task: Arc<Task>) {
 if !PREEMPT_COUNT.is_preemptive() {
 panic!(
 "Calling schedule() while holding {} locks",
 PREEMPT_COUNT.num_locks()
 );
 //GLOBAL_SCHEDULER.lock_irq_disabled().enqueue(next_task);
 //return;
 }
 let current_task_option = current_task();
 let next_task_cx_ptr = &next_task.inner_ctx() as *const TaskContext;
 let current_task: Arc;
 let current_task_cx_ptr: *mut TaskContext = match current_task_option {
 None => PROCESSOR.lock().get_idle_task_cx_ptr(),
 Some(current_task) => {
 if current_task.status() == TaskStatus::Runnable {
 GLOBAL_SCHEDULER
 .lock_irq_disabled()
 .enqueue(current_task.clone());
 }
 &mut current_task.inner_exclusive_access().ctx as *mut TaskContext
 }
 };
    // change the current task to the next task
    PROCESSOR.lock().current = Some(next_task.clone());

    unsafe {
        context_switch(current_task_cx_ptr, next_task_cx_ptr);
    }
}
The task switch here uses TaskContext, and from the earlier analysis we know that TaskContext holds kernel_task_entry:
 Rust
 result.task_inner.lock().ctx.rip = kernel_task_entry as usize;
 result.task_inner.lock().ctx.regs.rsp =
 (crate::vm::paddr_to_vaddr(result.kstack.end_paddr())) as u64;
The implementation of context_switch:
Assembly
 .text
 .global context_switch
 .code64
 context_switch: # (cur: *mut TaskContext, nxt: *TaskContext)
# Save cur's registers
mov rax, [rsp] # return address
 mov [rdi + 56], rax # 56 = offsetof(Context, rip)
 mov [rdi + 0], rsp
 mov [rdi + 8], rbx
 mov [rdi + 16], rbp
 mov [rdi + 24], r12
 mov [rdi + 32], r13
 mov [rdi + 40], r14
 mov [rdi + 48], r15
# Restore nxt's registers
mov rsp, [rsi + 0]
 mov rbx, [rsi + 8]
 mov rbp, [rsi + 16]
 mov r12, [rsi + 24]
 mov r13, [rsi + 32]
 mov r14, [rsi + 40]
 mov r15, [rsi + 48]
 mov rax, [rsi + 56] # restore return address
 mov [rsp], rax # for stack balance, must use mov instead of push
 ret
In x86_64 assembly, the calling convention for function arguments is:
 • %rdi, %rsi, %rdx, %rcx, %r8, %r9: when there are fewer than 7 arguments, they are passed left to right in rdi, rsi, rdx, rcx, r8, r9; with 7 or more, the first 6 go in these registers and the rest are pushed onto the stack from right to left, as in 32-bit assembly.
 • So rdi holds current_task_cx_ptr, and next_task_cx_ptr is passed in rsi.
 • rax is loaded with ctx.rip, i.e. kernel_task_entry.
 • When the ret instruction executes, the return address is the value taken from rax (which was just stored at the top of the stack).
Why does rax end up holding the return address? Look at the definition of TaskContext: rip sits immediately after the seven callee-saved registers of CalleeRegs, i.e. at byte offset 7 × 8 = 56. Therefore
 mov rax, [rsi + 56] # restore return address
reads exactly the rip member through rsi + 56.
 Rust
 pub struct CalleeRegs {
 pub rsp: u64,
 pub rbx: u64,
 pub rbp: u64,
 pub r12: u64,
 pub r13: u64,
 pub r14: u64,
 pub r15: u64,
 }
#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
 pub(crate) struct TaskContext {
 pub regs: CalleeRegs,
 pub rip: usize,
 }
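To make the 56-byte offset concrete, here is a standalone sketch (it mirrors the two structs above and is not part of the Asterinas source; offset_of! needs Rust 1.77+) that checks where rip lands:
Rust
use core::mem::offset_of;

#[repr(C)]
struct CalleeRegs {
    rsp: u64, rbx: u64, rbp: u64, r12: u64, r13: u64, r14: u64, r15: u64,
}

#[repr(C)]
struct TaskContext {
    regs: CalleeRegs,
    rip: usize,
}

// Seven u64 callee-saved registers occupy bytes 0..56, so `rip` sits at offset 56,
// which is exactly what `mov rax, [rsi + 56]` reads and `mov [rdi + 56], rax` writes.
const _: () = assert!(offset_of!(CalleeRegs, r15) == 48);
const _: () = assert!(offset_of!(TaskContext, rip) == 56);

fn main() {
    println!("offsetof(TaskContext, rip) = {}", offset_of!(TaskContext, rip));
}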
After the task switch completes, the next task resumes execution at kernel_task_entry:
 Rust
 fn kernel_task_entry() {
 let current_task = current_task()
 .expect("no current task, it should have current task in kernel task entry");
 current_task.func.call(());
 current_task.exit();
 }
kernel_task_entry then invokes user_task_entry:
 Rust
 fn user_task_entry() {
 let cur = Task::current();
 let user_space = cur.user_space().expect("user task should have user space");
 let mut user_mode = UserMode::new(user_space);
 debug!(
     "[Task entry] rip = 0x{:x}",
     user_mode.context().instruction_pointer()
 );
 debug!(
     "[Task entry] rsp = 0x{:x}",
     user_mode.context().stack_pointer()
 );
 debug!(
     "[Task entry] rax = 0x{:x}",
     user_mode.context().syscall_ret()
 );
 loop {
 let user_event: UserEvent = user_mode.execute();
 let context: &mut UserContext = user_mode.context_mut();
 // handle user event:
 handle_user_event(user_event, context);
 let current_thread = current_thread!();
 // should be do this comparison before handle signal?
 if current_thread.status().lock().is_exited() {
 break;
 }
 handle_pending_signal(context).unwrap();
 if current_thread.status().lock().is_exited() {
 debug!("exit due to signal");
 break;
 }
 // If current is suspended, wait for a signal to wake up self
 while current_thread.status().lock().is_stopped() {
 Thread::yield_now();
 debug!("{} is suspended.", current_thread.tid());
 handle_pending_signal(context).unwrap();
 }
 // a preemption point after handling user event.
 preempt();
 }
 debug!("exit user loop");
 // FIXME: This is a work around: exit in kernel task entry may be not called. Why this will happen?
 Task::current().exit();
 }
The key step in the loop is entering user mode:
Rust
let user_event: UserEvent = user_mode.execute();
execute() is implemented as follows:
Rust
 impl UserContextApiInternal for UserContext {
 fn execute(&mut self) -> crate::user::UserEvent {
 // set interrupt flag so that in user mode it can receive external interrupts
 // set ID flag which means cpu support CPUID instruction
 self.user_context.general.rflags |= (RFlags::INTERRUPT_FLAG | RFlags::ID).bits() as usize;
        const SYSCALL_TRAPNUM: u16 = 0x100;

        // return when it is syscall or cpu exception type is Fault or Trap.
        loop {
            self.user_context.run();
            match CpuException::to_cpu_exception(self.user_context.trap_num as u16) {
                Some(exception) => {
                    #[cfg(feature = "intel_tdx")]
                    if *exception == VIRTUALIZATION_EXCEPTION {
                        let ve_info =
                            tdcall::get_veinfo().expect("#VE handler: fail to get VE info\n");
                        handle_virtual_exception(self.general_regs_mut(), &ve_info);
                        continue;
                    }
                    if exception.typ == CpuExceptionType::FaultOrTrap
                        || exception.typ == CpuExceptionType::Fault
                        || exception.typ == CpuExceptionType::Trap
                    {
                        break;
                    }
                }
                None => {
                    if self.user_context.trap_num as u16 == SYSCALL_TRAPNUM {
                        break;
                    }
                }
            };
            call_irq_callback_functions(&self.as_trap_frame());
        }

        crate::arch::irq::enable_local();
        if self.user_context.trap_num as u16 != SYSCALL_TRAPNUM {
            self.cpu_exception_info = CpuExceptionInfo {
                page_fault_addr: unsafe { x86::controlregs::cr2() },
                id: self.user_context.trap_num,
                error_code: self.user_context.error_code,
            };
            UserEvent::Exception
        } else {
            UserEvent::Syscall
        }
}
Eventually this calls into syscall_return with the UserContext as its argument, so the rdi register points to the UserContext.
Assembly
 .global syscall_return
 syscall_return:
 # disable interrupt
 cli
# save callee-saved registers
mov ecx, 0xC0000100
rdmsr
shl rdx, 32
or  rax, rdx
push rax                # push fsbase
push r15
push r14
push r13
push r12
push rbp
push rbx
push rdi                # keep rsp 16 bytes align
mov gs:4, rsp           # store kernel rsp -> TSS.sp0
mov rsp, rdi            # set rsp = bottom of trap frame

# pop fsbase gsbase
swapgs                  # store kernel gsbase
mov ecx, 0xC0000100
mov edx, [rsp + 18*8+4]
mov eax, [rsp + 18*8]
wrmsr                   # pop fsbase
mov ecx, 0xC0000101
mov edx, [rsp + 19*8+4]
mov eax, [rsp + 19*8]
wrmsr                   # pop gsbase

pop rax
pop rbx
pop rcx
pop rdx
pop rsi
pop rdi
pop rbp
pop r8                  # skip rsp
pop r8
pop r9
pop r10
pop r11
pop r12
pop r13
pop r14
pop r15
# rip
# rflags
# fsbase
# gsbase
# trap_num
# error_code

# determine sysret or iret
cmp dword ptr [rsp + 4*8], 0x100  # syscall?
je sysret
iret:
 # get user cs from STAR MSR
 mov ecx, 0xC0000081
 rdmsr # msr[ecx] => edx:eax
 shr edx, 16 # dx = user_cs32
 lea ax, [edx + 8] # ax = user_ss
 add dx, 16 # dx = user_cs64
# construct trap frame
push rax                # push ss
push [rsp - 8*8]        # push rsp
push [rsp + 3*8]        # push rflags
push rdx                # push cs
push [rsp + 4*8]        # push rip

# recover rcx, rdx, rax
mov rax, [rsp - 11*8]
mov rcx, [rsp - 9*8]
mov rdx, [rsp - 8*8]

iretq
The key part of this code is here:
Assembly
push rax                # push ss
push [rsp - 8*8]        # push rsp
push [rsp + 3*8]        # push rflags
push rdx                # push cs
push [rsp + 4*8]        # push rip
When iretq executes, it pops the return address off the top of the stack, which corresponds exactly to:
Assembly
push [rsp + 4*8]        # push rip
And rsp was changed earlier by:
Assembly
 mov rsp, rdi # set rsp = bottom of trap frame
And rdi is the argument passed into the syscall_return call, namely the UserContext variable.
 Rust
 pub struct UserContext {
 pub general: GeneralRegs,
 pub trap_num: usize,
 pub error_code: usize,
 }
/// General registers
 #[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
#[repr(C)]
 pub struct GeneralRegs {
 pub rax: usize,
 pub rbx: usize,
 pub rcx: usize,
 pub rdx: usize,
 pub rsi: usize,
 pub rdi: usize,
 pub rbp: usize,
 pub rsp: usize,
 pub r8: usize,
 pub r9: usize,
 pub r10: usize,
 pub r11: usize,
 pub r12: usize,
 pub r13: usize,
 pub r14: usize,
 pub r15: usize,
 pub rip: usize,
 pub rflags: usize,
 pub fsbase: usize,
 pub gsbase: usize,
 }
mov rsp, rdi effectively makes rsp point at the UserContext object. The series of pop and push operations that follow simply walk this pointer across the different members of the UserContext object, and this instruction
Assembly
 push [rsp + 4*8] # push rip
ultimately targets the rip member of the UserContext object, which was set to the application's entry address:
 Rust
 cpu_ctx.set_rip(elf_load_info.entry_point() as _);
 cpu_ctx.set_rsp(elf_load_info.user_stack_top() as _);
So when iretq executes, the value popped off the top of the stack is the application's entry address, and execution jumps straight into the application's entry point.
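The offsets this assembly relies on can be checked with the same trick as before. The standalone sketch below (mirroring the structs above, not Asterinas code, Rust 1.77+ for offset_of!) confirms that rip is the 17th 8-byte slot of GeneralRegs and that fsbase/gsbase sit at 18*8 and 19*8, which is exactly where syscall_return's wrmsr sequence reads them:
Rust
use core::mem::offset_of;

#[repr(C)]
struct GeneralRegs {
    rax: usize, rbx: usize, rcx: usize, rdx: usize,
    rsi: usize, rdi: usize, rbp: usize, rsp: usize,
    r8: usize, r9: usize, r10: usize, r11: usize,
    r12: usize, r13: usize, r14: usize, r15: usize,
    rip: usize, rflags: usize, fsbase: usize, gsbase: usize,
}

#[repr(C)]
struct UserContext {
    general: GeneralRegs,
    trap_num: usize,
    error_code: usize,
}

// These match the assembly operands: rip at 16*8, fsbase at [rsp + 18*8],
// gsbase at [rsp + 19*8], trap_num right after the general registers.
const _: () = assert!(offset_of!(GeneralRegs, rip) == 16 * 8);
const _: () = assert!(offset_of!(GeneralRegs, fsbase) == 18 * 8);
const _: () = assert!(offset_of!(GeneralRegs, gsbase) == 19 * 8);
const _: () = assert!(offset_of!(UserContext, trap_num) == 20 * 8);

fn main() {
    println!("offsetof(GeneralRegs, rip) = {}", offset_of!(GeneralRegs, rip));
}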
System Calls and Returns
The discussion above covered how the program reaches its entry address and starts executing. While the application runs, it may issue system calls, which trap back into the kernel; the kernel handles the call and then returns to the application's user space so it can continue executing.
First, a single, unified system call handler address is registered with the CPU:
Rust
 pub fn init() {
 let cpuid = raw_cpuid::CpuId::new();
 unsafe {
 // enable syscall instruction
 assert!(cpuid
 .get_extended_processor_and_feature_identifiers()
 .unwrap()
 .has_syscall_sysret());
 Efer::update(|efer| {
 efer.insert(EferFlags::SYSTEM_CALL_EXTENSIONS);
 });
        // flags to clear on syscall
        // copy from Linux 5.0
        // TF|DF|IF|IOPL|AC|NT
        const RFLAGS_MASK: u64 = 0x47700;

        LStar::write(VirtAddr::new(syscall_entry as usize as u64));
        SFMask::write(RFlags::from_bits(RFLAGS_MASK).unwrap());
}
}
syscall_entry serves as the entry point for handling system calls: when an application issues a system call, the CPU traps the request and transfers control to syscall_entry.
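For completeness, here is what the other side of that handoff looks like from user space. The sketch below is illustrative rather than Asterinas code: it issues a raw write system call using the x86_64 convention (syscall number in rax; arguments in rdi, rsi, rdx, r10, r8, r9; rcx and r11 are clobbered by the syscall instruction), which is exactly what lands the CPU at the address written into LSTAR:
Rust
use std::arch::asm;

/// Raw write(2): Linux-compatible syscall number 1.
fn raw_write(fd: usize, buf: &[u8]) -> isize {
    const SYS_WRITE: usize = 1;
    let ret: isize;
    unsafe {
        asm!(
            "syscall",
            // rax carries the syscall number in and the return value out.
            inlateout("rax") SYS_WRITE => ret,
            in("rdi") fd,
            in("rsi") buf.as_ptr(),
            in("rdx") buf.len(),
            // The syscall instruction stores rip in rcx and rflags in r11.
            out("rcx") _,
            out("r11") _,
            options(nostack),
        );
    }
    ret
}

fn main() {
    raw_write(1, b"hello via raw syscall\n");
}
The kernel-side handler that receives this trap is syscall_entry: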
Assembly
 .global syscall_entry
 syscall_entry:
 # syscall instruction do:
 # - load cs
 # - store rflags -> r11
 # - mask rflags
 # - store rip -> rcx
 # - load rip
swapgs                  # swap in kernel gs
mov gs:12, rsp          # store user rsp -> scratch at TSS.sp1
mov rsp, gs:4           # load kernel rsp <- TSS.sp0
pop rsp                 # load rsp = bottom of trap frame
add rsp, 22*8           # rsp = top of trap frame

# push trap_num, error_code
push 0                  # push error_code
push 0x100              # push trap_num
sub rsp, 16             # skip fsbase, gsbase
# push general registers
push r11                # push rflags
push rcx                # push rip
.global trap_syscall_entry
 trap_syscall_entry:
 push r15
 push r14
 push r13
 push r12
 push r11
 push r10
 push r9
 push r8
 push gs:12 # push rsp
 push rbp
 push rdi
 push rsi
 push rdx
 push rcx
 push rbx
 push rax
# push fsbase gsbase
mov ecx, 0xC0000100
rdmsr
mov [rsp + 18*8+4], edx
mov [rsp + 18*8], eax
mov ecx, 0xC0000102     # kernelgs
rdmsr
mov [rsp + 19*8+4], edx
mov [rsp + 19*8], eax

# restore callee-saved registers
mov rsp, gs:4           # load kernel rsp <- TSS.sp0
pop rbx
pop rbx
pop rbx
pop rbp
pop r12
pop r13
pop r14
pop r15

pop rax
mov ecx, 0xC0000100
mov rdx, rax
shr rdx, 32
wrmsr                   # pop fsbase

# go back to Rust
ret
The key point of this code is:
Assembly
 mov rsp, gs:4 # load kernel rsp <- TSS.sp0
This places the kernel stack top into rsp.
Whereas when syscall_return was invoked:
Assembly
 mov gs:4, rsp # store kernel rsp -> TSS.sp0
the kernel stack top was saved into gs:4.
So once the kernel rsp has been restored, a series of pop operations follows:
Assembly
 # restore callee-saved registers
 mov rsp, gs:4 # load kernel rsp <- TSS.sp0
 pop rbx
 pop rbx
 pop rbx
 pop rbp
 pop r12
 pop r13
 pop r14
 pop r15
pop rax
These pops correspond one-to-one with the pushes made when syscall_return was called, so after popping, the stack top points to the function's return address, i.e. the return address of the syscall_return call. Executing ret therefore returns to the kernel code that called syscall_return, which continues handling the system call.
So, viewed from this angle, the whole task switch looks like this:
Task1.kernel_task_entry -> Task1.user_task_entry -> return to user space via iretq/sysret -> enter the kernel through syscall_entry -> Task1.user_task_entry handles the system call, and so on.
Task Scheduling
Asterinas builds a priority-based, preemptible scheduler:
 Rust
 pub fn init() {
 let preempt_scheduler = Box::new(PreemptScheduler::new());
 let scheduler = Box::<PreemptScheduler>::leak(preempt_scheduler);
 set_scheduler(scheduler);
 }
The scheduler divides tasks into two types, real-time tasks and normal tasks:
 Rust
 struct PreemptScheduler {
 /// Tasks with a priority of less than 100 are regarded as real-time tasks.
 real_time_tasks: SpinLock<LinkedList>,
 /// Tasks with a priority greater than or equal to 100 are regarded as normal tasks.
 normal_tasks: SpinLock<LinkedList>,
 }
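To make the queueing policy concrete, here is a simplified, self-contained sketch of the same idea (the names and std types are illustrative; the real scheduler uses SpinLock and intrusive linked lists rather than Mutex and VecDeque):
Rust
use std::collections::VecDeque;
use std::sync::Mutex;

/// Tasks with a priority below 100 count as real-time, mirroring the comment above.
const REAL_TIME_PRIORITY: u16 = 100;

struct SketchTask {
    priority: u16,
}

impl SketchTask {
    fn is_real_time(&self) -> bool {
        self.priority < REAL_TIME_PRIORITY
    }
}

struct SketchScheduler {
    real_time_tasks: Mutex<VecDeque<SketchTask>>,
    normal_tasks: Mutex<VecDeque<SketchTask>>,
}

impl SketchScheduler {
    fn new() -> Self {
        Self {
            real_time_tasks: Mutex::new(VecDeque::new()),
            normal_tasks: Mutex::new(VecDeque::new()),
        }
    }

    fn enqueue(&self, task: SketchTask) {
        if task.is_real_time() {
            self.real_time_tasks.lock().unwrap().push_back(task);
        } else {
            self.normal_tasks.lock().unwrap().push_back(task);
        }
    }

    /// Real-time tasks are always drained before normal tasks.
    fn dequeue(&self) -> Option<SketchTask> {
        if let Some(task) = self.real_time_tasks.lock().unwrap().pop_front() {
            return Some(task);
        }
        self.normal_tasks.lock().unwrap().pop_front()
    }

    /// A non-real-time task yields as soon as any real-time task is waiting.
    fn should_preempt(&self, current: &SketchTask) -> bool {
        !current.is_real_time() && !self.real_time_tasks.lock().unwrap().is_empty()
    }
}

fn main() {
    let scheduler = SketchScheduler::new();
    scheduler.enqueue(SketchTask { priority: 120 }); // normal task
    scheduler.enqueue(SketchTask { priority: 10 });  // real-time task
    let current = SketchTask { priority: 120 };
    assert!(scheduler.should_preempt(&current));
    assert_eq!(scheduler.dequeue().unwrap().priority, 10); // real-time first
}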
Task preemption: the task's user_task_entry contains a preemption point:
 Rust
pub fn create_new_user_task(user_space: Arc<UserSpace>, thread_ref: Weak<Thread>) -> Arc<Task> {
 fn user_task_entry() {
 let cur = Task::current();
 let user_space = cur.user_space().expect("user task should have user space");
 let mut user_mode = UserMode::new(user_space);
 debug!(
     "[Task entry] rip = 0x{:x}",
     user_mode.context().instruction_pointer()
 );
 debug!(
     "[Task entry] rsp = 0x{:x}",
     user_mode.context().stack_pointer()
 );
 debug!(
     "[Task entry] rax = 0x{:x}",
     user_mode.context().syscall_ret()
 );
 loop {
 let user_event = user_mode.execute();
 let context = user_mode.context_mut();
 // handle user event:
 handle_user_event(user_event, context);
 let current_thread = current_thread!();
 // should be do this comparison before handle signal?
 if current_thread.status().lock().is_exited() {
 break;
 }
 handle_pending_signal(context).unwrap();
 if current_thread.status().lock().is_exited() {
 debug!("exit due to signal");
 break;
 }
 // If current is suspended, wait for a signal to wake up self
 while current_thread.status().lock().is_stopped() {
 Thread::yield_now();
 debug!("{} is suspended.", current_thread.tid());
 handle_pending_signal(context).unwrap();
 }
 // a preemption point after handling user event.
 preempt();
 }
 debug!("exit user loop");
 // FIXME: This is a work around: exit in kernel task entry may be not called. Why this will happen?
 Task::current().exit();
 }
This preemption point checks whether the current task can be preempted by another task:
 Rust
 pub fn preempt() {
 // disable interrupts to avoid nested preemption.
 let disable_irq = disable_local();
 let Some(curr_task) = current_task() else {
 return;
 };
 let mut scheduler = GLOBAL_SCHEDULER.lock_irq_disabled();
 if !scheduler.should_preempt(&curr_task) {
 return;
 }
 let Some(next_task) = scheduler.dequeue() else {
 return;
 };
 drop(scheduler);
 switch_to_task(next_task);
 }
The should_preempt logic is simple: if the current task is not a real-time task and the queue of pending real-time tasks is not empty, the current task can be preempted.
 Rust
fn should_preempt(&self, task: &Arc<Task>) -> bool {
 !task.is_real_time() && !self.real_time_tasks.lock_irq_disabled().is_empty()
 }
If it can be preempted, a new task is dequeued and starts running:
 Rust
 let Some(next_task) = scheduler.dequeue() else {
 return;
 };
 drop(scheduler);
 switch_to_task(next_task);
So the simple flow described above can be extended to:
Task1.kernel_task_entry -> Task1.user_task_entry -> return to user space via iretq/sysret -> enter the kernel through syscall_entry -> Task1.user_task_entry handles the system call -> can be preempted -> switch_to_task -> Task2.kernel_task_entry -> Task2.user_task_entry -> return to user space via iretq/sysret -> enter the kernel through syscall_entry -> Task2.user_task_entry handles the system call
Roughly a flow like that.
A few rough edges:
 • There seems to be no support for multiple CPUs / multiple cores yet.
 • Preemption is not immediate: it only happens once the running task reaches a certain point in its processing, and only while the current task has trapped into the kernel.