리눅스 커널에서 preemptive scheduling이 일어나는 지점은 다음과 같다:

 - 인터럽트를 처리하고 난 시점
 - 시스템 콜을 처리하고 난 시점 [***] 
 - 시그널을 처리하고 난 시점

이 동작은 원래 아키텍처에 의존적이었다. 즉, Arm64나 RISC-V별로 처리 루틴이 서로 달랐다.
최근에는 아키텍처 공통(common) 경로에서 처리되도록 변경됐다.

관련 커밋 메시지와 커밋 아이디는 아래와 같다: 

f0bddf50586da81360627a772be0e3
riscv: entry: Convert to generic entry

'21 Feb 2023'에 머지됐으니, v6.3 버전부터 이와 같은 path로 처리된다.

RISCV 관점 분석

RISCV에서 시스템 콜은 do_trap_ecall_u() 함수 (위치: arch/riscv/kernel/traps.c)에서 핸들링한다.

arch/riscv/kernel/traps.c
/* RISC-V trap handler for an ecall issued from user mode, i.e. the
 * architecture-side entry point for system calls. */
void do_trap_ecall_u(struct pt_regs *regs)
{
if (user_mode(regs)) {
/* The syscall number is taken from register a7. */
long syscall = regs->a7;

/* Advance epc past the trapping ecall instruction (4 bytes) so
 * sret returns to the following instruction. */
regs->epc += 4;
/* Keep the original a0 (first argument) for syscall restart;
 * preload the return value with -ENOSYS for out-of-range numbers. */
regs->orig_a0 = regs->a0;
regs->a0 = -ENOSYS;

riscv_v_vstate_discard(regs);

/* Generic-entry hook: tracing/seccomp etc. run here and may
 * rewrite the syscall number. */
syscall = syscall_enter_from_user_mode(regs, syscall);

add_random_kstack_offset();

/* Bounds-check the number, then dispatch; array_index_nospec()
 * clamps it against speculative out-of-bounds access. */
if (syscall >= 0 && syscall < NR_syscalls) {
syscall = array_index_nospec(syscall, NR_syscalls);
syscall_handler(regs, syscall);
}
[...]
/* Generic exit path back to user space; pending work (including a
 * reschedule, i.e. preemption) is handled from here. */
syscall_exit_to_user_mode(regs);

syscall_exit_to_user_mode() 함수는 시스템 콜 핸들러 함수가 실행된 다음에 호출된다.

include/linux/entry-common.h 
/* Common (generic-entry) syscall exit: run the instrumentable exit work,
 * then perform the final non-instrumentable transition to user mode. */
static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
{
instrumentation_begin();
/* Syscall-specific exit work plus the generic exit-to-user work loop. */
syscall_exit_to_user_mode_work(regs);
instrumentation_end();
/* Last step: context tracking / lockdep handoff before sret/eret. */
exit_to_user_mode();
}

arm64_exit_to_user_mode()

arch/arm64/kernel/entry-common.c 
/* arm64 return-to-user path: prepare pending work with IRQs disabled,
 * mask all DAIF exceptions, then do the common exit transition. */
static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
local_irq_disable();
/* Legacy-named wrapper around the generic exit-to-user preparation. */
exit_to_user_mode_prepare_legacy(regs);
local_daif_mask();
/* arm64 MTE: check the tag fault status register on exit. */
mte_check_tfsr_exit();
exit_to_user_mode();
}

exit_to_user_mode()

include/linux/irq-entry-common.h
/* Final common step before returning to user space: tracing/lockdep
 * bookkeeping, context-tracking user entry, and the arch exit hook.
 * Runs with interrupts disabled; no work may be pending anymore. */
static __always_inline void exit_to_user_mode(void)
{
instrumentation_begin();
unwind_reset_info();
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
instrumentation_end();

/* Switch context tracking to user mode (RCU stops watching). */
user_enter_irqoff();
arch_exit_to_user_mode();
lockdep_hardirqs_on(CALLER_ADDR0);
}

정리하면 다음과 같다: 

RISCV: do_trap_ecall_u() -> syscall_exit_to_user_mode()  
Arm64:  arm64_exit_to_user_mode() -> exit_to_user_mode() 

RISCV

syscall_exit_to_user_mode()

/* noinstr variant of the common syscall exit path; the `>>>` marker is the
 * post author's annotation for the call being followed next. */
__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
instrumentation_begin();
__syscall_exit_to_user_mode_work(regs); >>>
instrumentation_end();
__exit_to_user_mode();
}

syscall_exit_to_user_mode_work()

/* Thin exported wrapper; all real work happens in the inline helper. */
void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
__syscall_exit_to_user_mode_work(regs);  >>>
}

__syscall_exit_to_user_mode_work()

/* Syscall-side exit work (audit/trace etc.), then IRQs off, then the
 * generic exit-to-user preparation that runs the work loop. */
static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
syscall_exit_to_user_mode_prepare(regs);
local_irq_disable_exit_to_user();
exit_to_user_mode_prepare(regs);  >>>
}

exit_to_user_mode_prepare()

/* Generic exit-to-user preparation: read the thread-info work flags and,
 * if any EXIT_TO_USER_MODE_WORK bit is set (e.g. _TIF_NEED_RESCHED),
 * process it in exit_to_user_mode_loop(). This is where user-return
 * preemption happens on the common path. Called with IRQs disabled. */
static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long ti_work;

lockdep_assert_irqs_disabled();

/* Flush pending rcuog wakeup before the last need_resched() check */
tick_nohz_user_enter_prepare();

ti_work = read_thread_flags();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);  >>>

/* Per-arch last-minute work with the final flag state. */
arch_exit_to_user_mode_prepare(regs, ti_work);

/* Ensure that kernel state is sane for a return to userspace */
kmap_assert_nomap();
lockdep_assert_irqs_disabled();
lockdep_sys_exit();
}

Arm64

arm64_exit_to_user_mode()

/* arm64 return-to-user path (quoted again for the Arm64 walkthrough):
 * prepare pending work with IRQs disabled, mask DAIF, then exit. */
static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
local_irq_disable();
exit_to_user_mode_prepare_legacy(regs); >>
local_daif_mask();
/* arm64 MTE tag-fault status check on the way out. */
mte_check_tfsr_exit();
exit_to_user_mode();
}

exit_to_user_mode_prepare_legacy()

/* Temporary workaround to keep ARM64 alive */
/* Legacy shim for arm64: generic preparation, then an rseq compatibility
 * step and a final state validation. */
static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs)
{
__exit_to_user_mode_prepare(regs); >>
rseq_exit_to_user_mode_legacy();
__exit_to_user_mode_validate();
}

/* Core of the generic exit preparation shared with arm64's legacy shim:
 * identical flag check and work loop as exit_to_user_mode_prepare(),
 * minus the trailing sanity assertions. Called with IRQs disabled. */
static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long ti_work;

lockdep_assert_irqs_disabled();

/* Flush pending rcuog wakeup before the last need_resched() check */
tick_nohz_user_enter_prepare();

ti_work = read_thread_flags();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work); >>

arch_exit_to_user_mode_prepare(regs, ti_work);
}


exit_to_user_mode_loop

/* The work loop run just before returning to user space: handles each
 * pending TIF work item with interrupts enabled, including the
 * _TIF_NEED_RESCHED -> schedule() case — the common-path point where
 * user-return preemption actually occurs. */
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
    unsigned long ti_work)
{
/*
 * Before returning to user space ensure that all pending work
 * items have been completed.
 */
while (ti_work & EXIT_TO_USER_MODE_WORK) {

/* Work handlers may sleep, so run them with IRQs enabled. */
local_irq_enable_exit_to_user(ti_work);

/* Preemption point: yield the CPU if a reschedule is pending. */
if (ti_work & _TIF_NEED_RESCHED)
schedule();

if (ti_work & _TIF_UPROBE)
uprobe_notify_resume(regs);

/* Live-patching: transition this task to the new patch state. */
if (ti_work & _TIF_PATCH_PENDING)
klp_update_patch_state(current);

/* Pending signal delivery / restart handling. */
if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
arch_do_signal_or_restart(regs);

if (ti_work & _TIF_NOTIFY_RESUME)
resume_user_mode_work(regs);

/* NOTE(review): the quoted snippet is cut off here in the original
 * post — the loop's re-check of ti_work and the function's tail are
 * not shown. */
+ Recent posts