__clear_user function

SYM_FUNC_START(__clear_user)

        /* Enable access to user memory */
        li t6, SR_SUM
        csrs CSR_STATUS, t6

        add a3, a0, a1
        addi t0, a0, SZREG-1
        andi t1, a3, ~(SZREG-1)
        andi t0, t0, ~(SZREG-1)

The 'csrs       sstatus,t6' instruction sets the sstatus.SUM bit to 1.
This allows the kernel (supervisor mode) to access user-space memory.

SP:FFFFFFFF803062F4|__clear_user:   lui        t6,0x40       ; t6,64
SP:FFFFFFFF803062F8|                csrs       sstatus,t6
SP:FFFFFFFF803062FC|                add        a3,a0,a1

The comment in the source says 'Enable access to user memory'.

Similar code is here:
 
SYM_FUNC_START(fallback_scalar_usercopy)
        /* Enable access to user memory */
        li      t6, SR_SUM
        csrs    CSR_STATUS, t6 

 

 

- Arm

el0t_64_sync_handler  // arch/arm64/kernel/entry-common.c
   el0_da        // arch/arm64/kernel/entry-common.c
      do_mem_abort    // arch/arm64/mm/fault.c
         do_translation_fault // arch/arm64/mm/fault.c
            do_page_fault  // arch/arm64/mm/fault.c

asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);

switch (ESR_ELx_EC(esr)) {
case ESR_ELx_EC_SVC64:
el0_svc(regs);
break;
case ESR_ELx_EC_DABT_LOW:
el0_da(regs, esr); >>>
break;

static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);

enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs); >>>
exit_to_user_mode(regs);
}

void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs)
{
        const struct fault_info *inf = esr_to_fault_info(esr);
        unsigned long addr = untagged_addr(far);

        if (!inf->fn(far, esr, regs))  >>>
                return; 

        if (!user_mode(regs))
                die_kernel_fault(inf->name, addr, esr, regs);

(where)
The do_translation_fault function is registered in the fault_info table.

static const struct fault_info fault_info[] = {
        { do_bad,               SIGKILL, SI_KERNEL,     "ttbr address size fault"       },
        { do_bad,               SIGKILL, SI_KERNEL,     "level 1 address size fault"    },
        { do_bad,               SIGKILL, SI_KERNEL,     "level 2 address size fault"    },
        { do_bad,               SIGKILL, SI_KERNEL,     "level 3 address size fault"    },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 0 translation fault"     },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 1 translation fault"     },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 2 translation fault"     },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 3 translation fault"     },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 0 access flag fault"     },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 access flag fault"     },


static int __kprobes do_translation_fault(unsigned long far, 
                                          unsigned long esr,
                                          struct pt_regs *regs)
{
        unsigned long addr = untagged_addr(far);

        if (is_ttbr0_addr(addr))
                return do_page_fault(far, esr, regs); >>>

        do_bad_area(far, esr, regs);
        return 0;
}

- RISC-V

handle_exception
do_page_fault
handle_page_fault

arch/riscv/kernel/entry.S 
SYM_DATA_START_LOCAL(excp_vect_table)
RISCV_PTR do_trap_insn_misaligned
ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault)
RISCV_PTR do_trap_insn_illegal
RISCV_PTR do_trap_break
RISCV_PTR do_trap_load_misaligned
RISCV_PTR do_trap_load_fault
RISCV_PTR do_trap_store_misaligned
RISCV_PTR do_trap_store_fault
RISCV_PTR do_trap_ecall_u /* system call */
RISCV_PTR do_trap_ecall_s
RISCV_PTR do_trap_unknown
RISCV_PTR do_trap_ecall_m
/* instruction page fault */
ALT_PAGE_FAULT(RISCV_PTR do_page_fault)
RISCV_PTR do_page_fault   /* load page fault */  >>>
RISCV_PTR do_trap_unknown
RISCV_PTR do_page_fault   /* store page fault */  >>>
SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end)

asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);

handle_page_fault(regs);

local_irq_disable();

irqentry_exit(regs, state);
}

static __always_inline bool is_ttbr0_addr(unsigned long addr)
{
/* entry assembly clears tags for TTBR0 addrs */
return addr < TASK_SIZE;
}

RISC-V를 분석할 때 가장 이해하기 어려운 컴포넌트는 opensbi이다. opensbi는 슈퍼바이저 모드와 
머신 모드 사이의 인터페이스라고 할 수 있으며, 리눅스 커널 아래에서 동작하는 가장 낮은 소프트웨어 레이어라고 할 수 있다.

리눅스 커널에서 RISC-V에 dependent한 동작을 정확히 이해하려면, 역시나 opensbi를 이해해야 한다.

이번 포스트에서는 opensbi 스펙 중에 marchid를 중심으로 소프트웨어의 실행 흐름을 정리한다.
ChatGPT가 이런 내용을 알려주지 않으니 많은 개발자에게 도움이 됐으면 좋겠다.

RISC-V 단체에서 기술한 opensbi 스펙 문서를 보자.

4.6. Function: Get machine architecture ID (FID #5)
struct sbiret sbi_get_marchid(void);

https://lists.riscv.org/g/tech-brs/attachment/361/0/riscv-sbi.pdf#:~:text=Function:%20Firmware%20Features%20Set%20(FID%20%230)%20struct,for%20which%20per%20feature%20supported%20values%20are

marchid를 읽어오는 인터페이스이다. 리눅스 커널에서 marchid를 읽는 SBI 호출은 어떻게 구현됐을까?

다음 함수를 보자.

arch/riscv/kernel/sbi.c
long sbi_get_marchid(void)
{
return __sbi_base_ecall(SBI_EXT_BASE_GET_MARCHID);
}

어셈블리 명령어로 보면 sbi_get_marchid 함수의 구현부는 아래와 같다:

      SP:FFFFFFFF8000B3BA|sbi_get_marchid:   c.addi     sp,-0x10      ; sp,-16
      SP:FFFFFFFF8000B3BC|                   c.sdsp     s0,0x8(sp)    ; s0,8(sp)
      SP:FFFFFFFF8000B3BE|                   c.addi4spn s0,0x10       ; s0,16
      SP:FFFFFFFF8000B3C0|                   c.li       a0,0x0
      SP:FFFFFFFF8000B3C2|                   c.li       a1,0x0
      SP:FFFFFFFF8000B3C4|                   c.li       a2,0x0
      SP:FFFFFFFF8000B3C6|                   c.li       a3,0x0
      SP:FFFFFFFF8000B3C8|                   c.li       a4,0x0
      SP:FFFFFFFF8000B3CA|                   c.li       a5,0x0
      SP:FFFFFFFF8000B3CC|                   c.li       a6,0x5
      SP:FFFFFFFF8000B3CE|                   c.li       a7,0x10       ; a7,16
      SP:FFFFFFFF8000B3D0|                   ecall

a6 레지스터에 0x5, a7 레지스터에 0x10을 로딩한 다음에 ecall 명령어를 실행한다. 이 명령어를 실행하면 바로
머신 모드로 트랩이 유발된다.

이제부터는 opensbi 코드 분석이다.

00000000000003c8 <_trap_handler>:
     3c8:       34021273                csrrw   tp,mscratch,tp
     3cc:       06523023                sd      t0,96(tp) # 60 <_try_lottery+0x36>
     3d0:       300022f3                csrr    t0,mstatus
[...]
     468:       3002b073                csrc    mstatus,t0
     46c:       00010533                add     a0,sp,zero
     470:       16e0c0ef                jal     ra,c5de <sbi_trap_handler>

리눅스 커널에서 ecall 명령어를 실행하면, 트랩이 유발되면서 _trap_handler 레이블의 시작 주소로 점프한다.
그 다음에 sbi_trap_handler 함수로 분기한다.

참고로 opensbi에서 트랩 핸들러를 설정하는 루틴은 아래와 같다:

opensbi/firmware/fw_base.S
        /* Setup trap handler */
        lla     a4, _trap_handler
        csrr    a5, CSR_MISA
        srli    a5, a5, ('H' - 'A')
        andi    a5, a5, 0x1
        beq     a5, zero, _skip_trap_handler_hyp
        lla     a4, _trap_handler_hyp
_skip_trap_handler_hyp:
        csrw    CSR_MTVEC, a4

'csrw    CSR_MTVEC, a4'가 핵심 명령어이다. _trap_handler(또는 _trap_handler_hyp)의 주소를 담은 a4를 mtvec 레지스터에 써서, 트랩 핸들러의 시작 주소를 설정한다.


다시 원래 주제로 돌아와서 sbi_trap_handler() 함수를 분석하자.
함수의 분석 내용은 주석을 참고하자.

struct sbi_trap_context *sbi_trap_handler(struct sbi_trap_context *tcntx)
{
        int rc = SBI_ENOTSUPP;
        const char *msg = "trap handler failed";
        struct sbi_scratch *scratch = sbi_scratch_thishart_ptr();
        const struct sbi_trap_info *trap = &tcntx->trap;
        struct sbi_trap_regs *regs = &tcntx->regs;
        ulong mcause = tcntx->trap.cause;

        /* Update trap context pointer */
        tcntx->prev_context = sbi_trap_get_context(scratch);
        sbi_trap_set_context(scratch, tcntx);

        /* Austin: mcause 레지스터의 최상위 비트가 1인지를 체크한다. 만약 true이면 트랩의
                       종류는 인터럽트이다. */
        if (mcause & MCAUSE_IRQ_MASK) {
                if (sbi_hart_has_extension(sbi_scratch_thishart_ptr(),
                                           SBI_HART_EXT_SMAIA))
                        rc = sbi_trap_aia_irq();
                else
                        rc = sbi_trap_nonaia_irq(mcause & ~MCAUSE_IRQ_MASK);
                msg = "unhandled local interrupt";
                goto trap_done;
        }

        /* Austin: mcause 레지스터는 익셉션 코드 정보를 저장한다. 이 값에 따라 다른 방식으로
                      트랩을 처리한다. mcause는 Armv8-A의 esr_el3에 대응된다(개념적으로)  */
       switch (mcause) {
        case CAUSE_ILLEGAL_INSTRUCTION:
                rc  = sbi_illegal_insn_handler(tcntx);
                msg = "illegal instruction handler failed";
                break;
        case CAUSE_MISALIGNED_LOAD:
                sbi_pmu_ctr_incr_fw(SBI_PMU_FW_MISALIGNED_LOAD);
                rc  = sbi_misaligned_load_handler(tcntx);
                msg = "misaligned load handler failed";
                break;
        case CAUSE_MISALIGNED_STORE:
                sbi_pmu_ctr_incr_fw(SBI_PMU_FW_MISALIGNED_STORE);
                rc  = sbi_misaligned_store_handler(tcntx);
                msg = "misaligned store handler failed";
                break;
        case CAUSE_SUPERVISOR_ECALL:
        case CAUSE_MACHINE_ECALL:
               /* Austin: 리눅스 커널에서 ecall을 실행했으니, 이 루틴으로 분기한다 */
                rc  = sbi_ecall_handler(tcntx);
                msg = "ecall handler failed";
                break;
        case CAUSE_LOAD_ACCESS:
                sbi_pmu_ctr_incr_fw(SBI_PMU_FW_ACCESS_LOAD);
                rc  = sbi_load_access_handler(tcntx);
                msg = "load fault handler failed";
                break;

sbi_ecall_handler() 함수를 분석하자. 분석 내용은 주석을 참고하자.

lib/sbi/sbi_ecall.c
int sbi_ecall_handler(struct sbi_trap_context *tcntx)
{
        int ret = 0;
        struct sbi_trap_regs *regs = &tcntx->regs;
        struct sbi_ecall_extension *ext;
        unsigned long extension_id = regs->a7;
        unsigned long func_id = regs->a6;
        struct sbi_ecall_return out = {0};
        bool is_0_1_spec = 0;

        ext = sbi_ecall_find_extension(extension_id);

위 루틴에서 regs->a7은 0x10이고 regs->a6은 0x5이다. 왜냐하면 리눅스 커널에서 ecall 명령어를 실행하기 직전에
a6와 a7 레지스터를 다음과 같이 지정했기 때문이다.

      SP:FFFFFFFF8000B3BA|sbi_get_marchid:   c.addi     sp,-0x10      ; sp,-16
      [...]
      SP:FFFFFFFF8000B3CC|                   c.li       a6,0x5
      SP:FFFFFFFF8000B3CE|                   c.li       a7,0x10       ; a7,16

아래 코드를 보면 extension_id는 base(0x10)이고 func_id는 0x5라는 사실을 알 수 있다. 

opensbi/include/sbi/sbi_ecall_interface.h
/* SBI function IDs for BASE extension*/
#define SBI_EXT_BASE_GET_SPEC_VERSION           0x0
#define SBI_EXT_BASE_GET_IMP_ID                 0x1
#define SBI_EXT_BASE_GET_IMP_VERSION            0x2
#define SBI_EXT_BASE_PROBE_EXT                  0x3
#define SBI_EXT_BASE_GET_MVENDORID              0x4
#define SBI_EXT_BASE_GET_MARCHID                0x5

sbi_ecall_handler() 함수의 아랫 부분 코드를 더 분석하자.

lib/sbi/sbi_ecall.c
int sbi_ecall_handler(struct sbi_trap_context *tcntx)
{
        int ret = 0;
        struct sbi_trap_regs *regs = &tcntx->regs;
        struct sbi_ecall_extension *ext;
        unsigned long extension_id = regs->a7;
        unsigned long func_id = regs->a6;
        struct sbi_ecall_return out = {0};
        bool is_0_1_spec = 0;

        ext = sbi_ecall_find_extension(extension_id);
        if (ext && ext->handle) {
/* Austin: 'ext->handle' 구문에서 sbi_ecall_base_handler 함수가 호출된다. */
                ret = ext->handle(extension_id, func_id, regs, &out);
                if (extension_id >= SBI_EXT_0_1_SET_TIMER &&
                    extension_id <= SBI_EXT_0_1_SHUTDOWN)
                        is_0_1_spec = 1;
        } else {
                ret = SBI_ENOTSUPP;
        }

그 이유는 extension_id가 0x10이면 sbi_ecall_base_handler 함수가 호출되도록 등록했기 때문이다.
sbi_ecall_base_handler 함수가 호출되는 세세한 구현 방식은 나중에 설명하자.

opensbi/lib/sbi/sbi_ecall_base.c
static int sbi_ecall_base_register_extensions(void)
{
        return sbi_ecall_register_extension(&ecall_base);
}

struct sbi_ecall_extension ecall_base = {
        .name                   = "base",
        .extid_start            = SBI_EXT_BASE,
        .extid_end              = SBI_EXT_BASE,
        .register_extensions    = sbi_ecall_base_register_extensions,
        .handle                 = sbi_ecall_base_handler,
};

sbi_ecall_base_handler 함수이다.

opensbi/lib/sbi/sbi_ecall_base.c
static int sbi_ecall_base_handler(unsigned long extid, unsigned long funcid,
                                  struct sbi_trap_regs *regs,
                                  struct sbi_ecall_return *out)
{
        int ret = 0;

        switch (funcid) {
[...]
       case SBI_EXT_BASE_GET_MARCHID:
                out->value = csr_read(CSR_MARCHID);
                break;

'csr_read(CSR_MARCHID);' 매크로 함수의 리턴 값을 out->value에 저장한다.
'csr_read(CSR_MARCHID);' 구문의 정체는 무엇일까? 바로 marchid CSR을 읽는 csrr 명령어이다.

   126f8:       f12027f3                csrr    a5,marchid
   126fc:       4501                    li      a0,0
   126fe:       e69c                    sd      a5,8(a3)

a3이 out의 주소를 저장하니, 'sd      a5,8(a3)' 명령어를 실행하면 marchid 레지스터의 값이 out->value에 저장된다.

marchid 레지스터의 값이 어떻게 저장되는지 확인하자. sbi_ecall_handler 함수의 구현부이다.

int sbi_ecall_handler(struct sbi_trap_context *tcntx)
{
        int ret = 0;
        struct sbi_trap_regs *regs = &tcntx->regs;
[...]
       ext = sbi_ecall_find_extension(extension_id);
        if (ext && ext->handle) {
                ret = ext->handle(extension_id, func_id, regs, &out);
[...]
               if (!is_0_1_spec)
                        regs->a1 = out.value;

marchid CSR의 값은 a1 레지스터를 통해 리눅스 커널로 전달된다. 가장 마지막 라인의 'regs->a1 = out.value;' 구문을 보자.


머신 모드에서 슈퍼바이저 모드(리눅스 커널)로 리턴하는 동작

이제 머신 모드에서 슈퍼바이저 모드로 리턴하는 동작이다. 즉 opensbi -> 리눅스 커널이다.

00000000000003c8 <_trap_handler>:
     3c8:       34021273                csrrw   tp,mscratch,tp
     3cc:       06523023                sd      t0,96(tp) # 60 <_try_lottery+0x36>
     3d0:       300022f3                csrr    t0,mstatus
[...]
    46c:       00010533                add     a0,sp,zero
     470:       16e0c0ef                jal     ra,c5de <sbi_trap_handler>

     ; sbi_trap_handler 함수 실행이 마무리되면 아래 어셈블리 명령어가 실행된다.
     ; 스택에 푸시한 (리눅스 커널에서 ecall을 실행하기 직전의) 레지스터를 로딩한다.
     ; 슈퍼바이저 모드(리눅스 커널)로 돌아가기 위해서이다.  
     474:       00853083                ld      ra,8(a0)
     478:       01053103                ld      sp,16(a0)
     47c:       01853183                ld      gp,24(a0)
     480:       02053203                ld      tp,32(a0)
     484:       03053303                ld      t1,48(a0)
     488:       03853383                ld      t2,56(a0)
     48c:       6120                    ld      s0,64(a0)
     48e:       6524                    ld      s1,72(a0)
     490:       6d2c                    ld      a1,88(a0)
     492:       7130                    ld      a2,96(a0)
[...]
     4de:       30029073                csrw    mstatus,t0 ; Austin: 되돌아갈 privilege 모드를 mstatus 레지스터 설정
     4e2:       10053283                ld      t0,256(a0)
     4e6:       34129073                csrw    mepc,t0  ; Austin: 되돌아갈 리눅스 커널의 주소 - ecall을 실행한 다음 주소
     4ea:       02853283                ld      t0,40(a0)
     4ee:       6928                    ld      a0,80(a0)
     4f0:       30200073                mret     ; Austin: 리눅스 커널로 리턴(슈퍼바이저 모드) 


      SP:FFFFFFFF8000B3BA|sbi_get_marchid:   c.addi     sp,-0x10      ; sp,-16
      SP:FFFFFFFF8000B3BC|                   c.sdsp     s0,0x8(sp)    ; s0,8(sp)
      SP:FFFFFFFF8000B3BE|                   c.addi4spn s0,0x10       ; s0,16
      SP:FFFFFFFF8000B3C0|                   c.li       a0,0x0
      SP:FFFFFFFF8000B3C2|                   c.li       a1,0x0
      SP:FFFFFFFF8000B3C4|                   c.li       a2,0x0
      SP:FFFFFFFF8000B3C6|                   c.li       a3,0x0
      SP:FFFFFFFF8000B3C8|                   c.li       a4,0x0
      SP:FFFFFFFF8000B3CA|                   c.li       a5,0x0
      SP:FFFFFFFF8000B3CC|                   c.li       a6,0x5
      SP:FFFFFFFF8000B3CE|                   c.li       a7,0x10       ; a7,16
      SP:FFFFFFFF8000B3D0|                   ecall
      SP:FFFFFFFF8000B3D4|                   c.bnez     a0,0xFFFFFFFF8000B3DE
      SP:FFFFFFFF8000B3D6|                   c.mv       a0,a1 ; Austin: 리턴값을 a0 레지스터에 복사한다. 그 이유는: RISC-V에서 리턴값은
                                                                               ; a0에 저장하기 때문.
      SP:FFFFFFFF8000B3D8|                   c.ldsp     s0,0x8(sp)    ; s0,8(sp)
      SP:FFFFFFFF8000B3DA|                   c.addi     sp,0x10       ; sp,16
      SP:FFFFFFFF8000B3DC|                   c.jr       ra ; Austin: caller 함수로 리턴한다.


아래는 위 어셈블리 명령어에 대응되는 커널 코드이다. 

arch/riscv/kernel/sbi_ecall.c
long __sbi_base_ecall(int fid)
{
struct sbiret ret;

ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
if (!ret.error)
return ret.value;
else
return sbi_err_map_linux_errno(ret.error);
}
EXPORT_SYMBOL(__sbi_base_ecall);

정리

콜 스택을 정리하자:

[리눅스 커널] - supervisor mode
sbi_get_marchid
 - __sbi_base_ecall
  -  ecall 
--------------------------
[opensbi]
  - _trap_handler
    -  sbi_trap_handler 
      - sbi_ecall_handler 
       - sbi_ecall_base_handler
  - a1에 리턴 값 저장    
 - mret     

PS: * 글이 도움이 됐으면 '좋아요'를 눌러주시면 좋겠습니다.

RISC-V에서 QEMU를 설정하고 빌드하는 방법은 아래 링크를 참고하자.

https://lore.kernel.org/all/20251112-v5_user_cfi_series-v23-0-b55691eacf4f@rivosinc.com/

How to test this series
=======================

Toolchain
---------
$ git clone git@github.com:sifive/riscv-gnu-toolchain.git -b cfi-dev
$ riscv-gnu-toolchain/configure --prefix=<path-to-where-to-build> --with-arch=rv64gc_zicfilp_zicfiss --enable-linux --disable-gdb  --with-extra-multilib-test="rv64gc_zicfilp_zicfiss-lp64d:-static"
$ make -j$(nproc)

Qemu
----
Get the latest qemu
$ cd qemu
$ mkdir build
$ cd build
$ ../configure --target-list=riscv64-softmmu
$ make -j$(nproc)

Opensbi
-------
$ git clone git@github.com:deepak0414/opensbi.git -b v6_cfi_spec_split_opensbi
$ make CROSS_COMPILE=<your riscv toolchain> -j$(nproc) PLATFORM=generic

Linux
-----
Running defconfig is fine. CFI is enabled by default if the toolchain
supports it.

$ make ARCH=riscv CROSS_COMPILE=<path-to-cfi-riscv-gnu-toolchain>/build/bin/riscv64-unknown-linux-gnu- -j$(nproc) defconfig
$ make ARCH=riscv CROSS_COMPILE=<path-to-cfi-riscv-gnu-toolchain>/build/bin/riscv64-unknown-linux-gnu- -j$(nproc)

Running
-------

Modify your qemu command to have:
-bios <path-to-cfi-opensbi>/build/platform/generic/firmware/fw_dynamic.bin
-cpu rv64,zicfilp=true,zicfiss=true,zimop=true,zcmop=true

References
==========
[1] - https://github.com/riscv/riscv-cfi
[2] - https://lore.kernel.org/all/20240814081126.956287-1-samuel.holland@sifive.com/
[3] - https://lwn.net/Articles/889475/
[4] - https://developer.arm.com/documentation/109576/0100/Branch-Target-Identification
[5] - https://www.intel.com/content/dam/develop/external/us/en/documents/catc17-introduction-intel-cet-844137.pdf
[6] - https://lwn.net/Articles/940403/ 

 

Shadow stacks for 64-bit Arm systems

Return-oriented programming (ROP) has, for some years now, been a valuable tool for those who w [...]

lwn.net

 

 

RISC-V의 핵심은 opensbi이다. 어느 RISC-V 문서를 봐도 opensbi를 확인할 수 있다.
부트로더에서 opensbi가 실행되고, 리눅스 커널이 실행될 때도 opensbi에 접근한다.

이번 포스트에서는 opensbi를 빌드하는 방법에 대해 기술한다.

opensbi는 오픈 소스로 관리되며 위치는 아래와 같다.

https://github.com/riscv-software-src/opensbi

아래 명령어로 소스를 내려 받자.

$ git clone https://github.com/riscv-software-src/opensbi

빌드하기 전에 미리 아래와 같은 유틸리티를 설치할 필요가 있다. 

$ sudo apt install gcc-riscv64-linux-gnu binutils-riscv64-linux-gnu

opensbi 빌드 방법

opensbi를 빌드하는 핵심 명령어는 아래와 같다. 

make O=$OUTPUT CROSS_COMPILE=riscv64-linux-gnu- PLATFORM=generic 

매번 CROSS_COMPILE 옵션을 명령어에 직접 입력하기 번거로우니, 빌드 셸 스크립트(./build_opensbi.sh)를 생성해서 실행하자.

아래는 ./build_opensbi.sh의 구현부이다.

#!/bin/bash

echo "build opensbi"
TOP_PATH=$( cd "$(dirname "$0")" ; pwd )
OUTPUT="$TOP_PATH/out-riscv64"

BUILD_LOG="$TOP_PATH/riscv-build_log.txt"

build_start_time=`date +%s`

#echo "RPi build start" > $BUILD_LOG
#echo "Build start : $build_start_time" >> $BUILD_LOG

echo "open Build start : $build_start_time"

OUTPUT_PATH=$( cd "$(dirname "$0")" ; pwd )
OUTPUT="$OUTPUT_PATH/out-opensbi"

pushd opensbi > /dev/null

make O=$OUTPUT CROSS_COMPILE=riscv64-linux-gnu- PLATFORM=generic -j16  2>&1
popd > /dev/null

아래는 build_opensbi.sh 셸 스크립트를 실행할 때의 아웃풋이다.

$ ./build_opensbi.sh
build opensbi
open Build start : 1768870118
Loaded configuration '/home/austin/riscv_src/package_opensbi/deepack_open_sbi/opensbi/platform/generic/configs/defconfig'
Configuration saved to '/home/austin/riscv_src/package_opensbi/deepack_open_sbi/out-opensbi/platform/generic/kconfig/.config'
 CPP-DEP   platform/generic/firmware/fw_payload.elf.dep
 CPP-DEP   platform/generic/firmware/fw_jump.elf.dep
 CPP-DEP   platform/generic/firmware/fw_dynamic.elf.dep
[...]
 AR        lib/libsbi.a
 ELF       platform/generic/firmware/payloads/test.elf
 ELF       platform/generic/firmware/fw_dynamic.elf
 ELF       platform/generic/firmware/fw_jump.elf
 OBJCOPY   platform/generic/firmware/payloads/test.bin
 AS        platform/generic/firmware/fw_payload.o
 OBJCOPY   platform/generic/firmware/fw_dynamic.bin
 ELF       platform/generic/firmware/fw_payload.elf
 OBJCOPY   platform/generic/firmware/fw_jump.bin
 OBJCOPY   platform/generic/firmware/fw_payload.bin

빌드가 되면 다양한 오브젝트 파일이 생성되는데 핵심 파일은 fw_dynamic.bin이다.
fw_dynamic.bin 파일은 디버깅 정보가 없으니, fw_dynamic.elf 파일을 찾아서 어셈블리 명령어를 추출하자.

$ riscv64-linux-gnu-objdump -d fw_dynamic.elf  > assembly_opensbi.c

이제부터 어셈블리 명령어를 분석할 수 있다.

0000000000000000 <_fw_start>:
       0:       00050433                add     s0,a0,zero
       4:       000584b3                add     s1,a1,zero
       8:       00060933                add     s2,a2,zero
       c:       66c000ef                jal     ra,678 <fw_boot_hart>
[...]
00000000000003c8 <_trap_handler>:
     3c8:       34021273                csrrw   tp,mscratch,tp
     3cc:       06523023                sd      t0,96(tp) # 60 <_try_lottery+0x36>
     3d0:       300022f3                csrr    t0,mstatus
[...]
     46c:       00010533                add     a0,sp,zero
     470:       16e0c0ef                jal     ra,c5de <sbi_trap_handler>


이제부터 전처리 파일을 추출해보자.

전처리 파일(preprocessed file)은 매크로를 전부 전개(expand)해서 보여주므로,
처음 코드를 분석할 때 유용하다.

opensbi에서 전처리 파일을 추출하려면 아래와 같은 패치를 생성하자.

diff --git a/Makefile b/Makefile
index 46541063..85e0f685 100644
--- a/Makefile
+++ b/Makefile
@@ -381,6 +381,7 @@ CFLAGS              =       -g -Wall -Werror -ffreestanding -nostdlib -fno-stack-protector -fno-st
 CFLAGS         +=      -fno-omit-frame-pointer -fno-optimize-sibling-calls
 CFLAGS         +=      -fno-asynchronous-unwind-tables -fno-unwind-tables
 CFLAGS         +=      -std=gnu11
+CFLAGS          +=      -save-temps=obj
 CFLAGS         +=      $(REPRODUCIBLE_FLAGS)
 # Optionally supported flags
 ifeq ($(CC_SUPPORT_VECTOR),y)

CFLAGS에 -save-temps=obj 옵션을 추가하는 코드이다.

전처리 파일과 C 코드에서 확인한 루틴을 비교하자.
sbi_list_del() 함수 in C 소스 파일:

include/sbi/sbi_list.h
static inline void sbi_list_del(struct sbi_dlist *entry)
{
        __sbi_list_del(entry->prev, entry->next);
        entry->next = (void *)SBI_LIST_POISON_NEXT;
        entry->prev = (void *)SBI_LIST_POISON_PREV;
}

sbi_list_del() 함수 in 전처리 소스 파일: SBI_LIST_POISON_NEXT 매크로가 0xFADEBABE로,
SBI_LIST_POISON_PREV 매크로가 0xDEADBEEF로 치환되어서 출력된다.

static inline void sbi_list_del(struct sbi_dlist *entry)
{
 __sbi_list_del(entry->prev, entry->next);
 entry->next = (void *)0xFADEBABE;
 entry->prev = (void *)0xDEADBEEF;
}

RISC-V에서 QEMU를 설정하고 빌드하는 방법은 아래 링크를 참고하자.

https://lore.kernel.org/all/20251112-v5_user_cfi_series-v23-0-b55691eacf4f@rivosinc.com/

How to test this series
=======================

Toolchain
---------
$ git clone git@github.com:sifive/riscv-gnu-toolchain.git -b cfi-dev
$ riscv-gnu-toolchain/configure --prefix=<path-to-where-to-build> --with-arch=rv64gc_zicfilp_zicfiss --enable-linux --disable-gdb  --with-extra-multilib-test="rv64gc_zicfilp_zicfiss-lp64d:-static"
$ make -j$(nproc)

Qemu
----
Get the latest qemu
$ cd qemu
$ mkdir build
$ cd build
$ ../configure --target-list=riscv64-softmmu
$ make -j$(nproc)

Opensbi
-------
$ git clone git@github.com:deepak0414/opensbi.git -b v6_cfi_spec_split_opensbi
$ make CROSS_COMPILE=<your riscv toolchain> -j$(nproc) PLATFORM=generic

Linux
-----
Running defconfig is fine. CFI is enabled by default if the toolchain
supports it.

$ make ARCH=riscv CROSS_COMPILE=<path-to-cfi-riscv-gnu-toolchain>/build/bin/riscv64-unknown-linux-gnu- -j$(nproc) defconfig
$ make ARCH=riscv CROSS_COMPILE=<path-to-cfi-riscv-gnu-toolchain>/build/bin/riscv64-unknown-linux-gnu- -j$(nproc)

Running
-------

Modify your qemu command to have:
-bios <path-to-cfi-opensbi>/build/platform/generic/firmware/fw_dynamic.bin
-cpu rv64,zicfilp=true,zicfiss=true,zimop=true,zcmop=true

References
==========
[1] - https://github.com/riscv/riscv-cfi
[2] - https://lore.kernel.org/all/20240814081126.956287-1-samuel.holland@sifive.com/
[3] - https://lwn.net/Articles/889475/
[4] - https://developer.arm.com/documentation/109576/0100/Branch-Target-Identification
[5] - https://www.intel.com/content/dam/develop/external/us/en/documents/catc17-introduction-intel-cet-844137.pdf
[6] - https://lwn.net/Articles/940403/

리눅스 커널에서 preemptive scheduling이 일어나는 지점은 다음과 같다:

 - 인터럽트를 처리하고 난 시점
 - 시스템 콜을 처리하고 난 시점 [***] 
 - 시그널을 처리하고 난 시점

이 동작은 아키텍처에 의존적인 동작이었다. 즉, Arm64나 RISCV 별로 루틴이 달랐다.
최근에 common한 path로 처리가 됐다.

관련 커밋 메시지와 커밋 아이디는 아래와 같다: 

f0bddf50586da81360627a772be0e3
riscv: entry: Convert to generic entry

'21 Feb 2023'에 머지됐으니, v6.3 버전부터 이와 같은 path로 처리된다.

RISCV 관점 분석

RISCV에서 시스템 콜은 do_trap_ecall_u() 함수 (위치: arch/riscv/kernel/traps.c)에서 핸들링한다.

arch/riscv/kernel/traps.c
void do_trap_ecall_u(struct pt_regs *regs)
{
if (user_mode(regs)) {
long syscall = regs->a7;

regs->epc += 4;
regs->orig_a0 = regs->a0;
regs->a0 = -ENOSYS;

riscv_v_vstate_discard(regs);

syscall = syscall_enter_from_user_mode(regs, syscall);

add_random_kstack_offset();

if (syscall >= 0 && syscall < NR_syscalls) {
syscall = array_index_nospec(syscall, NR_syscalls);
syscall_handler(regs, syscall);
}
[...]
syscall_exit_to_user_mode(regs);

시스템 콜 핸들러 함수의 실행이 끝나면 syscall_exit_to_user_mode() 함수가 호출된다.

include/linux/entry-common.h 
static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
{
instrumentation_begin();
syscall_exit_to_user_mode_work(regs);
instrumentation_end();
exit_to_user_mode();
}

arm64_exit_to_user_mode()

arch/arm64/kernel/entry-common.c 
static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
local_irq_disable();
exit_to_user_mode_prepare_legacy(regs);
local_daif_mask();
mte_check_tfsr_exit();
exit_to_user_mode();
}

exit_to_user_mode()

include/linux/irq-entry-common.h
static __always_inline void exit_to_user_mode(void)
{
instrumentation_begin();
unwind_reset_info();
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
instrumentation_end();

user_enter_irqoff();
arch_exit_to_user_mode();
lockdep_hardirqs_on(CALLER_ADDR0);
}

정리하면 다음과 같다: 

RISCV: do_trap_ecall_u() -> syscall_exit_to_user_mode()  
Arm64:  arm64_exit_to_user_mode() -> exit_to_user_mode() 

RISCV

syscall_exit_to_user_mode()

__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
instrumentation_begin();
__syscall_exit_to_user_mode_work(regs); >>>
instrumentation_end();
__exit_to_user_mode();
}

syscall_exit_to_user_mode_work()

void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
__syscall_exit_to_user_mode_work(regs);  >>>
}

__syscall_exit_to_user_mode_work()

static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
syscall_exit_to_user_mode_prepare(regs);
local_irq_disable_exit_to_user();
exit_to_user_mode_prepare(regs);  >>>
}

exit_to_user_mode_prepare()

static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long ti_work;

lockdep_assert_irqs_disabled();

/* Flush pending rcuog wakeup before the last need_resched() check */
tick_nohz_user_enter_prepare();

ti_work = read_thread_flags();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);  >>>

arch_exit_to_user_mode_prepare(regs, ti_work);

/* Ensure that kernel state is sane for a return to userspace */
kmap_assert_nomap();
lockdep_assert_irqs_disabled();
lockdep_sys_exit();
}

Arm64

arm64_exit_to_user_mode()

static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
local_irq_disable();
exit_to_user_mode_prepare_legacy(regs); >>
local_daif_mask();
mte_check_tfsr_exit();
exit_to_user_mode();
}

exit_to_user_mode_prepare_legacy()

/* Temporary workaround to keep ARM64 alive */
static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs)
{
__exit_to_user_mode_prepare(regs); >>
rseq_exit_to_user_mode_legacy();
__exit_to_user_mode_validate();
}

static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long ti_work;

lockdep_assert_irqs_disabled();

/* Flush pending rcuog wakeup before the last need_resched() check */
tick_nohz_user_enter_prepare();

ti_work = read_thread_flags();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work); >>

arch_exit_to_user_mode_prepare(regs, ti_work);
}


exit_to_user_mode_loop

static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
    unsigned long ti_work)
{
/*
 * Before returning to user space ensure that all pending work
 * items have been completed.
 */
while (ti_work & EXIT_TO_USER_MODE_WORK) {

local_irq_enable_exit_to_user(ti_work);

if (ti_work & _TIF_NEED_RESCHED)
schedule();

if (ti_work & _TIF_UPROBE)
uprobe_notify_resume(regs);

if (ti_work & _TIF_PATCH_PENDING)
klp_update_patch_state(current);

if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
arch_do_signal_or_restart(regs);

if (ti_work & _TIF_NOTIFY_RESUME)
resume_user_mode_work(regs);

리눅스 커널에서 preemptive scheduling이 일어나는 지점은 다음과 같다:

 - 인터럽트를 처리하고 난 시점
 - 시스템 콜을 처리하고 난 시점 
 - 시그널을 처리하고 난 시점

이 동작은 아키텍처에 의존적인 동작이었다. 즉, Arm64나 RISCV 별로 루틴이 달랐다.
최근에 common한 path로 처리가 됐다.

관련 커밋 메시지와 커밋 아이디는 아래와 같다: 

f0bddf50586da81360627a772be0e3
riscv: entry: Convert to generic entry

'21 Feb 2023'에 머지됐으니, v6.3 버전부터 이와 같은 path로 처리된다.

RISCV 관점 분석

RISCV에서 인터럽트는 do_irq() 함수 (위치: arch/riscv/kernel/traps.c)에서 핸들링한다.

arch/riscv/kernel/traps.c
asmlinkage void noinstr do_irq(struct pt_regs *regs)
{
        irqentry_state_t state = irqentry_enter(regs);

        if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack())
                call_on_irq_stack(regs, handle_riscv_irq);
        else
                handle_riscv_irq(regs);

        irqentry_exit(regs, state);
}

handle_riscv_irq() 함수에서 인터럽트 디스크립터를 읽어서 인터럽트 핸들러를 호출한다. 이 과정을
마무리하면 결국 irqentry_exit() 함수가 호출된다. 

Arm64 관점 분석

이번에는 Arm64 아키텍처 코드이다.

exit_to_kernel_mode() 함수에서 irqentry_exit() 함수를 호출한다.

arch/arm64/kernel/entry-common.c 
static void noinstr exit_to_kernel_mode(struct pt_regs *regs,
                                        irqentry_state_t state)
{
        mte_check_tfsr_exit();
        irqentry_exit(regs, state);
}

참고로, exit_to_kernel_mode() 함수는 __el1_irq() 함수에서 호출된다.

arch/arm64/kernel/entry-common.c 
static __always_inline void __el1_irq(struct pt_regs *regs,
                                      void (*handler)(struct pt_regs *))
{
        irqentry_state_t state;

        state = enter_from_kernel_mode(regs);

        irq_enter_rcu();
        do_interrupt_handler(regs, handler);
        irq_exit_rcu();

        exit_to_kernel_mode(regs, state);
}

코드 리뷰: 

- do_interrupt_handler() 함수: 인터럽트 핸들러 호출 
- exit_to_kernel_mode() 함수 호출

정리하면 다음과 같다: 

RISCV: do_irq -> irqentry_exit
Arm64:  __el1_irq -> exit_to_kernel_mode -> irqentry_exit

irqentry_exit() 함수의 구현부다.

kernel/entry/common.c 
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
        lockdep_assert_irqs_disabled();

[...]
                instrumentation_begin();
                if (IS_ENABLED(CONFIG_PREEMPTION))
                        irqentry_exit_cond_resched();

                /* Covers both tracing and lockdep */
                trace_hardirqs_on();
                instrumentation_end();

irqentry_exit_cond_resched() 함수를 호출한다. 

irqentry_exit_cond_resched() 함수는 매크로 타입으로 dynamic_irqentry_exit_cond_resched() 함수로
치환된다. 따라서 dynamic_irqentry_exit_cond_resched() 함수를 분석하자.

dynamic_irqentry_exit_cond_resched -> raw_irqentry_exit_cond_resched

(where) #define irqentry_exit_cond_resched()       dynamic_irqentry_exit_cond_resched()
void dynamic_irqentry_exit_cond_resched(void)
{
        if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
                return;
        raw_irqentry_exit_cond_resched();
}

raw_irqentry_exit_cond_resched() 함수를 호출한다.

raw_irqentry_exit_cond_resched() 함수의 구현부이다.

kernel/entry/common.c
void raw_irqentry_exit_cond_resched(void)
{
        if (!preempt_count()) {
                /* Sanity check RCU and thread stack */
                rcu_irq_exit_check_preempt();
                if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
                        WARN_ON_ONCE(!on_thread_stack());
                if (need_resched() && arch_irqentry_exit_need_resched())
                        preempt_schedule_irq();
        }
}

preempt_count()가 0이고, need_resched() 함수와 arch_irqentry_exit_need_resched() 함수가 모두 true이면
preempt_schedule_irq() 함수를 호출한다. 각 조건의 의미는 다음과 같다:

 - need_resched(): TIF_NEED_RESCHED가 task_struct.thread_info.flags에 포함된 경우
 - arch_irqentry_exit_need_resched(): 아키텍처마다 선언된 매크로가 true를 리턴하는 경우(대부분 true로 선언됨)

새해를 맞이해서 할인 쿠폰을 요청하시는 분이 계셔서, 
인프런에 오픈한 강의에 대한 특별 할인 쿠폰(40%)을 공유드립니다..

※ 인프런은 '꼭' 과정별 링크를 이용해 접속해 주세요!!!!!!
쿠폰 유효기간: 01월 31일 (2026년) 

[TRACE32 디버깅]

- TRACE32 입문 - MCU & RTOS 개발자를 위한 실전 디버깅 과정: 40% 할인쿠폰 50매
(할인 쿠폰 링크)
https://inf.run/bBNm5

[Arm, RISC-V 아키텍처 시리즈]   

 - 시스템 소프트웨어 개발을 위한 Arm 아키텍처의 구조와 원리 - 1부 저자 직강: 40% 할인쿠폰 50매 
(할인 쿠폰 링크)
https://inf.run/WhKRD
 
 - 시스템 소프트웨어 개발을 위한 Arm 아키텍처의 구조와 원리 - 2부 저자 직강: 40% 할인쿠폰 50매 
(할인 쿠폰 링크)
https://inf.run/V5H3x

- 디버깅으로 배우는 RISC-V 아키텍처 -1부: 40% 할인쿠폰 50매 
(할인 쿠폰 링크)
https://inf.run/V2ThR

- 디버깅으로 배우는 RISC-V 아키텍처 -2부: 40% 할인쿠폰 50매 
(할인 쿠폰 링크)
https://inf.run/3AZN7

- 디버깅으로 배우는 RISC-V 아키텍처 -3부: 40% 할인쿠폰 50매 
(할인 쿠폰 링크)
https://inf.run/n7XFy

[리눅스 커널 시리즈]  

리눅스 소개와 리눅스 커널 Overview [저자직강 1부-1]: 40% 할인쿠폰 50매 
(할인 쿠폰 링크)
https://inf.run/NGbVU

리눅스 커널의 구조와 원리: 디버깅 - Basic [저자 직강 1부-2]: 40% 할인쿠폰 50매 
(할인 쿠폰 링크)
https://inf.run/d41vV

리눅스 커널의 구조와 원리: 디버깅 - Advanced 실전 [저자 직강 1부-3]: 40% 할인쿠폰 50매
(할인 쿠폰 링크)
https://inf.run/6qDwp
 
리눅스 커널의 구조와 원리: 프로세스 [저자 직강 1부-4]: 40% 할인쿠폰 50매
(할인 쿠폰 링크)
https://inf.run/EtLT2
 
리눅스 커널의 구조와 원리: 인터럽트 [저자 직강 1부-5]: 40% 할인쿠폰 50매
(할인 쿠폰 링크)
https://inf.run/96Avs

리눅스 커널의 구조와 원리: 인터럽트 후반부 [저자 직강 1부-6]: 40% 할인쿠폰 50매
(할인 쿠폰 링크)
https://inf.run/gnmYW

리눅스 커널의 구조와 원리: 워크큐 [저자 직강 1부-7]: 50% 할인쿠폰 50매
(할인 쿠폰 링크)
https://inf.run/s7QNs

늘 고맙습니다.
감사합니다... :) 😀

+ Recent posts