본문 바로가기

Core BSP 분석/리눅스 커널 핵심 분석

[Kernel]panic @__wake_up

Debugging

"ndroid.systemui" 프로세스가 raw_spin_lock_irqsave() API 동작 중에 aee_wdt_atf_entry() Trigger

-000|aee_wdt_atf_entry()
-001|prefetchw(inline)
-001|arch_spin_lock(inline)
-001|do_raw_spin_lock_flags(inline)
-001|__raw_spin_lock_irqsave(inline)
-001|raw_spin_lock_irqsave(lock = 0xDD82725C)
-002|__wake_up(q = 0xDD82725C, mode = 1, nr_exclusive = 1, key = 0x0)
-003|mmc_wait_data_done
-004|msdc_irq_data_complete(host = 0xDD827280, ?, error = 0)
-005|msdc_irq(?, dev_id = 0xDD13B9C8)
-006|static_key_count(inline)
-006|static_key_false(inline)
-006|trace_irq_handler_exit(inline)
-006|handle_irq_event_percpu(?, action = 0xDD13B9C8)
-007|handle_irq_event(desc = 0xC0F38440)
-008|cond_unmask_eoi_irq(inline)
-008|handle_fasteoi_irq(?, desc = 0xC0F38440)
-009|static_key_count(inline)
-009|static_key_false(inline)
-009|trace_irq_exit(inline)
-009|__handle_domain_irq(domain = 0xDE806000, hwirq = 111, lookup = TRUE, ?)
-010|__raw_readl(inline)
-010|gic_handle_irq(regs = 0xDD13BFB0)
-011|__irq_usr(asm)


@ spinlock이 release되어 spinlock 변수가 [owner: 0x56AC, next: 0x56AD]로 업데이트되기까지 기다림.

(raw_spinlock_t *)0xDD82725C = 0xDD82725C -> (
raw_lock = (
slock = 0x56AD56AB,
tickets = (
owner = 0x56AB, // <<--
next = 0x56AD))) // <<--

@ 0xDD82725C spinlock 변수 주소를 누가 접근하는 지 검색해보면.

(where)
(raw_spinlock_t *)0xDD82725C
c2d01b78: dd82725c
c2d01b7c: dd82725c
c44f0994: dd82725c
dd13bd60: dd82725c // "ndroid.systemui"
dd13bd9c: dd82725c // "ndroid.systemui"
dd13bdc8: dd82725c // "ndroid.systemui"
ddbefdb8: dd82725c // <<--"mmcqd/0"
ddbefdd0: dd82725c
e0abd994: dd82725c


"mmcqd/0" 프로세스가 접근함을 알 수 있음

v.v % (struct thread_info*)(0xDDBEFDB8 & ~0x1fff)
(long unsigned int) swap_in = 0 = 0x0 = '....',
(long unsigned int) swap_out = 0 = 0x0 = '....',
(struct task_cputime) cputime_expires = ((cputime_t) utime = 0 = 0x0 = '....', (cputime_t) stime = 0 = 0x0 = '....', (long lo
(struct list_head [3]) cpu_timers = ([0] = ((struct list_head *) next = 0xDDB49A08 = -> ((struct list_head *) next = 0xDDB49
(struct cred *) real_cred = 0xDDB71200 = -> ((atomic_t) usage = ((int) counter = 2 = 0x2 = '....'), (kuid_t) uid = ((uid_t)
(struct cred *) cred = 0xDDB71200 = -> ((atomic_t) usage = ((int) counter = 2 = 0x2 = '....'), (kuid_t) uid = ((uid_t) val =
(char [16]) comm = "mmcqd/0",


@ "mmcqd/0" 프로세스 콜스택

-000|schedule()
-001|mmc_wait_for_data_req_done(inline)
-001|mmc_start_req(host = 0xC0F75D0C, areq = 0xC0F644E0, error = 0xC102CB08)
-002|mmc_blk_issue_rw_rq(mq = 0xDDB64408, rqc = 0x0)
-003|mmc_blk_issue_rq(mq = 0xDDB64408, req = 0x0)
-004|mmc_queue_thread(d = 0xDDB64408)
-005|kthread(_create = 0xDDB8BDC0)
-006|ret_from_fork(asm)
-007|ret_fast_syscall(asm)


@ 코드를 가보면, spin_lock_irqsave(&context_info->lock, flags); API를 통해 lock을 걸고 있음

static int mmc_wait_for_data_req_done(struct mmc_host *host,
struct mmc_request *mrq,
struct mmc_async_req *next_req)
{
    struct mmc_command *cmd;
    struct mmc_context_info *context_info = &host->context_info;
    int err;
    unsigned long flags;
    while (1) {
        wait_event_interruptible(context_info->wait,
                                               (context_info->is_done_rcv ||
                                               context_info->is_new_req));
        spin_lock_irqsave(&context_info->lock, flags);
        context_info->is_waiting_last_req = false;


@ Watchdog Reset이 trigger된 "ndroid.systemui" 프로세스 콜 스택에 아래 함수들이 있었음

-003|mmc_wait_data_done
-004|msdc_irq_data_complete(host = 0xDD827280, ?, error = 0)


@ mmc_wait_data_done() 함수를 보면 mrq->host->context_info.is_done_rcv = true로 설정하고,
&mrq->host->context_info.wait wait queue을 깨우는 역할을 수행

static void mmc_wait_data_done(struct mmc_request *mrq){

    mrq->host->context_info.is_done_rcv = true;

     wake_up_interruptible(&mrq->host->context_info.wait);

}


그런데 mmc_wait_for_data_req_done() 함수의 아래 구문은 context_info->is_done_rcv 또는 context_info->is_new_req 중 하나가 true가
될 때까지 기다리는 코드다.

wait_event_interruptible(context_info->wait,
(context_info->is_done_rcv ||
context_info->is_new_req));


정리해보면, race condition으로 인해 wake_up_interruptible(&mrq->host->context_info.wait); 호출로 wait queue(&mrq->host->context_info.wait)에
등록된 프로세스를 깨우기도 전에, is_done_rcv가 true로 바뀌면서 위 wait_event_interruptible() 구문의 조건이 먼저 풀려 mmcqd/0 프로세스가 이미 깨어나 버린 경우로 짐작된다.


잠을 자고 있는 누군가를 깨우려고 손을 대려고 하는데 이미 깨버린 경우와 비슷하네.

In-depth Analysis

내 추정이 맞다면 아래 코드를 삽입하면 바로 커널 패닉이 나야 한다.

(램덤프 force_mmc_wait_data_done_crash)

static void mmc_wait_data_done(struct mmc_request *mrq){
mrq->host->context_info.is_done_rcv = true;
+ mdelay(1);
wake_up_interruptible(&mrq->host->context_info.wait);
}


예상대로 바로 커널 패닉이 발생했다.

-000|do_DataAbort(addr = 0, fsr = 0, regs = 0x0)
-001|__dabt_svc(asm)
-->|exception
-002|debug_spin_lock_before(inline)
-002|do_raw_spin_lock(lock = 0x02B4)
-003|raw_spin_lock_irqsave(lock = 0x02B4)
-004|__wake_up(q = 0x02B4, mode = 1, nr_exclusive = 1, key = 0x0)
-005|mmc_wait_data_done(mrq = 0xDC9725F8)
-006|msdc_irq_data_complete(host = 0xDC919300, ?, error = 0)
-007|msdc_irq(?, dev_id = 0x1)
-008|handle_irq_event_percpu(?, action = 0xDC91E0C0)
-009|handle_irq_event(desc = 0xC121A440)
-010|cond_unmask_eoi_irq(inline)
-010|handle_fasteoi_irq(?, desc = 0xC121A440)
-011|static_key_count(inline)
-011|static_key_false(inline)
-011|trace_irq_exit(inline)
-011|__handle_domain_irq(domain = 0xDE806000, hwirq = 111, lookup = TRUE, ?)
-012|__raw_readl(inline)
-012|gic_handle_irq(regs = 0xCB773E10)
-013|__irq_svc(asm)
-->|exception
-014|current_thread_info(inline)
-014|check_preemption_disabled(what1 = 0xC0F115AC, what2 = 0xC0EE0B28)
-015|debug_smp_processor_id()
-016|MT_trace_preempt_on()
-017|preempt_count_sub
-018|current_thread_info(inline)
-018|preempt_count(inline)
-018|should_resched(inline)
-018|__percpu_counter_add(fbc = 0xD8AC7178, amount = 0, batch = 32)
-019|__sb_start_write(sb = 0xC0F115AC, level = -1058141400, wait = FALSE)
-020|file_start_write(inline)
-020|vfs_write(file = 0xD366C480, buf = 0xBEA83A4C, ?, pos = 0xCB773F78)
-021|SYSC_write(inline)
-021|sys_write(?, buf = -1096271284, count = 146)
-022|ret_fast_syscall(asm)

@mrq->host->context_info.wait 변수를 확인해보면, 이미 mrq->host 변수가 0이다.

(struct mmc_request *)0xDC9725F8 = 0xDC9725F8 -> (
    sbc = 0xDC97262C,
    cmd = 0xDC972660,
    data = 0xDC9726C8,
    stop = 0xDC972694,
    completion = (done = 0x0, wait = (lock = (rlock = (raw_lock = (slock = 0x0,
      done = 0x0,
     host = 0x0)

@어머 그런데, mmcqd/0 프로세스가 mmc_wait_for_data_req_done() 코드를 이미 수행했네?

-000|context_switch(inline)
-000|__schedule()
-001|schedule()
-002|mmc_wait_for_data_req_done(inline)
-002|mmc_start_req(host = 0xDC919000, areq = 0xDC972710, ?)
-003|mmc_blk_issue_rw_rq(mq = 0x0, rqc = 0xC64B18C0)
-004|mmc_blk_issue_rq(mq = 0xDC972494, req = 0xC64B18C0)
-005|mmc_queue_thread(d = 0xDC972494)
-006|kthread(_create = 0xDCA24E40)
-007|ret_from_fork(asm)
-008|ret_fast_syscall(asm)


@ftrace log를 좀 더 추가해보자.

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index db74413..fcf6692 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1034,6 +1034,9 @@ out:
}
EXPORT_SYMBOL(mmc_start_bkops);
#endif
/*
 * mmc_wait_data_done() - done callback for data request
 * @mrq: done data request
@@ -1042,10 +1045,25 @@ EXPORT_SYMBOL(mmc_start_bkops);
*/
static void mmc_wait_data_done(struct mmc_request *mrq)
{
        mrq->host->context_info.is_done_rcv = true;


+      trace_printk("+msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0));
        mdelay(1);
+      trace_printk("++msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0));
        wake_up_interruptible(&mrq->host->context_info.wait);
+      trace_printk("--msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0));

 


@ 역시나 GIC mtk-msdc(111) IRQ가 Trigger된 다음에 아래와 같이 커널 패닉이 발생함

(램덤프: neo_more_crash_ftrace)

-000|do_DataAbort(addr = 0, fsr = 0, regs = 0x0)
-001|__dabt_svc(asm)
-->|exception
-002|debug_spin_lock_before(inline)
-002|do_raw_spin_lock(lock = 0x02B4)
-003|raw_spin_lock_irqsave(lock = 0x02B4)
-004|__wake_up(q = 0x02B4, mode = 1, nr_exclusive = 1, key = 0x0)
-005|mmc_wait_data_done(mrq = 0xDC9235F8)
-006|msdc_irq_data_complete(host = 0xDC927300, ?, error = 0)
-007|msdc_irq(?, dev_id = 0x1)
-008|handle_irq_event_percpu(?, action = 0xDC92B180)
-009|handle_irq_event(desc = 0xC121A440)
-010|cond_unmask_eoi_irq(inline)
-010|handle_fasteoi_irq(?, desc = 0xC121A440)
-011|static_key_count(inline)
-011|static_key_false(inline)
-011|trace_irq_exit(inline)
-011|__handle_domain_irq(domain = 0xDE806000, hwirq = 111, lookup = TRUE, ?)
-012|__raw_readl(inline)
-012|gic_handle_irq(regs = 0xC2631E20)
-013|__irq_svc(asm)
-->|exception
-014|__fget(?, mask = 16384)
-015|__fget_light(?, mask = 16384)
-016|__fdget
-017|sockfd_lookup_light(?, err = 0xC2631EC8, fput_needed = 0xC2631ECC)
-018|SYSC_recvfrom(inline)
-018|sys_recvfrom(?, ubuf = -1212542344, ?, flags = 64, addr = 0, addr_len = 0)
-019|ret_fast_syscall(asm)

 

@ Ftrace log 분석
커널 패닉이 일어나기 직전 상황

[1]: irq=111 name=mtk-msdc 가 Trigger됨
[2]: IRQ context에서 mmc_wait_data_done() 함수가 호출 from(msdc_irq_data_complete)
[2]~[3] 사이: mmcqd/0 프로세스로 context switching 되지 않고, CPU5에서는 InputDispatcher-1538이 수행됨
[3]: mdelay(1); 이후 찍히는 ftrace log
[4]: wake_up_interruptible(&mrq->host->context_info.wait); API 호출로 mmcqd/0 프로세스를 wakeup 시킴
CPU5: mmcqd/0, pid=244
sched_wakeup: comm=mmcqd/0 pid=244 prio=120 success=1 target_cpu=005 state=W
[5]: wake_up_interruptible(&mrq->host->context_info.wait); API 호출이 끝난 후 찍히는 로그(별 의미 없음)


kworker/4:0-25 [004] 149.827488: irq_handler_entry: irq=111 name=mtk-msdc // <<--[1]
kworker/4:0-25 [004] 149.827495: bprint: mmc_wait_data_done: +msdc [F: mmc_wait_data_done, L:1049]: ? ? from(msdc_irq_data_complete) // <<--[2]
InputDispatcher-1538 [005] 149.827496: sched_migrate_task: comm=m.rpi.launcher3 pid=2135 prio=120 orig_cpu=6 dest_cpu=4 state=W
InputDispatcher-1538 [005] 149.827519: sched_wakeup: comm=m.rpi.launcher3 pid=2135 prio=120 success=1 target_cpu=004 state=W
InputDispatcher-1538 [005] 149.827591: sched_migrate_task: comm=ndroid.systemui pid=1622 prio=120 orig_cpu=5 dest_cpu=6 state=W
InputDispatcher-1538 [005] 149.827608: sched_wakeup: comm=ndroid.systemui pid=1622 prio=120 success=1 target_cpu=006 state=W
InputDispatcher-1538 [005] 149.827675: sched_migrate_task: comm=android.ui pid=1287 prio=118 orig_cpu=4 dest_cpu=6 state=W
InputDispatcher-1538 [005] 149.827694: sched_wakeup: comm=android.ui pid=1287 prio=118 success=1 target_cpu=006 state=W
InputDispatcher-1538 [005] 149.827855: sched_switch: prev_comm=InputDispatcher prev_pid=1538 prev_prio=112 prev_state=S ==> next_comm=swapper/5 next_pid=0 next_prio=120 extra_prev_state=K|m|d 
<idle>-0 [005] 149.827912: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=InputDispatcher next_pid=1538 next_prio=112 extra_prev_state=K|m|d 
InputDispatcher-1538 [005] 149.828064: sched_switch: prev_comm=InputDispatcher prev_pid=1538 prev_prio=112 prev_state=S ==> next_comm=swapper/5 next_pid=0 next_prio=120 extra_prev_state=K|m|d 
<idle>-0 [005] 149.828209: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=InputDispatcher next_pid=1538 next_prio=112 extra_prev_state=K|m|d 
InputDispatcher-1538 [005] 149.828352: sched_switch: prev_comm=InputDispatcher prev_pid=1538 prev_prio=112 prev_state=S ==> next_comm=swapper/5 next_pid=0 next_prio=120 extra_prev_state=K|m|d 
kworker/4:0-25 [004] 149.828497: bprint: mmc_wait_data_done: ++msdc [F: mmc_wait_data_done, L:1058]: ? ? from(msdc_irq_data_complete) // <<--[3]
kworker/4:0-25 [004] 149.828521: sched_wakeup: comm=mmcqd/0 pid=244 prio=120 success=1 target_cpu=005 state=W // <<--[4]
kworker/4:0-25 [004] 149.828525: bprint: mmc_wait_data_done: -msdc [F: mmc_wait_data_done, L:1060]: ? ? from(msdc_irq_data_complete) <<-[5]
<idle>0 [005] 149.828528: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=mmcqd/0 next_pid=244 next_prio=120 extra_prev_state=K|m|d <<-[6]
kworker/4:0-25 [004] 149.828530: irq_handler_exit: irq=111 ret=handled

 

1046 static void mmc_wait_data_done(struct mmc_request *mrq)
1047 {
1048 mrq->host->context_info.is_done_rcv = true;
1049 trace_printk("+msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<--[2]
//생략
1051 mdelay(1);
//생략
1057
1058 trace_printk("++msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<--[3]
1059 wake_up_interruptible(&mrq->host->context_info.wait);
1060 trace_printk("-msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<-[5]


커널 패닉 상황
[7]: irq=111 name=mtk-msdc 가 handle 되었음
[8]: [mmcqd/0-244, CPU5]mmc_wait_for_data_req_done() 함수 내[from(mmc_blk_issue_rw_rq)] wait_event_interruptible()에서 빠져나옴.
[9][10]: [mmcqd/0-244, CPU5]mmc_wait_for_data_req_done() 함수를 빠져나옴
[11]: irq=111 name=mtk-msdc 가 Trigger됨 --> CPU4의 m.lge.launcher3-2135
[12]: [mmcqd/0-244, CPU5] 깨어난 mmcqd/0-244 프로세스 활동 시작
&mqrq->brq 변수를 0으로 밀어버림 (where: memset(brq, 0, sizeof(struct mmc_blk_request));)
[13]: [m.lge.launcher3-2135 CPU4] irq=111 name=mtk-msdc IRQ로 Trigger되어 실행되어
mmc_wait_data_done() 함수 진입 
[14][15]: [mmcqd/0-244, CPU5] mmc_blk_rw_rq_prep() 함수 퇴장하려함
[16][17][18] [mmcqd/0-244, CPU5] mmc_wait_for_data_req_done() 함수 처리 후 퇴장하려함
[19]: [m.lge.launcher3-2135 CPU4] wake_up_interruptible(&mrq->host->context_info.wait);
[20] 커널 패닉
[12]번 동작의 memset(brq, 0, sizeof(struct mmc_blk_request)); 으로 0으로 밀린 mrq->host 변수를 통해 wake_up_interruptible() API를 호출함

kworker/4:0-25 [004] 149.828530: irq_handler_exit: irq=111 ret=handled // <<--[7]
mmcqd/0-244 [005] 149.828536: bprint: mmc_start_req: ++mmcq [F: mmc_wait_for_data_req_done, L:1236]: ? ? from(mmc_blk_issue_rw_rq) // <<--[8]
kworker/4:0-25 [004] 149.828537: irq_handler_entry: irq=30 name=arch_timer
kworker/4:0-25 [004] 149.828546: irq_handler_exit: irq=30 ret=handled
mmcqd/0-244 [005] 149.828559: bprint: mmc_start_req: mmcq [F: mmc_wait_for_data_req_done, L:1268]: ? ? from(mmc_blk_issue_rw_rq) // <<-[9]
mmcqd/0-244 [005] 149.828561: bprint: mmc_start_req: mmcq release [F: mmc_wait_for_data_req_done, L:1270]: ? ? from(mmc_blk_issue_rw_rq) // <<--[10]
kworker/4:0-25 [004] 149.828584: workqueue_execute_end: work struct 0xcb40cae4
kworker/4:0-25 [004] 149.828630: sched_switch: prev_comm=kworker/4:0 prev_pid=25 prev_prio=120 prev_state=S ==> next_comm=m.lge.launcher3 next_pid=2135 next_prio=120 extra_prev_state=K|m|d 
m.lge.launcher3-2135 [004] 149.828660: irq_handler_entry: irq=111 name=mtk-msdc // <<--[11]
mmcqd/0-244 [005] 149.828661: bprint: mmc_blk_rw_rq_prep: ++mmc_blk_rw_rq_prep [F: mmc_blk_rw_rq_prep, L:1725]: ? ? from(mmc_blk_issue_rw_rq) // <<--[12]
m.lge.launcher3-2135 [004] 149.828666: bprint: mmc_wait_data_done: +msdc [F: mmc_wait_data_done, L:1049]: ? ? from(msdc_irq_data_complete) // <<--[13]
mmcqd/0-244 [005] 149.828670: bprint: mmc_blk_rw_rq_prep: -mmc_blk_rw_rq_prep [F: mmc_blk_rw_rq_prep, L:1905]: ? ? from(mmc_blk_issue_rw_rq) // <<-[14]
mmcqd/0-244 [005] 149.828672: bprint: mmc_blk_rw_rq_prep: mmc_blk_rw_rq_prep [F: mmc_blk_rw_rq_prep, L:1907]: ? ? from(mmc_blk_issue_rw_rq) // <<-[15]
mmcqd/0-244 [005] 149.828681: bprint: mmc_start_req: +mmcq [F: mmc_wait_for_data_req_done, L:1231]: ? ? from(mmc_blk_issue_rw_rq) // <<--[16]
mmcqd/0-244 [005] 149.828683: bprint: mmc_start_req: ++mmcq [F: mmc_wait_for_data_req_done, L:1236]: ? ? from(mmc_blk_issue_rw_rq) // <<--[17]
mmcqd/0-244 [005] 149.828695: bprint: mmc_start_req: -mmcq [F: mmc_wait_for_dat뗥nQ, L:0]: ? ? from(0x726f776bS) // <<--[18]
m.lge.launcher3-2135 [004] 149.829669: bprint: mmc_wait_data_done: ++msdc [F: mmc_wait_data_done, L:1058]: ? ? from(msdc_irq_data_complete) // <<--[19]
m.lge.launcher3-2135 [004] 149.829748: tracing_on: ftrace is disabled caller=oops_enter // <<–[20]


1222 static int mmc_wait_for_data_req_done(struct mmc_host *host,
1223 struct mmc_request *mrq,
1224 struct mmc_async_req *next_req)
1225 {
1226 struct mmc_command *cmd;
1227 struct mmc_context_info *context_info = &host->context_info;
1228 int err;
1229 unsigned long flags;
1230
1231 trace_printk("+mmcq [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<--[16]
1232 while (1) {
1233 wait_event_interruptible(context_info->wait,
1234 (context_info->is_done_rcv ||
1235 context_info->is_new_req));
1236 trace_printk("++mmcq [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<--[8], [17]
1237 spin_lock_irqsave(&context_info->lock, flags);
//snip
1268 trace_printk("mmcq [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<-[9], [18]
1269 mmc_retune_release(host);
1270 trace_printk("mmcq release [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0));
1271 return err;
1272 }


1701 static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
1702 struct mmc_card *card,
1703 int disable_multi,
1704 struct mmc_queue *mq)
1705 {
1706 u32 readcmd, writecmd;
1707 struct mmc_blk_request *brq = &mqrq->brq;
//생략
1724 memset(brq, 0, sizeof(struct mmc_blk_request));
1725 trace_printk("++mmc_blk_rw_rq_prep [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<--[12]
1726 brq->mrq.cmd = &brq->cmd;
//생략
1905 trace_printk("-mmc_blk_rw_rq_prep [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); //<<-[14]
1906 mmc_queue_bounce_pre(mqrq);
1907 trace_printk("-mmc_blk_rw_rq_prep [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0));
1908 }


1046 static void mmc_wait_data_done(struct mmc_request *mrq)
1047 {
1048 mrq->host->context_info.is_done_rcv = true;
1049 trace_printk("+msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<--[13]
//생략
1051 mdelay(1);
//생략
1057
1058 trace_printk("++msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0)); // <<--[19]
1059 wake_up_interruptible(&mrq->host->context_info.wait);
1060 trace_printk("--msdc [F: %s, L:%d]: ? ? from(%pS)\n", __func__, __LINE__, (void *)__builtin_return_address(0));


정리하면 wake_up_interruptible() API에 파라미터로 전달되는 &mrq->host->context_info.wait를 다른 값(mmcqd를 깨울 수 있는 wait_queue)으로 바꾸거나,
wake_up_interruptible() 호출 시 조건을 줘야 한다. 혹은 spinlock irq save로 코드 보호


솔루션
가능 패치 1건: mrq->host->context_info.is_done_rcv = true; 와 wake_up_interruptible(&mrq->host->context_info.wait); 처리 도중 MSDC irq가
trigger되었으므로 아래와 같이 spinlock irq save로 보호

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 347108a..21b7307 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1042,8 +1042,10 @@ EXPORT_SYMBOL(mmc_start_bkops);
*/
static void mmc_wait_data_done(struct mmc_request *mrq)
{
+    struct mmc_context_info *context_info = &mrq->host->context_info;
+    unsigned long flags;
+
+    spin_lock_irqsave(&context_info->lock, flags);
     mrq->host->context_info.is_done_rcv = true;
     wake_up_interruptible(&mrq->host->context_info.wait);
+   spin_unlock_irqrestore(&context_info->lock, flags);
}


가능 패치 2건: wake_up_interruptible() 호출 시 조건을 줌. 패치가 허접해 보임.

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 347108a..ea832709 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1043,7 +1043,8 @@ EXPORT_SYMBOL(mmc_start_bkops);
static void mmc_wait_data_done(struct mmc_request *mrq)
{
    mrq->host->context_info.is_done_rcv = true;
-   wake_up_interruptible(&mrq->host->context_info.wait);
+  if (mrq->host)
+       wake_up_interruptible(&mrq->host->context_info.wait);
}

  1.  

'Core BSP 분석 > 리눅스 커널 핵심 분석' 카테고리의 다른 글

[Linux][Kernel] 타이머(Timer) Overview  (0) 2023.05.06
파레트  (0) 2023.05.06
세마포어  (0) 2023.05.06
리눅스 커널 디버깅  (0) 2023.05.06
stack_overflow_check  (0) 2023.05.06