본문 바로가기

Core BSP 분석/커널 트러블슈팅

[tegra4i] UI freeze due to RT class 프로세스

커널 로그는 다음과 같습니다. UI freeze 상태라 강제 커널 크래시를 유발시켜 램덤프를 추출했습니다.
 
[ 7164.614352] tegra sound_card.27: Ply shutdown, devnum 7
[ 7165.077580] ------------[ cut here ]------------
[ 7165.077807] WARNING: at /root/rpi/kernel/workqueue.c:2904 __cancel
_work_timer+0x12c/0x178()
[ 7165.087114] Modules linked in: texfat(PO)
[ 7165.092596] CPU: 3 PID: 1373 Comm: mmcqd/0 Tainted: P        W  O 3.10.44+ #1
[ 7165.101579] [<c0017344>] (unwind_backtrace+0x0/0x144) from [<c0013874>] (show_stack+0x20/0x24)
[ 7165.111750] [<c0013874>] (show_stack+0x20/0x24) from [<c0936350>] (dump_stack+0x20/0x28)
[ 7165.121489] [<c0936350>] (dump_stack+0x20/0x28) from [<c0028848>] (warn_slowpath_common+0x5c/0x7c)
[ 7165.132133] [<c0028848>] (warn_slowpath_common+0x5c/0x7c) from [<c0028894>] (warn_slowpath_null+0x2c/0x34)
[ 7165.143418] [<c0028894>] (warn_slowpath_null+0x2c/0x34) from [<c0048600>] (__cancel_work_timer+0x12c/0x178)
[ 7165.154654] [<c0048600>] (__cancel_work_timer+0x12c/0x178) from [<c0048668>] (cancel_delayed_work_sync+0x1c/0x20)
[ 7165.168097] [<c0048668>] (cancel_delayed_work_sync+0x1c/0x20) from [<c007b588>] (pm_qos_update_request+0x34/0xac)
[ 7165.178711] [<c007b588>] (pm_qos_update_request+0x34/0xac) from [<c064348c>] (sdhci_disable+0x2c/0x48)
[ 7165.189268] [<c064348c>] (sdhci_disable+0x2c/0x48) from [<c062e68c>] (mmc_release_host+0xa8/0xc0)
[ 7165.199855] [<c062e68c>] (mmc_release_host+0xa8/0xc0) from [<c0640794>] (mmc_blk_issue_rq+0x104/0x664)
[ 7165.210600] [<c0640794>] (mmc_blk_issue_rq+0x104/0x664) from [<c0641514>] (mmc_queue_thread+0xb4/0x14c)
[ 7165.221673] [<c0641514>] (mmc_queue_thread+0xb4/0x14c) from [<c004ef1c>] (kthread+0xc0/0xd0)
[ 7165.231685] [<c004ef1c>] (kthread+0xc0/0xd0) from [<c000f788>] (ret_from_fork+0x14/0x20)
[ 7165.241314] ---[ end trace b64c1dfefe8a5e9e ]---
 
 
각 cpu별 current task들의 backtrace
0 번 CPU만 key에 의한 강제 crash를 발생 시키기 위한 work thread가 돌고 있고 나머지 cpu들은 idle task였습니다.
 
PID: 21555  TASK: c3e76400  CPU: 0   COMMAND: "kworker/0:1" ? 강제 crash를 발생한 work
#0 [<c03df0c4>] (crash_kexec_odin) from [<c0931038>]
#1 [<c0931038>] (panic) from [<c0013b4c>]
#2 [<c0013b4c>] (die) from [<c0013ba8>]
#3 [<c0013ba8>] (arm_notify_die) from [<c0008370>]
#4 [<c0008370>] (do_undefinstr) from [<c000f2f0>]
 
PID: 0      TASK: d909e400  CPU: 1   COMMAND: "swapper/1"
#0 [<c00166b0>] (machine_crash_nonpanic_core) from [<c0098774>]
#1 [<c0098774>] (generic_smp_call_function_single_interrupt) from [<c00158ec>]
#2 [<c00158ec>] (handle_IPI) from [<c00086f0>]
#3 [<c00086f0>] (gic_handle_irq) from [<c000f244>]
#4 [<c000f244>] (__irq_svc) from [<c008dce4>]
 
PID: 0      TASK: d909ee00  CPU: 2   COMMAND: "swapper/2"
#0 [<c00166b0>] (machine_crash_nonpanic_core) from [<c0098774>]
#1 [<c0098774>] (generic_smp_call_function_single_interrupt) from [<c00158ec>]
#2 [<c00158ec>] (handle_IPI) from [<c00086f0>]
#3 [<c00086f0>] (gic_handle_irq) from [<c000f244>]
#4 [<c000f244>] (__irq_svc) from [<c008dce4>]
#5 [<c007e63c>] (cpu_startup_entry) from [<c092dc10>]
 
PID: 0      TASK: d92a0000  CPU: 3   COMMAND: "swapper/3"
#0 [<c00166b0>] (machine_crash_nonpanic_core) from [<c0098774>]
#1 [<c0098774>] (generic_smp_call_function_single_interrupt) from [<c00158ec>]
#2 [<c00158ec>] (handle_IPI) from [<c00086f0>]
#3 [<c00086f0>] (gic_handle_irq) from [<c000f244>]
#4 [<c000f244>] (__irq_svc) from [<c008dce4>]
 
 
task state 확인
system_server가 UNINTERRUPTIBLE 상태였습니다. 따라서, UI freeze 발생한 것으로 보입니다.
 
2611      2   3  d79d4600  UN   0.0       0      0  [jbd2/mmcblk0p29]
   2627      2   2  d82dc600  UN   0.0       0      0  [jbd2/mmcblk0p10]
   2630      2   1  d82dee00  UN   0.0       0      0  [jbd2/mmcblk0p22]
   2977      1   3  d82c0a00  UN   0.0    1092    524  rctd
   2993      1   2  d6763c00  UN   0.0    3576    776  sdcard
   3053   2981   0  d84a6e00  UN   0.0     904    432  logcat
   3066   2980   2  d7c91e00  UN   0.0    1312    876  kernel_logger
   3070   2984   0  d7f2da00  UN   0.0     900    428  logcat
   3071   2985   3  d7c91400  UN   0.0     916    440  logcat
   3072   2982   0  d75a8a00  UN   0.0     900    428  logcat
   3077   2986   3  d776b200  UN   0.1    3560   1144  tcpdump
   3127   2758   1  d66ca800  UN   5.5 1075264 112420  system_server
   3304      2   2  d6f69e00  UN   0.0       0      0  [ts_ldisc_tx_loo]
   3305      1   3  d7cbe400  UN   0.1    5568   1248  nvm_server
   3332   2758   2  d675e400  UN   5.5 1075264 112420  InputDispatcher
   3546   2758   3  d7cba800  UN   5.5 1075264 112420  Binder_5
   3556   2758   1  d6f6a800  UN   5.5 1075264 112420  watchdog
   3598   2758   0  d79f8000  UN   3.5  986664  72460  com.lge.ime
   4674      1   2  c6562800  UN   0.0   11324    576  thermalmgr
   5103      1   0  c55f0000  UN   0.0    1040    576  dhcpcd
 
 
system_server Call Stack
system_server가 file open을 하는 도중, mutex_lock을 얻기 위해, UNINTERRUPTIBLE 상태로 들어간 상태에서 schedule된 상태입니다.
 
crash> bt 3127
PID: 3127   TASK: d66ca800  CPU: 1   COMMAND: "system_server"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c093e6e0>]
#2 [<c093e6e0>] (schedule_preempt_disabled) from [<c093cab4>]
#3 [<c093cab4>] (mutex_lock_nested) from [<c0145ed8>]
#4 [<c0145ed8>] (do_last) from [<c0146950>]
#5 [<c0146950>] (path_openat) from [<c014707c>]
#6 [<c014707c>] (do_filp_open) from [<c01372d0>]
#7 [<c01372d0>] (do_sys_open) from [<c0137398>]
#8 [<c0137398>] (sys_open) from [<c000f6c0>]
    pc : [<400a0918>]    lr : [<400ae009>]    psr: 200f0010
    sp : be807318  ip : 6d773a44  fp : be80736c
    r10: 417ed7a0  r9 : 6d773a38  r8 : be807354
    r7 : 00000005  r6 : 00000042  r5 : 00000180  r4 : 417ed178
    r3 : be807328  r2 : 00000180  r1 : 00020042  r0 : 88d01960
    Flags: nzCv  IRQs on  FIQs on  Mode USER_32  ISA ARM
 
 
해당 mutex lock 조사
 
crash> struct mutex 0xD8F0B900
struct mutex {
  count = {
    counter = -1  // lock 걸려있음
  },
<생략>
  wait_list = {
    next = 0xd626fdb0,
    prev = 0xd626fdb0
  },
  owner = 0xc4815000,   // lock의 owner임
  name = 0x0,
  magic = 0xd8f0b900,
  dep_map = {
    key = 0xc0fa51e8 <ext4_fs_type+92>,
    class_cache = {0xc124114c <lock_classes+231200>, 0xc1241bec <lock_classes+233920>},
    name = 0xc0ce3190 "&type->i_mutex_dir_key"
  }
}
 
 
owner task의 backtrace
 
PID: 21441   TASK: c4815000  CPU: 4   COMMAND: "BatteryStats-Wr"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c01eb6a0>]
#2 [<c01eb6a0>] (start_this_handle) from [<c01ebb44>]
#3 [<c01ebb44>] (jbd2__journal_start) from [<c01d3810>]
#4 [<c01d3810>] (__ext4_journal_start_sb) from [<c01a66f8>]
#5 [<c01a66f8>] (__ext4_new_inode) from [<c01b5bd4>]
#6 [<c01b5bd4>] (ext4_create) from [<c0143dcc>]
#7 [<c0143dcc>] (vfs_create) from [<c01464a0>]
#8 [<c01464a0>] (do_last) from [<c0146950>]
#9 [<c0146950>] (path_openat) from [<c014707c>]
#10 [<c014707c>] (do_filp_open) from [<c01372d0>]
#11 [<c01372d0>] (do_sys_open) from [<c0137398>]
#12 [<c0137398>] (sys_open) from [<c000f6c0>]
    pc : [<400a0918>]    lr : [<400ae009>]    psr: 20070010
    sp : 7f639ad0  ip : 796fbef4  fp : 7f639b24
    r10: 87b27138  r9 : 796fbee8  r8 : 7f639b0c
    r7 : 00000005  r6 : 00000241  r5 : 00000180  r4 : 86d5cd08
    r3 : 7f639ae0  r2 : 00000180  r1 : 00020241  r0 : 80b05e48
    Flags: nzCv  IRQs on  FIQs on  Mode USER_32  ISA ARM
 
 
0xc4815000 task(PID ? 21441, BatteryStats-Wr ) 도 UNINTERRUPTIBLE 상태임
 
21441   2758   4  c4815000  UN   5.5 1075264 112420  BatteryStats-Wr
 
 
BatteryStats-Wr task가 mutex lock을 accquire한 상태에서, TASK_UNINTERRUPTIBLE로 들어갔고, 
system_server는 같은 mutex lock을 accquire하기 위해 대기중 인 상태라 UI Freeze가 발생한 것으로 보입니다.
 
static int start_this_handle(journal_t *journal, handle_t *handle) {
<생략>
              spin_lock(&transaction->t_handle_lock);
              needed = transaction->t_outstanding_credits + nblocks;
 
              if (needed > journal->j_max_transaction_buffers) {
                            DEFINE_WAIT(wait);
 
                            jbd_debug(2, "Handle %p starting new commit...\n", handle);
                            spin_unlock(&transaction->t_handle_lock);
                            prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); //--> 여기서 UN으로 들어감
                            __log_start_commit(journal, transaction->t_tid);
                            spin_unlock(&journal->j_state_lock);
                            schedule();
                            finish_wait(&journal->j_wait_transaction_locked, &wait);
                            goto repeat;
              }
}
 
 
TASK_UNINTERRUPTIBLE 상태인 다른 몇몇 task의 backtrace를 조사해 봤습니다.
 
PID: 2611   TASK: d79d4600  CPU: 3   COMMAND: "jbd2/mmcblk0p29"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c01ee80c>]
#2 [<c01ee80c>] (jbd2_journal_commit_transaction) from [<c01f4794>]
#3 [<c01f4794>] (kjournald2) from [<c004ef1c>]
#4 [<c004ef1c>] (kthread) from [<c000f788>]
 
 
아래의 코드에서 UNINTERRUPTIBLE로 상태로 변경 후 schedule됩니다.
 
void jbd2_journal_commit_transaction(journal_t *journal){
// snip
              while (atomic_read(&commit_transaction->t_updates)) {
                            DEFINE_WAIT(wait);
                            prepare_to_wait(&journal->j_wait_updates, &wait, TASK_UNINTERRUPTIBLE);
                            if (atomic_read(&commit_transaction->t_updates)) {
                                          spin_unlock(&commit_transaction->t_handle_lock);
                                          write_unlock(&journal->j_state_lock);
                                          schedule();
                                          write_lock(&journal->j_state_lock);
                                          spin_lock(&commit_transaction->t_handle_lock);
                            }
                            finish_wait(&journal->j_wait_updates, &wait);
              }  // snip
 
 
아래와 같이 commit_transaction->t_updates가 2입니다.
 
crash> struct transaction_s.t_updates 0xD6858D80
 t_updates = {
   counter = 2
 }
 
 
BatteryStats-Wr task가 mutex lock을 accquire한 상태에서, TASK_UNINTERRUPTIBLE로 들어갔고, system_server는 같은 mutex lock을 accquire하기 위해 대기중 인 상태라 UI Freeze가 발생한 것으로 보입니다.
 
static int start_this_handle(journal_t *journal, handle_t *handle) {
<생략>
              spin_lock(&transaction->t_handle_lock);
              needed = transaction->t_outstanding_credits + nblocks;
 
              if (needed > journal->j_max_transaction_buffers) {
                            DEFINE_WAIT(wait);
 
                            jbd_debug(2, "Handle %p starting new commit...\n", handle);
                            spin_unlock(&transaction->t_handle_lock);
                            prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); //<<-- 여기서 UN으로 들어감
                            __log_start_commit(journal, transaction->t_tid);
                            spin_unlock(&journal->j_state_lock);
                            schedule();
                            finish_wait(&journal->j_wait_transaction_locked, &wait);
                            goto repeat;
              }
}
.
 
jbd2/mmcblk0p10 task를 보면, 아래의 코드에서 UNINTERRUPTIBLE로 상태로 변경 후 schedule됐는데, io buffer가 lock상태여서, lock이 풀릴때까지 대기 상태에 있는 것입니다.
 
void jbd2_journal_commit_transaction(journal_t *journal) {
<생략>
                            jh = commit_transaction->t_iobuf_list->b_tprev;
                            bh = jh2bh(jh);
                            if (buffer_locked(bh)) {
                                          wait_on_buffer(bh);
                                          goto wait_for_iobuf;
                            }
<생략>
}
PID: 2627   TASK: d82dc600  CPU: 2   COMMAND: "jbd2/mmcblk0p10"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c093e3d4>]
#2 [<c093e3d4>] (io_schedule) from [<c0168084>]
#3 [<c0168084>] (sleep_on_buffer) from [<c093b784>]
#4 [<c093b784>] (__wait_on_bit) from [<c093b834>]
#5 [<c093b834>] (out_of_line_wait_on_bit) from [<c0168064>]
#6 [<c0168064>] (__wait_on_buffer) from [<c01ef218>]
#7 [<c01ef218>] (jbd2_journal_commit_transaction) from [<c01f4794>]
#8 [<c01f4794>] (kjournald2) from [<c004ef1c>]
#9 [<c004ef1c>] (kthread) from [<c000f788>]
 
 
TASK_UNINTERRUPTIBLE상태의 task중 아래와 같이 대부분의 task들이 file관련 동작중 TASK_UNINTERRUPTIBLE상태로 진입한 상태입니다.
 
PID: 2977   TASK: d82c0a00  CPU: 3   COMMAND: "rctd"
PID: 2993   TASK: d6763c00  CPU: 2   COMMAND: "sdcard"
PID: 3053   TASK: d84a6e00  CPU: 0   COMMAND: "logcat"
PID: 3070   TASK: d7f2da00  CPU: 0   COMMAND: "logcat"
PID: 3071   TASK: d7c91400  CPU: 3   COMMAND: "logcat"
PID: 3072   TASK: d75a8a00  CPU: 0   COMMAND: "logcat"
PID: 3077   TASK: d776b200  CPU: 3   COMMAND: "tcpdump"
PID: 3127   TASK: d66ca800  CPU: 1   COMMAND: "system_server"
PID: 3305   TASK: d7cbe400  CPU: 3   COMMAND: "nvm_server"
PID: 3332   TASK: d675e400  CPU: 2   COMMAND: "InputDispatcher"
PID: 3546   TASK: d7cba800  CPU: 3   COMMAND: "Binder_5"
PID: 3556   TASK: d6f6a800  CPU: 1   COMMAND: "watchdog"
PID: 5103   TASK: c55f0000  CPU: 0   COMMAND: "dhcpcd"
PID: 5165   TASK: c5008a00  CPU: 1   COMMAND: "GC"
PID: 5738   TASK: c59e3200  CPU: 2   COMMAND: "Thread-315"
PID: 8773   TASK: c5344600  CPU: 3   COMMAND: "kworker/u16:6"
PID: 21359  TASK: c4826e00  CPU: 2   COMMAND: "OMXCallbackDisp"
PID: 21413  TASK: d6e48000  CPU: 0   COMMAND: "Thread-220"
PID: 21416  TASK: c9ee0a00  CPU: 1   COMMAND: "referencesProto"
PID: 21424  TASK: d84a4600  CPU: 0   COMMAND: "Compiler"
PID: 21431  TASK: c62c1e00  CPU: 4   COMMAND: "UsageStatsServi"
PID: 21441  TASK: c4815000  CPU: 4   COMMAND: "BatteryStats-Wr"
 
 
다시 최초의 log를 리뷰하니 PID 1373, mmcqd/0 task에서 WARNING 메시지 출력합니다.
 
[ 7165.077807] WARNING: at /root/pi/kernel/workqueue.c:2904 __cancel_work_timer+0x12c/0x178()
[ 7165.087114] Modules linked in: texfat(PO)
[ 7165.092596] CPU: 3 PID: 1373 Comm: mmcqd/0 Tainted: P        W  O 3.10.44+ #1
[ 7165.101579] [<c0017344>] (unwind_backtrace+0x0/0x144) from [<c0013874>] (show_stack+0x20/0x24)
[ 7165.111750] [<c0013874>] (show_stack+0x20/0x24) from [<c0936350>] (dump_stack+0x20/0x28)
[ 7165.121489] [<c0936350>] (dump_stack+0x20/0x28) from [<c0028848>] (warn_slowpath_common+0x5c/0x7c)
[ 7165.132133] [<c0028848>] (warn_slowpath_common+0x5c/0x7c) from [<c0028894>] (warn_slowpath_null+0x2c/0x34)
[ 7165.143418] [<c0028894>] (warn_slowpath_null+0x2c/0x34) from [<c0048600>] (__cancel_work_timer+0x12c/0x178)
[ 7165.154654] [<c0048600>] (__cancel_work_timer+0x12c/0x178) from [<c0048668>] (cancel_delayed_work_sync+0x1c/0x20)
[ 7165.168097] [<c0048668>] (cancel_delayed_work_sync+0x1c/0x20) from [<c007b588>] (pm_qos_update_request+0x34/0xac)
[ 7165.178711] [<c007b588>] (pm_qos_update_request+0x34/0xac) from [<c064348c>] (sdhci_disable+0x2c/0x48)
[ 7165.189268] [<c064348c>] (sdhci_disable+0x2c/0x48) from [<c062e68c>] (mmc_release_host+0xa8/0xc0)
[ 7165.199855] [<c062e68c>] (mmc_release_host+0xa8/0xc0) from [<c0640794>] (mmc_blk_issue_rq+0x104/0x664)
[ 7165.210600] [<c0640794>] (mmc_blk_issue_rq+0x104/0x664) from [<c0641514>] (mmc_queue_thread+0xb4/0x14c)
[ 7165.221673] [<c0641514>] (mmc_queue_thread+0xb4/0x14c) from [<c004ef1c>] (kthread+0xc0/0xd0)
[ 7165.231685] [<c004ef1c>] (kthread+0xc0/0xd0) from [<c000f788>] (ret_from_fork+0x14/0x20)
 
 
 mmcqd관련 task의 backtrace 조사한 결과를 정리하면 다음과 같습니다.
 
 01 |  mmcqd/0 task의 backtrace를 보면, WARNING을 출력했던 backtrace와 동일한 backtrace를 가지고 있음을 알 수 있음.
 02 | mmcqd/0는 mmc 동작중 pm_qos를 통해 boosting을 했고, 완료 이후, pm_qos설정을 이전으로 돌리기 위한 작업을 진행하는 과정
 03 |   sdhci_disable () -> pm_qos_update_request () -> cancel_delayed_work_sync () -> __cancel_work_timer () -> schedule ()
 
PID: 1373    TASK: d7d29400  CPU: 7   COMMAND: "mmcqd/0"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c0048610>]
#2 [<c0048610>] (__cancel_work_timer) from [<c0048668>]
#3 [<c0048668>] (cancel_delayed_work_sync) from [<c007b588>]
#4 [<c007b588>] (pm_qos_update_request) from [<c064348c>]
#5 [<c064348c>] (sdhci_disable) from [<c062e68c>]
#6 [<c062e68c>] (mmc_release_host) from [<c0640794>]
#7 [<c0640794>] (mmc_blk_issue_rq) from [<c0641514>]
#8 [<c0641514>] (mmc_queue_thread) from [<c004ef1c>]
#9 [<c004ef1c>] (kthread) from [<c000f788>]
 
PID: 1374   TASK: d7d29e00  CPU: 0   COMMAND: "mmcqd/0boot0"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c0641580>]
#2 [<c0641580>] (mmc_queue_thread) from [<c004ef1c>]
#3 [<c004ef1c>] (kthread) from [<c000f788>]
 
PID: 1375   TASK: d79d6e00  CPU: 0   COMMAND: "mmcqd/0boot1"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c0641580>]
#2 [<c0641580>] (mmc_queue_thread) from [<c004ef1c>]
#3 [<c004ef1c>] (kthread) from [<c000f788>]
 
PID: 1376   TASK: d79d6400  CPU: 2   COMMAND: "mmcqd/0rpmb"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c0641580>]
#2 [<c0641580>] (mmc_queue_thread) from [<c004ef1c>]
#3 [<c004ef1c>] (kthread) from [<c000f788>]
 
PID: 1472   TASK: d8222800  CPU: 2   COMMAND: "mmcqd/1"
#0 [<c093de28>] (__schedule) from [<c093e324>]
#1 [<c093e324>] (schedule) from [<c0641580>]
#2 [<c0641580>] (mmc_queue_thread) from [<c004ef1c>]
#3 [<c004ef1c>] (kthread) from [<c000f788>]
 
 
__cancel_work_timer ()  함수에서 mmcqd  프로세스가 TASK_INERRUPTIBLE로 진입 후 schedule() 됩니다.
 
static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) {
<생략>
              do {
                            ret = try_to_grab_pending(work, is_dwork, &flags);
                            if (unlikely(ret == -ENOENT)) {
                                          if (!flush_work(work)) {
                                                        if (work_is_canceling(work)) {
                                                                      set_current_state(TASK_INTERRUPTIBLE);
                                                                      schedule(); //<<--INTERRUPTIBLE로 진입후 스케쥴
                                                                      __set_current_state(TASK_RUNNING);
                                                        }
                                          }
                            }
              } while (unlikely(ret < 0));
<생략> }
 
 
최초 로그의 WARNING이 출력된 시간과 비슷(7165s) 합니다.
 
crash> struct task_struct.se 0xd7d29400
 se = {
   load = {
     weight = 1024,
     inv_weight = 4194304
   },
   run_node = {
     __rb_parent_color = 1,
     rb_right = 0x0,
     rb_left = 0x0
   },
   group_node = {
     next = 0xd7d2944c,
     prev = 0xd7d2944c
   },
   on_rq = 0,
   exec_start = 7165247678131, //<<-- 최초 로그의 WARNING이 출력된 시간과 비슷함(7165s)
 
 
task_struct.se.exec_start는 아래의 코드에서 update가 되도록 되어있습니다.
 
static void update_curr(struct cfs_rq *cfs_rq){ <생략>
              curr->exec_start = now;
<생략>
}
 
 
emmc 관련 동작을 하는 mmcqd/0 가 7165s wakeup되어 running중, TASK_INTERRUPTIBLE 상태로 schedule이 되고, 다시 wakeup되지 않습니다.
 1.  file open/read/write 및 jonrnaling 등의 동작을 시도한 task들이 전부 TASK_UNINTERRUPTIBLE상태로 들어가게 됨.
 2.  따라서, UI Freeze로 현상이 나타남.
 
 
 
다음 패치를 적용한 후 UI Freeze 현상은 사라졌습니다.
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e1cae9d..cb3a27b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2759,6 +2759,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 * If someone else is canceling, wait for the same event it
                 * would be waiting for before retrying.
                 */
 
-               if (unlikely(ret == -ENOENT))
-                       flush_work(work);
+               if (unlikely(ret == -ENOENT)) {
+                       if (!flush_work(work) &&  current->policy == SCHED_FIFO && work_is_canceling(work)) {   
+                               struct sched_param old, new;
+                               new.sched_priority = 0;
+                               sched_setscheduler(current, SCHED_NORMAL, &new);
+                               schedule();
+                               sched_setscheduler(current, SCHED_FIFO, &old);
+                       }
+               }
 
 
"이 포스팅이 유익하다고 생각되시면 댓글로 응원해주시면 감사하겠습니다.  
혹시 글을 읽고 궁금점이 있으면 댓글로 질문 남겨주세요. 상세한 답글 올려 드리겠습니다!"