Kernel panic log

 2107.232713 / 01-01 11:11:03.809][7] init: cannot find '/system/bin/qrngp' (No such file or directory), disabling 'qrngp'

[ 2107.239317 / 01-01 11:11:03.809][5] Unable to handle kernel NULL pointer dereference at virtual address 00000028

[ 2107.239351 / 01-01 11:11:03.809][5] pgd = e37ec000

[ 2107.239366 / 01-01 11:11:03.809][0] [00000028] *pgd=00000000

[ 2107.239388 / 01-01 11:11:03.809][5] Internal error: Oops: 5 [#1] PREEMPT SMP ARM

[ 2107.239405 / 01-01 11:11:03.809][0] Modules linked in: texfat(PO)

[ 2107.239433 / 01-01 11:11:03.809][5] CPU: 5 PID: 2803 Comm: sensorservice Tainted: P        W  O   3.18.31-perf-gd069b48-00001-g8a6d6e5 #1

[ 2107.239452 / 01-01 11:11:03.809][5] task: e3ffb700 ti: e37f4000 task.ti: e37f4000

[ 2107.239479 / 01-01 11:11:03.809][5] PC is at find_vma_links+0x1c/0x78

[ 2107.239499 / 01-01 11:11:03.809][5] LR is at vma_adjust+0x3a0/0x574


Equivalent callstacks can be restored using the following command.

crash>  bt -I C01002D8 -S  E37F5DD0 0xE3FFB700

PID: 2803   TASK: e3ffb700  CPU: 5   COMMAND: "sensorservice"

bt: WARNING:  stack address:0xe37f5b98, program counter:0xc0ee5b60

 #0 [<c01002d8>] (do_DataAbort) from [<c010ad58>]

    pc : [<c01f980c>]    lr : [<c01fa708>]    psr: 200f0013

    sp : e37f5ec4  ip : 00000034  fp : e8236d9c

    r10: 00000000  r9 : 00000000  r8 : b2c99000

    r7 : c4616c80  r6 : e8236d68  r5 : 7f555034  r4 : 7f555034

    r3 : e37f5f04  r2 : b2c97000  r1 : b2c95000  r0 : 7f555038

    Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM

 #1 [<c010ad58>] (__dabt_svc) from [<c01fa708>]

 #2 [<c01f980c>] (find_vma_links) from [<c01fa708>]

 #3 [<c01fa708>] (vma_adjust) from [<c01fa9e4>]

 #4 [<c01fa9e4>] (__split_vma) from [<c01fb364>]

 #5 [<c01fb364>] (do_munmap) from [<c01fb558>]

 #6 [<c01fb558>] (vm_munmap) from [<c0106820>]


Execution profile at the moment of kernel panic

[1]: R12 is updated as 0x34 where 0x34 = *0x7f555038 = *R0

[2]: Data abort is raised since base address for LDR execution is 0x28(0x34-0xC = R12-0xC)

0xc01f97f0 <find_vma_links>:    push    {r4, r5, lr}

0xc01f97f4 <find_vma_links+0x4>:        add     r0, r0, #4

0xc01f97f8 <find_vma_links+0x8>:        mov     r4, #0

0xc01f97fc <find_vma_links+0xc>:        mov     r5, r4

0xc01f9800 <find_vma_links+0x10>:       ldr     r12, [r0]  //<<--[1]

0xc01f9804 <find_vma_links+0x14>:       cmp     r12, #0

0xc01f9808 <find_vma_links+0x18>:       beq     0xc01f983c <find_vma_links+76>

0xc01f980c <find_vma_links+0x1c>:       ldr     r0, [r12, #-12] //<<--[2]


When tracing back to the context where the call to find_vma_links() is made inside vma_adjust(), R0 is copied from R7(0xC4616C80) whose data structure type is (struct mm_struct*) .

0xc01fa6e8 <vma_adjust+896>:    mov     r0, r7

0xc01fa6ec <vma_adjust+900>:    str     r3, [sp]

0xc01fa6f0 <vma_adjust+904>:    add     r3, sp, #60     ; 0x3c

0xc01fa6f4 <vma_adjust+908>:    str     r3, [sp, #4]

0xc01fa6f8 <vma_adjust+912>:    add     r3, sp, #52     ; 0x34

0xc01fa6fc <vma_adjust+916>:    ldr     r1, [r2]

0xc01fa700 <vma_adjust+920>:    ldr     r2, [r2, #4]

0xc01fa704 <vma_adjust+924>:    bl      0xc01f97f0 <find_vma_links>  //<<--


Identical debug information

crash> struct task_struct.mm e3ffb700

  mm = 0xc4616d80


At logical level, when find_vma_links() is called, R0 should have been 0xC4616C80 instead of 0x7f555038. This signature cannot be understandable from SW point of view.


This issue ended up with replacing defective PMIC with normal one in the problematic target device.


Debugging 

Kernel panic occurs at 68 line inside __list_del_entry() whose caller function is process_one_work().

Code Review at the moment of kernel panic


49void __list_del_entry(struct list_head *entry)

50{

51     struct list_head *prev, *next;

52

53     prev = entry->prev;

54     next = entry->next;

55

56    if (WARN(next == LIST_POISON1,

57                      "list_del corruption, %p->next is LIST_POISON1 (%p)\n",

58                      entry, LIST_POISON1) ||

59       WARN(prev == LIST_POISON2,

60                     "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",

61                     entry, LIST_POISON2) ||

62      WARN(prev->next != entry,

63                    "list_del corruption. prev->next should be %p, "

64                    "but was %p\n", entry, prev->next) ||

65      WARN(next->prev != entry,

66                    "list_del corruption. next->prev should be %p, "

67                   "but was %p\n", entry, next->prev)) {

68                   BUG_ON(PANIC_CORRUPTION); // <<-- kernel panic

69     return;

70 }

71

72     __list_del(prev, next);

73}


Callstacks

-000|do_undefinstr(regs = 0x0)

-001|__und_svc_fault(asm)

-->|exception

-002|__list_del_entry(?) // <<-- kernel panic

-003|process_one_work(worker = 0xEC048600, work = 0xEA2585AC)

-004|worker_thread(__worker = 0x0)

-005|kthread(_create = 0xEB9B3000)

-006|ret_from_fork(asm)

-007|ret_fast_syscall(asm)

The list_del_init() is called inside process_one_work() whose parameter is &work->entry.

Now, it is important to debug the state of &work->entry

static void process_one_work(struct worker *worker, struct work_struct *work)

__releases(&pool->lock)

__acquires(&pool->lock)

{

    struct pool_workqueue *pwq = get_work_pwq(work);

    struct worker_pool *pool = worker->pool;

    bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;

    int work_color;

    struct worker *collision;

//snip

/* claim and dequeue */

    debug_work_deactivate(work);

    hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);

    worker->current_work = work;

    worker->current_func = work->func;

    worker->current_pwq = pwq;

    work_color = get_work_color(work);

    list_del_init(&work->entry);

Going back to process_one_work() function frame using T32, I can have the following debug information

[1],[2]: next element is corrupted

[3]: worker.current_func is found to be irq_affinity_notify which looks abnormal.

process_one_work(

    worker = 0xEC048600 -> (

        entry = (next = 0x0, prev = 0xEC5AE968),

        hentry = (next = 0x0, pprev = 0xEC5AE968),

        current_work = 0xEA2585AC,

        current_func = 0xC016FD40,

        current_pwq = 0xEC5B2400,

        desc_valid = FALSE,

The memory content around 0xEC048600 seems to be corrupted especially for irq_affinity_notify at 0xEC04860C.

NSD:EC0485F8| 5A 5A 5A 5A 0x5A5A5A5A

NSD:EC0485FC| 5A 5A 5A 5A 0x5A5A5A5A

NSD:EC048600| 00 00 00 00 0x0 // <<--

NSD:EC048604| 68 E9 5A EC 0xEC5AE968

NSD:EC048608| AC 85 25 EA 0xEA2585AC

NSD:EC04860C| 40 FD 16 C0 0xC016FD40 \\vmlinux\manage\irq_affinity_notify // <<--

NSD:EC048610| 00 24 5B EC 0xEC5B2400

NSD:EC048614| 00 00 00 00 0x0

NSD:EC048618| 18 86 04 EC 0xEC048618

NSD:EC04861C| 18 86 04 EC 0xEC048618

NSD:EC048620| C0 57 2A EC 0xEC2A57C0

NSD:EC048624| 00 E8 5A EC 0xEC5AE800

NSD:EC048628| E8 E0 04 E9 0xE904E0E8

NSD:EC04862C| F0 E9 5A EC 0xEC5AE9F0

NSD:EC048630| AC 8D FF FF 0xFFFF8DAC

Searching "irq_affinity_notify" key word enables me to the definition of irq_affinity_notify structure.

struct irq_affinity_notify {

    unsigned int irq;

    struct kref kref;

    struct work_struct work;

    void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);

    void (*release)(struct kref *ref);

};



By casting 0xEA2585AC as struct work_struct, the following debug information can be deduced which looks very odd.

The struct work_struct.entry.next is corrupted as NULL and work_func_t is registered as irq_affinity_notify().

(struct work_struct *) [-] (struct work_struct*)0xEA2585AC = 0xEA2585AC = __bss_stop+0x2857B9A4 -> (

  (atomic_long_t) [D:0xEA2585AC] data = ((int) [D:0xEA2585AC] counter = -329571323 = 0xEC5B2405 = '.[

    (struct list_head) [D:0xEA2585B0] entry = (

    (struct list_head *) [D:0xEA2585B0] next = 0xE9E9EAB4 = __bss_stop+0x281C1EAC -> (

      (struct list_head *) [D:0xE9E9EAB4] next = 0x0 = -> NULL,

    (struct list_head *) [D:0xE9E9EAB8] prev = 0x0 = -> NULL),

  (struct list_head *) [D:0xEA2585B4] prev = 0xEC5AE820 = -> ((struct list_head *) [D:0xEC5AE820]

 (work_func_t) [D:0xEA2585B8] func = 0xC016FD40 = irq_affinity_notify -> )


Looking into the symbol information at 0xEA2585AC address, I can have the following symbols below.

(i.e) irq_affinity_notify, pm_qos_irq_notify, pm_qos_irq_release, pm_qos_work_fn

________address|_data________|value_____________|symbol

NSD:EA25859C| FF 00 00 00 0xFF \\vmlinux\Global\up_b_offset+0xCF

NSD:EA2585A0| 9D 00 00 00 0x9D \\vmlinux\Global\up_b_offset+0x6D

NSD:EA2585A4| 9D 00 00 00 0x9D \\vmlinux\Global\up_b_offset+0x6D

NSD:EA2585A8| 03 00 00 00 0x3

NSD:EA2585AC| 05 24 5B EC 0xEC5B2405

NSD:EA2585B0| B4 EA E9 E9 0xE9E9EAB4

NSD:EA2585B4| 20 E8 5A EC 0xEC5AE820

NSD:EA2585B8| 40 FD 16 C0 0xC016FD40 \\vmlinux\manage\irq_affinity_notify

NSD:EA2585BC| 58 90 16 C0 0xC0169058 \\vmlinux\kernel/power/qos\pm_qos_irq_notify

NSD:EA2585C0| 00 90 16 C0 0xC0169000 \\vmlinux\kernel/power/qos\pm_qos_irq_release

NSD:EA2585C4| 00 94 35 77 0x77359400

NSD:EA2585C8| C8 85 25 EA 0xEA2585C8

NSD:EA2585CC| C8 85 25 EA 0xEA2585C8

NSD:EA2585D0| 14 81 86 C1 0xC1868114 \\vmlinux\adreno\device_3d0+0x45C

NSD:EA2585D4| F8 C6 C4 E9 0xE9C4C6F8

NSD:EA2585D8| 01 00 00 00 0x1

NSD:EA2585DC| E0 FF FF FF 0xFFFFFFE0

NSD:EA2585E0| E0 85 25 EA 0xEA2585E0

NSD:EA2585E4| E0 85 25 EA 0xEA2585E0

NSD:EA2585E8| 10 8F 16 C0 0xC0168F10 \\vmlinux\kernel/power/qos\pm_qos_work_fn

NSD:EA2585EC| 00 00 00 00 0x0

NSD:EA2585F0| 00 00 00 00 0x0

NSD:EA2585F4| 00 00 00 00 0x0

NSD:EA2585F8| 02 00 B8 EB 0xEBB80002

NSD:EA2585FC| A8 77 13 C0 0xC01377A8 \\vmlinux\workqueue\delayed_work_timer_fn

NSD:EA258600| DC 85 25 EA 0xEA2585DC

When searching pm_qos_irq_notify in the kernel source code, I can see that pm_qos_irq_notify is registered as req->irq_notify.notify.

void pm_qos_add_request(struct pm_qos_request *req,

int pm_qos_class, s32 value)

{

//snip 

case PM_QOS_REQ_AFFINE_IRQ:

    if (irq_can_set_affinity(req->irq)) {

         int ret = 0;

         struct irq_desc *desc = irq_to_desc(req->irq);

         struct cpumask *mask = desc->irq_data.affinity;

         /* Get the current affinity */

         cpumask_copy(&req->cpus_affine, mask);

         req->irq_notify.irq = req->irq;

         req->irq_notify.notify = pm_qos_irq_notify;

         req->irq_notify.release = pm_qos_irq_release;

I can see that pm_qos_irq_notify() as well as pm_qos_irq_release() is registered as struct irq_affinity_notify.

struct irq_affinity_notify {

    unsigned int irq;

    struct kref kref;

    struct work_struct work;

    void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); // pm_qos_irq_notify

    void (*release)(struct kref *ref); // pm_qos_irq_release

};


struct pm_qos_request {

    enum pm_qos_req_type type;

    struct cpumask cpus_affine;

#ifdef CONFIG_SMP

    uint32_t irq;

/* Internal structure members */

    struct irq_affinity_notify irq_notify;

#endif

    struct plist_node node;

    int pm_qos_class;

    struct delayed_work work; /* for pm_qos_update_request_timeout */

};

The 0xEA2585A4 address is starting point where struct irq_affinity_notify element is located.

(struct irq_affinity_notify *) [-] (struct irq_affinity_notify*)0xEA2585A4

(unsigned int) [D:0xEA2585A4] irq = 157 = 0x9D = '....',

(struct kref) [D:0xEA2585A8] kref = (

   (atomic_t) [D:0xEA2585A8] refcount = ((int) [D:0xEA2585A8] counter = 3 = 0x3 = '....')),

(struct work_struct) [D:0xEA2585AC] work = (

   (atomic_long_t) [D:0xEA2585AC] data = ((int) [D:0xEA2585AC] counter = -329571323 = 0xEC5B2

   (struct list_head) [D:0xEA2585B0] entry = (

      (struct list_head *) [D:0xEA2585B0] next = 0xE9E9EAB4

      (struct list_head *) [D:0xEA2585B4] prev = 0xEC5AE820

   (work_func_t) [D:0xEA2585B8] func = 0xC016FD40 = irq_affinity_notify -> ),

   (void (*)()) [D:0xEA2585BC] notify = 0xC0169058 = pm_qos_irq_notify -> ,

   (void (*)()) [D:0xEA2585C0] release = 0xC0169000 = pm_qos_irq_release -> )

________address|_data________|value_____________|symbol

NSD:EA25859C| FF 00 00 00 0xFF \\vmlinux\Global\up_b_offset+0xCF

NSD:EA2585A0| 9D 00 00 00 0x9D \\vmlinux\Global\up_b_offset+0x6D

NSD:EA2585A4| 9D 00 00 00 0x9D //struct irq_affinity_notify.irq

NSD:EA2585A8| 03 00 00 00 0x3 //struct irq_affinity_notify.kref

NSD:EA2585AC| 05 24 5B EC 0xEC5B2405 //struct irq_affinity_notify.work.data

NSD:EA2585B0| B4 EA E9 E9 0xE9E9EAB4 //struct irq_affinity_notify.work.entry.next

NSD:EA2585B4| 20 E8 5A EC 0xEC5AE820 //struct irq_affinity_notify.work.entry.prev

NSD:EA2585B8| 40 FD 16 C0 0xC016FD40 \\vmlinux\manage\irq_affinity_notify //struct irq_affinity_notify.work.func

NSD:EA2585BC| 58 90 16 C0 0xC0169058 \\vmlinux\kernel/power/qos\pm_qos_irq_notify

NSD:EA2585C0| 00 90 16 C0 0xC0169000 \\vmlinux\kernel/power/qos\pm_qos_irq_release

NSD:EA2585C4| 00 94 35 77 0x77359400

NSD:EA2585C8| C8 85 25 EA 0xEA2585C8

NSD:EA2585CC| C8 85 25 EA 0xEA2585C8

NSD:EA2585D0| 14 81 86 C1 0xC1868114 \\vmlinux\adreno\device_3d0+0x45C

NSD:EA2585D4| F8 C6 C4 E9 0xE9C4C6F8

NSD:EA2585D8| 01 00 00 00 0x1

NSD:EA2585DC| E0 FF FF FF 0xFFFFFFE0

NSD:EA2585E0| E0 85 25 EA 0xEA2585E0

NSD:EA2585E4| E0 85 25 EA 0xEA2585E0

NSD:EA2585E8| 10 8F 16 C0 0xC0168F10 \\vmlinux\kernel/power/qos\pm_qos_work_fn

NSD:EA2585EC| 00 00 00 00 0x0

NSD:EA2585F0| 00 00 00 00 0x0

NSD:EA2585F4| 00 00 00 00 0x0

NSD:EA2585F8| 02 00 B8 EB 0xEBB80002

NSD:EA2585FC| A8 77 13 C0 0xC01377A8 \\vmlinux\workqueue\delayed_work_timer_fn

NSD:EA258600| DC 85 25 EA 0xEA2585DC


155: 1608 0 0 0 0 0 0 0 GIC mmc0

157: 98 0 0 0 0 0 0 0 GIC mmc1

166: 10 0 0 0 0 0 0 0 GIC msm_otg,msm_hsusb


Taking a looking at the structure associated with struct irq_affinity_notify,

struct pm_qos_request seems to include struct irq_affinity_notify irq_notify.

struct pm_qos_request {

    enum pm_qos_req_type type;

    struct cpumask cpus_affine;

#ifdef CONFIG_SMP

    uint32_t irq;

/* Internal structure members */

    struct irq_affinity_notify irq_notify;

#endif

    struct plist_node node;

    int pm_qos_class;

    struct delayed_work work; /* for pm_qos_update_request_timeout */

};

By casting 0xEA258598 as struct pm_qos_request, the whole picture is drawn as below.

(i.e) type = PM_QOS_REQ_AFFINE_IRQ

(struct pm_qos_request *) [-] (struct pm_qos_request*)0xEA258598 = 0xEA258598 = __bss_sto

  (enum pm_qos_req_type) [D:0xEA258598] type = PM_QOS_REQ_AFFINE_IRQ = 2 = 0x2 = '....',

  (struct cpumask) [D:0xEA25859C] cpus_affine = (

    (long unsigned int [1]) [D:0xEA25859C] bits = (

    [0] = 255 = 0xFF = '....')),

  (uint32_t) [D:0xEA2585A0] irq = 157 = 0x9D = '....',

  (struct irq_affinity_notify) [D:0xEA2585A4] irq_notify = ((unsigned int) [D:0xEA2585A4]  

  (struct plist_node) [D:0xEA2585C4] node = ((int) [D:0xEA2585C4] prio = 2000000000 = 0x7

  (int) [D:0xEA2585D8] pm_qos_class = 1 = 0x1 = '....',

  (struct delayed_work) [D:0xEA2585DC] work = (

   (struct work_struct) [D:0xEA2585DC] work = (

    (atomic_long_t) [D:0xEA2585DC] data = ((int) [D:0xEA2585DC] counter = -32 = 0xFFFFF

    (struct list_head) [D:0xEA2585E0] entry = ((struct list_head *) [D:0xEA2585E0] next

  (work_func_t) [D:0xEA2585E8] func = 0xC0168F10 = pm_qos_work_fn -> ),

  (struct timer_list) [D:0xEA2585EC] timer = ((struct list_head) [D:0xEA2585EC] entry =

 (struct workqueue_struct *) [D:0xEA258620] wq = 0x0 = -> NULL,

 (int) [D:0xEA258624] cpu = 0 = 0x0 = '....'))

(where)

________address|_data________|value_____________|symbol

NSD:EA258590| 00 00 00 00 0x0

NSD:EA258594| 01 00 00 00 0x1

NSD:EA258598| 02 00 00 00 0x2 //enum pm_qos_req_type type;

NSD:EA25859C| FF 00 00 00 0xFF //struct cpumask cpus_affine;

NSD:EA2585A0| 9D 00 00 00 0x9D //uint32_t irq;

NSD:EA2585A4| 9D 00 00 00 0x9D //struct irq_affinity_notify

NSD:EA2585A8| 03 00 00 00 0x3

NSD:EA2585AC| 05 24 5B EC 0xEC5B2405

NSD:EA2585B0| B4 EA E9 E9 0xE9E9EAB4

NSD:EA2585B4| 20 E8 5A EC 0xEC5AE820

NSD:EA2585B8| 40 FD 16 C0 0xC016FD40 \\vmlinux\manage\irq_affinity_notify

NSD:EA2585BC| 58 90 16 C0 0xC0169058 \\vmlinux\kernel/power/qos\pm_qos_irq_notify

NSD:EA2585C0| 00 90 16 C0 0xC0169000 \\vmlinux\kernel/power/qos\pm_qos_irq_release

NSD:EA2585C4| 00 94 35 77 0x77359400

NSD:EA2585C8| C8 85 25 EA 0xEA2585C8

NSD:EA2585CC| C8 85 25 EA 0xEA2585C8

NSD:EA2585D0| 14 81 86 C1 0xC1868114 \\vmlinux\adreno\device_3d0+0x45C

NSD:EA2585D4| F8 C6 C4 E9 0xE9C4C6F8

NSD:EA2585D8| 01 00 00 00 0x1

NSD:EA2585DC| E0 FF FF FF 0xFFFFFFE0

NSD:EA2585E0| E0 85 25 EA 0xEA2585E0

NSD:EA2585E4| E0 85 25 EA 0xEA2585E0

NSD:EA2585E8| 10 8F 16 C0 0xC0168F10 \\vmlinux\kernel/power/qos\pm_qos_work_fn

NSD:EA2585EC| 00 00 00 00 0x0

The (*(((struct pm_qos_request*)0xEA258598))).work.work.data is equal to 0xFFFFFFE0.

[-] (struct pm_qos_request*)0xEA258598 = 0xEA258598 -> (

//snip

D:0xEA2585DC] work = (

[D:0xEA2585DC] work = (

[D:0xEA2585DC] data = ([D:0xEA2585DC] counter = 0xFFFFFFE0), // <<-- WORK_STRUCT_NO_POOL

[D:0xEA2585E0] entry = ([D:0xEA2585E0] next = 0xEA2585E0, [D:0xEA2585E4] prev = 0xEA2585E0),

[D:0xEA2585E8] func = 0xC0168F10),

[D:0xEA2585EC] timer = ([D:0xEA2585EC] entry = ([D:0xEA2585EC] next = 0x0, [D:0xEA2585F0] prev = 0x0), [

[D:0xEA258620] wq = 0x0,

[D:0xEA258624] cpu = 0x0))

So it is needed to apply the following patch to see if the same crash occurs since 0xFFFFFFE0,

where 0xFFFFFFE0 = WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,

diff --git a/kernel/power/qos.c b/kernel/power/qos.c

index 5f4c006..a122891 100644

--- a/kernel/power/qos.c

+++ b/kernel/power/qos.c

@@ -405,6 +405,9 @@ void pm_qos_update_request(struct pm_qos_request *req,

return;

}

+     if (work_is_canceling(&req->work) )

+        return;

+

      cancel_delayed_work_sync(&req->work);

      __pm_qos_update_request(req, new_value);

}

@@ -427,6 +430,9 @@ void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value,

"%s called for unknown object.", __func__))

return;

+     if (work_is_canceling(&req->work) )

+         return;

+

      cancel_delayed_work_sync(&req->work);

trace_pm_qos_update_request_timeout(req->pm_qos_class,

@@ -458,6 +464,9 @@ void pm_qos_remove_request(struct pm_qos_request *req)

return;

}

+     if (work_is_canceling(&req->work) )

+         return;

+

       cancel_delayed_work_sync(&req->work);

       trace_pm_qos_remove_request(req->pm_qos_class, PM_QOS_DEFAULT_VALUE);


static bool work_is_canceling(struct work_struct *work)

{

    unsigned long data = atomic_long_read(&work->data);

    return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);

}

(where)

WORK_STRUCT_PWQ = 0x4, WORK_OFFQ_CANCELING = 0x10


In-depth Analysis


The work->data is set as WORK_STRUCT_NO_POOL inside clear_work_data() which is called from

static void clear_work_data(struct work_struct *work)

{

    smp_wmb(); /* see set_work_pool_and_clear_pending() */

    set_work_data(work, WORK_STRUCT_NO_POOL, 0);

}


static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)

{

    static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);

    unsigned long flags;

    int ret;

    do {

        ret = try_to_grab_pending(work, is_dwork, &flags);

        if (unlikely(ret == -ENOENT)) {

               struct cwt_wait cwait;

               init_wait(&cwait.wait);

               cwait.wait.func = cwt_wakefn;

               cwait.work = work;

              prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,

              TASK_UNINTERRUPTIBLE);

              if (work_is_canceling(work))

                         schedule();

              finish_wait(&cancel_waitq, &cwait.wait);

      }

    } while (unlikely(ret < 0));

/* tell other tasks trying to grab @work to back off */ 

    mark_work_canceling(work);

    local_irq_restore(flags);

    flush_work(work); 

    clear_work_data(work); // <<-- 


임베디드 개발에서 T32-Trace32 장비를 많이 써서 소스 코드 디버깅을 합니다.


이번 시간에는 리눅스 커널에서 웨이크락(wake lock)을 잡는 모듈이 누구인지 파악하는 디버깅(Debugging) 방법을 소개합니다.


리눅스 커널는 wakeup_sources이란 연결 리스트를 통해 wakelock 정보를 확인할 수 있습니다.


리눅스 커널 개발자 중에 crash-utility을 잘 쓰는 분이 있습니다.

하지만 임베디드 개발에선 Trace32도 더 많이 활용하는 것 같습니다.

개발자는 다양한 툴을 써야 한 가지 툴에 종속된 노예 개발자가 되는 것을 피할 수 있습니다.

또한 각 툴의 장점을 잘 활용할 수도 있습니다.


먼저, 다음 T32 명령어를 입력해서 offsetof와 container_of 매크로를 생성합시다.

sYmbol.NEW.MACRO offsetof(type,member) ((int)(&((type*)0)->member))

sYmbol.NEW.MACRO container_of(ptr,type,member) ((type *)((char *)(ptr)-offsetof(type,member)))


리눅스 커널 소스 코드에서 구현된 그래도 매크로를 선언하는 것입니다.

#define container_of(ptr, type, member) ({ \

void *__mptr = (void *)(ptr); \

((type *)(__mptr - offsetof(type, member))); })


#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)


우선 wakeup_sources 변수를 봅시다.

v.v %h %t wakeup_sources

  (static struct list_head) wakeup_sources = (

    (struct list_head *) next = 0xFFFFFFC5FD92EA88 -> (

      (struct list_head *) next = 0xFFFFFFC5FD92FC88 -> (

        (struct list_head *) next = 0xFFFFFFC541226488,

        (struct list_head *) prev = 0xFFFFFFC5FD92EA88),

      (struct list_head *) prev = 0xFFFFFF97D92B4D00),

    (struct list_head *) prev = 0xFFFFFFC62D3EDB88)


wakelock을 잡은 모듈들은 wakeup_sources 이란 전역 변수에 등록돼 있습니다.

계속 struct list_head->next로 리스트들이 연결돼 있다는 사실을 파악할 수 있습니다.


이제 앞에서 생성한 container_of 매크로를 쓸 순간입니다. 다음과 같이 명령어를 입력합시다.

v.v %h %t %s  container_of(0xFFFFFFC5FD92EA88,struct wakeup_source,entry)

  (struct wakeup_source *) container_of(0xFFFFFFC5FD92EA88,struct wakeup_source,entry) = 0xFFFFFFC5FD92EA80 -> (

    (char *) name = 0xFFFFFFC55DEE3280 -> "ipc00001269_1988_HwBinder:682_1",

    (struct list_head) entry = ((struct list_head *) next = 0xFFFFFFC5FD92FC88, (struct list_head *) prev = 0xFFF

    (spinlock_t) lock = ((struct raw_spinlock) rlock = ((arch_spinlock_t) raw_lock = ((u16) owner = 0x298F, (u16)

    (struct wake_irq *) wakeirq = 0x0,

    (struct timer_list) timer = ((struct hlist_node) entry = ((struct hlist_node *) next = 0x0, (struct hlist_nod

    (long unsigned int) timer_expires = 0x0,

    (ktime_t) total_time = ((s64) tv64 = 0x0),

    (ktime_t) max_time = ((s64) tv64 = 0x0),

    (ktime_t) last_time = ((s64) tv64 = 0x000040E1FC6B0DF2),

    (ktime_t) start_prevent_time = ((s64) tv64 = 0x0),

    (ktime_t) prevent_sleep_time = ((s64) tv64 = 0x0),

    (long unsigned int) event_count = 0x0,

    (long unsigned int) active_count = 0x0,

    (long unsigned int) relax_count = 0x0,

    (long unsigned int) expire_count = 0x0,

    (long unsigned int) wakeup_count = 0x0,

    (long unsigned int) pending_count = 0x0,

    (bool:1) active = FALSE,

    (bool:1) autosleep_enabled = FALSE)


위 멤버 변수 중 active 변수가 FALSE이니 "ipc00001269_1988_HwBinder:682_1"이란 wakeup source가 wakelock을 잡고 있지 않습니다.


v.v %h %t %s  container_of(0xFFFFFFC5FD92EA88,struct wakeup_source,entry)


위와 같은 명령어를 쓴 이유는, struct wakeup_source이란 구조체에 entry이란 struct list_head 타입 멤버 변수가

wakeup_sources이란 링크드 리스트 전역 변수 wakeup_sources->next로 등록됐기 때문입니다.


해당 구조체를 봅시다.

struct wakeup_source {

const char *name;

struct list_head entry;

spinlock_t lock;

struct wake_irq *wakeirq;

struct timer_list timer;

unsigned long timer_expires;

ktime_t total_time;

ktime_t max_time;

ktime_t last_time;

ktime_t start_prevent_time;

ktime_t prevent_sleep_time;

unsigned long event_count;

unsigned long active_count;

unsigned long relax_count;

unsigned long expire_count;

unsigned long wakeup_count;

bool active:1;

bool autosleep_enabled:1;

};


쉽게 설명하면 아래 방식으로 wakeup_source가 wakeup_sources 리스트에 등록합니다.

1st wakeup source 등록

wakeup_sources->next = struct wakeup_source->entry


2nd wakeup source등록

wakeup_sources->next->next = struct wakeup_source->entry


개발자분들이여, 임베디드 동네의 최강의 툴인 Trace32를 잘 활용해서 일찍 퇴근합시다.

+ Recent posts