Kernel panic log

[ 2107.232713 / 01-01 11:11:03.809][7] init: cannot find '/system/bin/qrngp' (No such file or directory), disabling 'qrngp'

[ 2107.239317 / 01-01 11:11:03.809][5] Unable to handle kernel NULL pointer dereference at virtual address 00000028

[ 2107.239351 / 01-01 11:11:03.809][5] pgd = e37ec000

[ 2107.239366 / 01-01 11:11:03.809][0] [00000028] *pgd=00000000

[ 2107.239388 / 01-01 11:11:03.809][5] Internal error: Oops: 5 [#1] PREEMPT SMP ARM

[ 2107.239405 / 01-01 11:11:03.809][0] Modules linked in: texfat(PO)

[ 2107.239433 / 01-01 11:11:03.809][5] CPU: 5 PID: 2803 Comm: sensorservice Tainted: P        W  O   3.18.31-perf-gd069b48-00001-g8a6d6e5 #1

[ 2107.239452 / 01-01 11:11:03.809][5] task: e3ffb700 ti: e37f4000 task.ti: e37f4000

[ 2107.239479 / 01-01 11:11:03.809][5] PC is at find_vma_links+0x1c/0x78

[ 2107.239499 / 01-01 11:11:03.809][5] LR is at vma_adjust+0x3a0/0x574


The equivalent callstack can be restored using the following crash utility command (-I supplies the starting program counter and -S the starting stack address for the backtrace).

crash>  bt -I C01002D8 -S  E37F5DD0 0xE3FFB700

PID: 2803   TASK: e3ffb700  CPU: 5   COMMAND: "sensorservice"

bt: WARNING:  stack address:0xe37f5b98, program counter:0xc0ee5b60

 #0 [<c01002d8>] (do_DataAbort) from [<c010ad58>]

    pc : [<c01f980c>]    lr : [<c01fa708>]    psr: 200f0013

    sp : e37f5ec4  ip : 00000034  fp : e8236d9c

    r10: 00000000  r9 : 00000000  r8 : b2c99000

    r7 : c4616c80  r6 : e8236d68  r5 : 7f555034  r4 : 7f555034

    r3 : e37f5f04  r2 : b2c97000  r1 : b2c95000  r0 : 7f555038

    Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM

 #1 [<c010ad58>] (__dabt_svc) from [<c01fa708>]

 #2 [<c01f980c>] (find_vma_links) from [<c01fa708>]

 #3 [<c01fa708>] (vma_adjust) from [<c01fa9e4>]

 #4 [<c01fa9e4>] (__split_vma) from [<c01fb364>]

 #5 [<c01fb364>] (do_munmap) from [<c01fb558>]

 #6 [<c01fb558>] (vm_munmap) from [<c0106820>]


Execution profile at the moment of kernel panic

[1]: R12 is loaded with 0x34, where 0x34 = *0x7f555038 = *R0

[2]: A data abort is raised because the address accessed by the LDR is 0x28 (0x34 - 0xC = R12 - 0xC)

0xc01f97f0 <find_vma_links>:    push    {r4, r5, lr}

0xc01f97f4 <find_vma_links+0x4>:        add     r0, r0, #4

0xc01f97f8 <find_vma_links+0x8>:        mov     r4, #0

0xc01f97fc <find_vma_links+0xc>:        mov     r5, r4

0xc01f9800 <find_vma_links+0x10>:       ldr     r12, [r0]  //<<--[1]

0xc01f9804 <find_vma_links+0x14>:       cmp     r12, #0

0xc01f9808 <find_vma_links+0x18>:       beq     0xc01f983c <find_vma_links+76>

0xc01f980c <find_vma_links+0x1c>:       ldr     r0, [r12, #-12] //<<--[2]
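
For reference, the faulting instruction maps onto the rb-tree walk at the top of find_vma_links(). Below is a simplified, abridged sketch of the mm/mmap.c code of this kernel generation, with the corresponding instructions noted in comments; assuming vm_rb sits at offset 0x10 of vm_area_struct in this 32-bit build, the rb_entry() plus vm_end access folds into the single "ldr r0, [r12, #-12]".

static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;                /* add r0, r0, #4 */
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {                           /* ldr r12, [r0] / cmp r12, #0 */
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {          /* ldr r0, [r12, #-12]  <<-- data abort */
			/* ... descend left or report overlap ... */
		} else {
			/* ... descend right ... */
		}
	}
	/* ... */
}

With R0 holding the bogus value 0x7f555038 instead of pointing into a valid mm_struct, R12 was loaded with 0x34, and the vm_end load at R12 - 0xC touched the invalid address 0x28.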


Tracing back to the context where find_vma_links() is called from vma_adjust(), R0 is copied from R7 (0xC4616C80), whose type is (struct mm_struct *).

0xc01fa6e8 <vma_adjust+896>:    mov     r0, r7

0xc01fa6ec <vma_adjust+900>:    str     r3, [sp]

0xc01fa6f0 <vma_adjust+904>:    add     r3, sp, #60     ; 0x3c

0xc01fa6f4 <vma_adjust+908>:    str     r3, [sp, #4]

0xc01fa6f8 <vma_adjust+912>:    add     r3, sp, #52     ; 0x34

0xc01fa6fc <vma_adjust+916>:    ldr     r1, [r2]

0xc01fa700 <vma_adjust+920>:    ldr     r2, [r2, #4]

0xc01fa704 <vma_adjust+924>:    bl      0xc01f97f0 <find_vma_links>  //<<--


Related debug information

crash> struct task_struct.mm e3ffb700

  mm = 0xc4616d80


Logically, when find_vma_links() is called, R0 should have been 0xC4616C80 instead of 0x7f555038. This signature cannot be explained from a software point of view.


This issue was eventually resolved by replacing the defective PMIC with a normal one on the problematic target device.


When I enter the command adb shell "cat /d/shrinker", the system crashes 100% of the time after dumping the following kernel message.

[  761.636711] Unable to handle kernel paging request at virtual address f38a9a84

[  761.645048] pgd = e8074000

[  761.649800] [f38a9a84] *pgd=a0721811, *pte=00000000, *ppte=00000000

[  761.658106] Internal error: Oops: 7 [#1] PREEMPT SMP ARM

[  761.665481] Modules linked in:

[  761.670605] CPU: 6 PID: 6978 Comm: sh Tainted: G        W      3.18.31-g5b0528e-dirty #12

[  761.680852] task: eb5b8000 ti: d228e000 task.ti: d228e000

[  761.688315] PC is at do_raw_spin_lock+0x8/0x17c

[  761.694912] LR is at list_lru_count_node+0x14/0x60

//--- skip ---

[  761.885719] [<c016a650>] (do_raw_spin_lock) from [<c02057f4>] (list_lru_count_node+0x14/0x60)

[  761.896307] [<c02057f4>] (list_lru_count_node) from [<c022dffc>] (super_cache_count+0x40/0x94)

[  761.906982] [<c022dffc>] (super_cache_count) from [<c01f2708>] (debug_shrinker_show+0x44/0x7c)

[  761.917658] [<c01f2708>] (debug_shrinker_show) from [<c024a56c>] (seq_read+0x1a4/0x3f0)

[  761.927726] [<c024a56c>] (seq_read) from [<c022c87c>] (vfs_read+0x88/0x138)

[  761.936755] [<c022c87c>] (vfs_read) from [<c022cda8>] (SyS_read+0x3c/0x7c)

[  761.945695] [<c022cda8>] (SyS_read) from [<c0106840>] (ret_fast_syscall+0x0/0x38)

[  761.955242] Code: e12fff1e deaf1eed e92d47f0 e1a08000 (e5902004) 


After loading the coredump with the T32 program, we can see that the data abort is triggered inside debug_spin_lock_before(), during the sanity check of the spinlock that list_lru_count_node() is attempting to acquire.
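
For reference, debug_spin_lock_before() in spinlock_debug.c of this kernel era performs the following sanity checks; the very first one dereferences lock->magic at offset 0x4 of the lock, which is consistent with the faulting virtual address 0xF38A9A84 (= lock 0xF38A9A80 + 0x4):

static inline void debug_spin_lock_before(raw_spinlock_t *lock)
{
	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");  /* <<-- reads lock->magic */
	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
	SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
							lock, "cpu recursion");
}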


-000|do_DataAbort(addr = 170, fsr = 3247439880, regs = 0xEC4901C0)

-001|__dabt_svc(asm)

 -->|exception

-002|debug_spin_lock_before(inline)  //<<-- data abort

-002|do_raw_spin_lock(lock = 0xF38A9A80)

-003|list_lru_count_node(?, ?)

-004|super_cache_count(shrink = 0xEBC47DE0, sc = 0xD228FEF8)

-005|debug_shrinker_show(s = 0xE7CF63C0, ?)

-006|seq_read(file = 0xE805CA00, buf = 0xB3246000, size = 4096, ppos = 0xD228FF80)

-007|vfs_read(file = 0xFFFFFFFF, buf = 0x0, ?, pos = 0xD228FF80)

-008|SYSC_read(inline)

-008|sys_read(?, buf = -1289461760, count = 4096)

-009|ret_fast_syscall(asm)



At the point of acquiring the spinlock, all members of the raw_spinlock_t (0xF38A9A80) have been scribbled; the raw values decode as ASCII text (e.g. 0x6F632F34 = "4/co", 0x6E6F6D6D = "mmon"), suggesting a string was written over the lock.


It is necessary to examine the variable that contains this spinlock as a member.


    (register raw_spinlock_t *) [R0] lock = 0xF38A9A80 -> (

      (arch_spinlock_t) [NSD:0xF38A9A80] raw_lock = (

        (u32) [NSD:0xF38A9A80] slock = 0x6F632F34,

        (struct __raw_tickets) [NSD:0xF38A9A80] tickets = (

          (u16) [NSD:0xF38A9A80] owner = 0x2F34, //<<--

          (u16) [NSD:0xF38A9A82] next = 0x6F63)), //<<--

      (unsigned int) [NSD:0xF38A9A84] magic = 0x6E6F6D6D //<<--

      (unsigned int) [NSD:0xF38A9A88] owner_cpu = 0x6F6F675F,

      (void *) [NSD:0xF38A9A8C] owner = 0x5F656C67))



The spinlock address is passed in as &nlru->lock, where nlru holds &lru->node[nid].


51unsigned long

52list_lru_count_node(struct list_lru *lru, int nid)

53{

54 unsigned long count = 0;

55 struct list_lru_node *nlru = &lru->node[nid];  <<--

56

57 spin_lock(&nlru->lock);



list_lru_count_node() is called inside super_cache_count() [line 128].


108static unsigned long super_cache_count(struct shrinker *shrink,

109           struct shrink_control *sc)

110{

111 struct super_block *sb;

112 long total_objects = 0;

113

114 sb = container_of(shrink, struct super_block, s_shrink);

//snip

124 if (sb->s_op && sb->s_op->nr_cached_objects)

125  total_objects = sb->s_op->nr_cached_objects(sb,

126       sc->nid);

127

128 total_objects += list_lru_count_node(&sb->s_dentry_lru, //<<--

129       sc->nid);


Tracing back to the caller of super_cache_count() with the T32 program, we can see that debug_shrinker_show() calls super_cache_count().


Here, the second parameter in the call to super_cache_count() is &sc, a local variable whose memory belongs to the stack area, which looks very *odd*.

205static int debug_shrinker_show(struct seq_file *s, void *unused)

206{

207 struct shrinker *shrinker;

208 struct shrink_control sc;

209

210 sc.gfp_mask = -1;

211 sc.nr_to_scan = 0;

212

213 down_read(&shrinker_rwsem);

214 list_for_each_entry(shrinker, &shrinker_list, list) {

215  int num_objs;

216

217  num_objs = shrinker->count_objects(shrinker, &sc);  //<<-- shrinker->count_objects = super_cache_count


Here, let me wrap up the call trace flow as follows (a sketch of the list_lru layout follows the list):

1. shrinker->count_objects(shrinker, &sc); // <<-- super_cache_count


2. list_lru_count_node(&sb->s_dentry_lru, sc->nid); // The second parameter sc->nid contains a garbage value (sc is a local variable and nid is never initialized)


3. list_lru_count_node(struct list_lru *lru, int nid) // <<-- nid(sc->nid): garbage value

  3.1 struct list_lru_node *nlru = &lru->node[nid]; // lru->node is indexed with the garbage nid

  3.2 spin_lock(&nlru->lock); // &nlru->lock: garbage address
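
For reference, the list_lru layout of this kernel era (include/linux/list_lru.h, abridged) shows why a garbage nid is fatal: &lru->node[nid] lands far outside the allocated per-node array, so &nlru->lock becomes a wild address.

struct list_lru_node {
	spinlock_t		lock;      /* <<-- &nlru->lock passed to spin_lock() */
	struct list_head	list;
	long			nr_items;
} ____cacheline_aligned_in_smp;

struct list_lru {
	struct list_lru_node	*node;     /* array indexed by nid: &lru->node[nid] */
	nodemask_t		active_nodes;
};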


Solution

After searching the Linux kernel community and Google's sites, we found that Google had already created a revert commit:

https://chromium-review.googlesource.com/c/345895/


The commit message below contains the same debugging signature as the one we analyzed (shrink_control.nid is used but not initialized).


Commit log

ANDROID: Revert "mm: vmscan: Add a debug file for shrinkers"


Kernel panic when type "cat /sys/kernel/debug/shrinker"


Unable to handle kernel paging request at virtual address 0af37d40

pgd = d4dec000

[0af37d40] *pgd=00000000

Internal error: Oops: 5 [#1] PREEMPT SMP ARM

[<c0bb8f24>] (_raw_spin_lock) from [<c020aa08>] (list_lru_count_one+0x14/0x28)

[<c020aa08>] (list_lru_count_one) from [<c02309a8>] (super_cache_count+0x40/0xa0)

[<c02309a8>] (super_cache_count) from [<c01f6ab0>] (debug_shrinker_show+0x50/0x90)

[<c01f6ab0>] (debug_shrinker_show) from [<c024fa5c>] (seq_read+0x1ec/0x48c)

[<c024fa5c>] (seq_read) from [<c022e8f8>] (__vfs_read+0x20/0xd0)

[<c022e8f8>] (__vfs_read) from [<c022f0d0>] (vfs_read+0x7c/0x104)

[<c022f0d0>] (vfs_read) from [<c022f974>] (SyS_read+0x44/0x9c)

[<c022f974>] (SyS_read) from [<c0107580>] (ret_fast_syscall+0x0/0x3c)

Code: e1a04000 e3a00001 ebd66b39 f594f000 (e1943f9f)

---[ end trace 60c74014a63a9688 ]---

Kernel panic - not syncing: Fatal exception


shrink_control.nid is used but not initialzed, same for

shrink_control.memcg.


This reverts commit b0e7a582b2264cdf75874dcd8df915b6b4427755.


Reported-by: Xiaowen Liu <xiaowen.liu@freescale.com>

Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>

(cherry picked from https://android.googlesource.com/kernel/common android-4.4

 commit ad95c12f66df9efae04b15d5c4d0d0ba56ab2620)


BUG=chromium:612862

TEST=build and boot on a few devices

Debugging 

Kernel panic occurs at line 68 inside __list_del_entry(), whose caller is process_one_work().

Code Review at the moment of kernel panic


49void __list_del_entry(struct list_head *entry)

50{

51     struct list_head *prev, *next;

52

53     prev = entry->prev;

54     next = entry->next;

55

56    if (WARN(next == LIST_POISON1,

57                      "list_del corruption, %p->next is LIST_POISON1 (%p)\n",

58                      entry, LIST_POISON1) ||

59       WARN(prev == LIST_POISON2,

60                     "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",

61                     entry, LIST_POISON2) ||

62      WARN(prev->next != entry,

63                    "list_del corruption. prev->next should be %p, "

64                    "but was %p\n", entry, prev->next) ||

65      WARN(next->prev != entry,

66                    "list_del corruption. next->prev should be %p, "

67                   "but was %p\n", entry, next->prev)) {

68                   BUG_ON(PANIC_CORRUPTION); // <<-- kernel panic

69     return;

70 }

71

72     __list_del(prev, next);

73}


Callstacks

-000|do_undefinstr(regs = 0x0)

-001|__und_svc_fault(asm)

-->|exception

-002|__list_del_entry(?) // <<-- kernel panic

-003|process_one_work(worker = 0xEC048600, work = 0xEA2585AC)

-004|worker_thread(__worker = 0x0)

-005|kthread(_create = 0xEB9B3000)

-006|ret_from_fork(asm)

-007|ret_fast_syscall(asm)

list_del_init() is called inside process_one_work() with &work->entry as its parameter.

Now it is important to examine the state of &work->entry.

static void process_one_work(struct worker *worker, struct work_struct *work)

__releases(&pool->lock)

__acquires(&pool->lock)

{

    struct pool_workqueue *pwq = get_work_pwq(work);

    struct worker_pool *pool = worker->pool;

    bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;

    int work_color;

    struct worker *collision;

//snip

/* claim and dequeue */

    debug_work_deactivate(work);

    hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);

    worker->current_work = work;

    worker->current_func = work->func;

    worker->current_pwq = pwq;

    work_color = get_work_color(work);

    list_del_init(&work->entry);
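
For reference, list_del_init() (include/linux/list.h) is a thin wrapper around the __list_del_entry() shown earlier, which is where the corruption checks and the BUG_ON fire:

static inline void list_del_init(struct list_head *entry)
{
	__list_del_entry(entry);   /* corruption checks / BUG_ON shown above */
	INIT_LIST_HEAD(entry);     /* re-point next/prev of the entry at itself */
}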

Going back to the process_one_work() stack frame using T32, I can obtain the following debug information.

[1],[2]: next element is corrupted

[3]: worker.current_func is found to be irq_affinity_notify which looks abnormal.

process_one_work(

    worker = 0xEC048600 -> (

        entry = (next = 0x0, prev = 0xEC5AE968),

        hentry = (next = 0x0, pprev = 0xEC5AE968),

        current_work = 0xEA2585AC,

        current_func = 0xC016FD40,

        current_pwq = 0xEC5B2400,

        desc_valid = FALSE,

The memory content around 0xEC048600 seems to be corrupted, especially the irq_affinity_notify entry at 0xEC04860C.

NSD:EC0485F8| 5A 5A 5A 5A 0x5A5A5A5A

NSD:EC0485FC| 5A 5A 5A 5A 0x5A5A5A5A

NSD:EC048600| 00 00 00 00 0x0 // <<--

NSD:EC048604| 68 E9 5A EC 0xEC5AE968

NSD:EC048608| AC 85 25 EA 0xEA2585AC

NSD:EC04860C| 40 FD 16 C0 0xC016FD40 \\vmlinux\manage\irq_affinity_notify // <<--

NSD:EC048610| 00 24 5B EC 0xEC5B2400

NSD:EC048614| 00 00 00 00 0x0

NSD:EC048618| 18 86 04 EC 0xEC048618

NSD:EC04861C| 18 86 04 EC 0xEC048618

NSD:EC048620| C0 57 2A EC 0xEC2A57C0

NSD:EC048624| 00 E8 5A EC 0xEC5AE800

NSD:EC048628| E8 E0 04 E9 0xE904E0E8

NSD:EC04862C| F0 E9 5A EC 0xEC5AE9F0

NSD:EC048630| AC 8D FF FF 0xFFFF8DAC

Searching "irq_affinity_notify" key word enables me to the definition of irq_affinity_notify structure.

struct irq_affinity_notify {

    unsigned int irq;

    struct kref kref;

    struct work_struct work;

    void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);

    void (*release)(struct kref *ref);

};



By casting 0xEA2585AC as struct work_struct, the following debug information can be deduced, which looks very odd.

The work_struct's entry list is corrupted (the node it points to has NULL next/prev links) and its work_func_t is registered as irq_affinity_notify().

(struct work_struct *) [-] (struct work_struct*)0xEA2585AC = 0xEA2585AC = __bss_stop+0x2857B9A4 -> (

  (atomic_long_t) [D:0xEA2585AC] data = ((int) [D:0xEA2585AC] counter = -329571323 = 0xEC5B2405 = '.[

    (struct list_head) [D:0xEA2585B0] entry = (

    (struct list_head *) [D:0xEA2585B0] next = 0xE9E9EAB4 = __bss_stop+0x281C1EAC -> (

      (struct list_head *) [D:0xE9E9EAB4] next = 0x0 = -> NULL,

    (struct list_head *) [D:0xE9E9EAB8] prev = 0x0 = -> NULL),

  (struct list_head *) [D:0xEA2585B4] prev = 0xEC5AE820 = -> ((struct list_head *) [D:0xEC5AE820]

 (work_func_t) [D:0xEA2585B8] func = 0xC016FD40 = irq_affinity_notify -> )


Looking at the symbol information around address 0xEA2585AC, I can see the following symbols:

(i.e.) irq_affinity_notify, pm_qos_irq_notify, pm_qos_irq_release, pm_qos_work_fn

________address|_data________|value_____________|symbol

NSD:EA25859C| FF 00 00 00 0xFF \\vmlinux\Global\up_b_offset+0xCF

NSD:EA2585A0| 9D 00 00 00 0x9D \\vmlinux\Global\up_b_offset+0x6D

NSD:EA2585A4| 9D 00 00 00 0x9D \\vmlinux\Global\up_b_offset+0x6D

NSD:EA2585A8| 03 00 00 00 0x3

NSD:EA2585AC| 05 24 5B EC 0xEC5B2405

NSD:EA2585B0| B4 EA E9 E9 0xE9E9EAB4

NSD:EA2585B4| 20 E8 5A EC 0xEC5AE820

NSD:EA2585B8| 40 FD 16 C0 0xC016FD40 \\vmlinux\manage\irq_affinity_notify

NSD:EA2585BC| 58 90 16 C0 0xC0169058 \\vmlinux\kernel/power/qos\pm_qos_irq_notify

NSD:EA2585C0| 00 90 16 C0 0xC0169000 \\vmlinux\kernel/power/qos\pm_qos_irq_release

NSD:EA2585C4| 00 94 35 77 0x77359400

NSD:EA2585C8| C8 85 25 EA 0xEA2585C8

NSD:EA2585CC| C8 85 25 EA 0xEA2585C8

NSD:EA2585D0| 14 81 86 C1 0xC1868114 \\vmlinux\adreno\device_3d0+0x45C

NSD:EA2585D4| F8 C6 C4 E9 0xE9C4C6F8

NSD:EA2585D8| 01 00 00 00 0x1

NSD:EA2585DC| E0 FF FF FF 0xFFFFFFE0

NSD:EA2585E0| E0 85 25 EA 0xEA2585E0

NSD:EA2585E4| E0 85 25 EA 0xEA2585E0

NSD:EA2585E8| 10 8F 16 C0 0xC0168F10 \\vmlinux\kernel/power/qos\pm_qos_work_fn

NSD:EA2585EC| 00 00 00 00 0x0

NSD:EA2585F0| 00 00 00 00 0x0

NSD:EA2585F4| 00 00 00 00 0x0

NSD:EA2585F8| 02 00 B8 EB 0xEBB80002

NSD:EA2585FC| A8 77 13 C0 0xC01377A8 \\vmlinux\workqueue\delayed_work_timer_fn

NSD:EA258600| DC 85 25 EA 0xEA2585DC

Searching for pm_qos_irq_notify in the kernel source code, I can see that it is registered as req->irq_notify.notify.

void pm_qos_add_request(struct pm_qos_request *req,

int pm_qos_class, s32 value)

{

//snip 

case PM_QOS_REQ_AFFINE_IRQ:

    if (irq_can_set_affinity(req->irq)) {

         int ret = 0;

         struct irq_desc *desc = irq_to_desc(req->irq);

         struct cpumask *mask = desc->irq_data.affinity;

         /* Get the current affinity */

         cpumask_copy(&req->cpus_affine, mask);

         req->irq_notify.irq = req->irq;

         req->irq_notify.notify = pm_qos_irq_notify;

         req->irq_notify.release = pm_qos_irq_release;

I can see that pm_qos_irq_notify() and pm_qos_irq_release() are registered in struct irq_affinity_notify.

struct irq_affinity_notify {

    unsigned int irq;

    struct kref kref;

    struct work_struct work;

    void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); // pm_qos_irq_notify

    void (*release)(struct kref *ref); // pm_qos_irq_release

};


struct pm_qos_request {

    enum pm_qos_req_type type;

    struct cpumask cpus_affine;

#ifdef CONFIG_SMP

    uint32_t irq;

/* Internal structure members */

    struct irq_affinity_notify irq_notify;

#endif

    struct plist_node node;

    int pm_qos_class;

    struct delayed_work work; /* for pm_qos_update_request_timeout */

};

Address 0xEA2585A4 is the starting point of the struct irq_affinity_notify instance.

(struct irq_affinity_notify *) [-] (struct irq_affinity_notify*)0xEA2585A4

(unsigned int) [D:0xEA2585A4] irq = 157 = 0x9D = '....',

(struct kref) [D:0xEA2585A8] kref = (

   (atomic_t) [D:0xEA2585A8] refcount = ((int) [D:0xEA2585A8] counter = 3 = 0x3 = '....')),

(struct work_struct) [D:0xEA2585AC] work = (

   (atomic_long_t) [D:0xEA2585AC] data = ((int) [D:0xEA2585AC] counter = -329571323 = 0xEC5B2

   (struct list_head) [D:0xEA2585B0] entry = (

      (struct list_head *) [D:0xEA2585B0] next = 0xE9E9EAB4

      (struct list_head *) [D:0xEA2585B4] prev = 0xEC5AE820

   (work_func_t) [D:0xEA2585B8] func = 0xC016FD40 = irq_affinity_notify -> ),

   (void (*)()) [D:0xEA2585BC] notify = 0xC0169058 = pm_qos_irq_notify -> ,

   (void (*)()) [D:0xEA2585C0] release = 0xC0169000 = pm_qos_irq_release -> )

________address|_data________|value_____________|symbol

NSD:EA25859C| FF 00 00 00 0xFF \\vmlinux\Global\up_b_offset+0xCF

NSD:EA2585A0| 9D 00 00 00 0x9D \\vmlinux\Global\up_b_offset+0x6D

NSD:EA2585A4| 9D 00 00 00 0x9D //struct irq_affinity_notify.irq

NSD:EA2585A8| 03 00 00 00 0x3 //struct irq_affinity_notify.kref

NSD:EA2585AC| 05 24 5B EC 0xEC5B2405 //struct irq_affinity_notify.work.data

NSD:EA2585B0| B4 EA E9 E9 0xE9E9EAB4 //struct irq_affinity_notify.work.entry.next

NSD:EA2585B4| 20 E8 5A EC 0xEC5AE820 //struct irq_affinity_notify.work.entry.prev

NSD:EA2585B8| 40 FD 16 C0 0xC016FD40 \\vmlinux\manage\irq_affinity_notify //struct irq_affinity_notify.work.func

NSD:EA2585BC| 58 90 16 C0 0xC0169058 \\vmlinux\kernel/power/qos\pm_qos_irq_notify

NSD:EA2585C0| 00 90 16 C0 0xC0169000 \\vmlinux\kernel/power/qos\pm_qos_irq_release

NSD:EA2585C4| 00 94 35 77 0x77359400

NSD:EA2585C8| C8 85 25 EA 0xEA2585C8

NSD:EA2585CC| C8 85 25 EA 0xEA2585C8

NSD:EA2585D0| 14 81 86 C1 0xC1868114 \\vmlinux\adreno\device_3d0+0x45C

NSD:EA2585D4| F8 C6 C4 E9 0xE9C4C6F8

NSD:EA2585D8| 01 00 00 00 0x1

NSD:EA2585DC| E0 FF FF FF 0xFFFFFFE0

NSD:EA2585E0| E0 85 25 EA 0xEA2585E0

NSD:EA2585E4| E0 85 25 EA 0xEA2585E0

NSD:EA2585E8| 10 8F 16 C0 0xC0168F10 \\vmlinux\kernel/power/qos\pm_qos_work_fn

NSD:EA2585EC| 00 00 00 00 0x0

NSD:EA2585F0| 00 00 00 00 0x0

NSD:EA2585F4| 00 00 00 00 0x0

NSD:EA2585F8| 02 00 B8 EB 0xEBB80002

NSD:EA2585FC| A8 77 13 C0 0xC01377A8 \\vmlinux\workqueue\delayed_work_timer_fn

NSD:EA258600| DC 85 25 EA 0xEA2585DC


The IRQ number 157 seen above corresponds to mmc1, as the interrupt statistics (/proc/interrupts) show:

155: 1608 0 0 0 0 0 0 0 GIC mmc0

157: 98 0 0 0 0 0 0 0 GIC mmc1

166: 10 0 0 0 0 0 0 0 GIC msm_otg,msm_hsusb


Taking a look at the structures associated with struct irq_affinity_notify, we can see that struct pm_qos_request embeds a struct irq_affinity_notify as its irq_notify member.

struct pm_qos_request {

    enum pm_qos_req_type type;

    struct cpumask cpus_affine;

#ifdef CONFIG_SMP

    uint32_t irq;

/* Internal structure members */

    struct irq_affinity_notify irq_notify;

#endif

    struct plist_node node;

    int pm_qos_class;

    struct delayed_work work; /* for pm_qos_update_request_timeout */

};

By casting 0xEA258598 as struct pm_qos_request, the whole picture can be drawn as below.

(i.e) type = PM_QOS_REQ_AFFINE_IRQ

(struct pm_qos_request *) [-] (struct pm_qos_request*)0xEA258598 = 0xEA258598 = __bss_sto

  (enum pm_qos_req_type) [D:0xEA258598] type = PM_QOS_REQ_AFFINE_IRQ = 2 = 0x2 = '....',

  (struct cpumask) [D:0xEA25859C] cpus_affine = (

    (long unsigned int [1]) [D:0xEA25859C] bits = (

    [0] = 255 = 0xFF = '....')),

  (uint32_t) [D:0xEA2585A0] irq = 157 = 0x9D = '....',

  (struct irq_affinity_notify) [D:0xEA2585A4] irq_notify = ((unsigned int) [D:0xEA2585A4]  

  (struct plist_node) [D:0xEA2585C4] node = ((int) [D:0xEA2585C4] prio = 2000000000 = 0x7

  (int) [D:0xEA2585D8] pm_qos_class = 1 = 0x1 = '....',

  (struct delayed_work) [D:0xEA2585DC] work = (

   (struct work_struct) [D:0xEA2585DC] work = (

    (atomic_long_t) [D:0xEA2585DC] data = ((int) [D:0xEA2585DC] counter = -32 = 0xFFFFF

    (struct list_head) [D:0xEA2585E0] entry = ((struct list_head *) [D:0xEA2585E0] next

  (work_func_t) [D:0xEA2585E8] func = 0xC0168F10 = pm_qos_work_fn -> ),

  (struct timer_list) [D:0xEA2585EC] timer = ((struct list_head) [D:0xEA2585EC] entry =

 (struct workqueue_struct *) [D:0xEA258620] wq = 0x0 = -> NULL,

 (int) [D:0xEA258624] cpu = 0 = 0x0 = '....'))

(where)

________address|_data________|value_____________|symbol

NSD:EA258590| 00 00 00 00 0x0

NSD:EA258594| 01 00 00 00 0x1

NSD:EA258598| 02 00 00 00 0x2 //enum pm_qos_req_type type;

NSD:EA25859C| FF 00 00 00 0xFF //struct cpumask cpus_affine;

NSD:EA2585A0| 9D 00 00 00 0x9D //uint32_t irq;

NSD:EA2585A4| 9D 00 00 00 0x9D //struct irq_affinity_notify

NSD:EA2585A8| 03 00 00 00 0x3

NSD:EA2585AC| 05 24 5B EC 0xEC5B2405

NSD:EA2585B0| B4 EA E9 E9 0xE9E9EAB4

NSD:EA2585B4| 20 E8 5A EC 0xEC5AE820

NSD:EA2585B8| 40 FD 16 C0 0xC016FD40 \\vmlinux\manage\irq_affinity_notify

NSD:EA2585BC| 58 90 16 C0 0xC0169058 \\vmlinux\kernel/power/qos\pm_qos_irq_notify

NSD:EA2585C0| 00 90 16 C0 0xC0169000 \\vmlinux\kernel/power/qos\pm_qos_irq_release

NSD:EA2585C4| 00 94 35 77 0x77359400

NSD:EA2585C8| C8 85 25 EA 0xEA2585C8

NSD:EA2585CC| C8 85 25 EA 0xEA2585C8

NSD:EA2585D0| 14 81 86 C1 0xC1868114 \\vmlinux\adreno\device_3d0+0x45C

NSD:EA2585D4| F8 C6 C4 E9 0xE9C4C6F8

NSD:EA2585D8| 01 00 00 00 0x1

NSD:EA2585DC| E0 FF FF FF 0xFFFFFFE0

NSD:EA2585E0| E0 85 25 EA 0xEA2585E0

NSD:EA2585E4| E0 85 25 EA 0xEA2585E0

NSD:EA2585E8| 10 8F 16 C0 0xC0168F10 \\vmlinux\kernel/power/qos\pm_qos_work_fn

NSD:EA2585EC| 00 00 00 00 0x0

The (*(((struct pm_qos_request*)0xEA258598))).work.work.data is equal to 0xFFFFFFE0.

[-] (struct pm_qos_request*)0xEA258598 = 0xEA258598 -> (

//snip

D:0xEA2585DC] work = (

[D:0xEA2585DC] work = (

[D:0xEA2585DC] data = ([D:0xEA2585DC] counter = 0xFFFFFFE0), // <<-- WORK_STRUCT_NO_POOL

[D:0xEA2585E0] entry = ([D:0xEA2585E0] next = 0xEA2585E0, [D:0xEA2585E4] prev = 0xEA2585E0),

[D:0xEA2585E8] func = 0xC0168F10),

[D:0xEA2585EC] timer = ([D:0xEA2585EC] entry = ([D:0xEA2585EC] next = 0x0, [D:0xEA2585F0] prev = 0x0), [

[D:0xEA258620] wq = 0x0,

[D:0xEA258624] cpu = 0x0))

So the following patch needs to be applied to check whether the same crash still occurs, given that work->data is 0xFFFFFFE0,

where 0xFFFFFFE0 = WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT:

diff --git a/kernel/power/qos.c b/kernel/power/qos.c

index 5f4c006..a122891 100644

--- a/kernel/power/qos.c

+++ b/kernel/power/qos.c

@@ -405,6 +405,9 @@ void pm_qos_update_request(struct pm_qos_request *req,

return;

}

+     if (work_is_canceling(&req->work) )

+        return;

+

      cancel_delayed_work_sync(&req->work);

      __pm_qos_update_request(req, new_value);

}

@@ -427,6 +430,9 @@ void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value,

"%s called for unknown object.", __func__))

return;

+     if (work_is_canceling(&req->work) )

+         return;

+

      cancel_delayed_work_sync(&req->work);

trace_pm_qos_update_request_timeout(req->pm_qos_class,

@@ -458,6 +464,9 @@ void pm_qos_remove_request(struct pm_qos_request *req)

return;

}

+     if (work_is_canceling(&req->work) )

+         return;

+

       cancel_delayed_work_sync(&req->work);

       trace_pm_qos_remove_request(req->pm_qos_class, PM_QOS_DEFAULT_VALUE);


static bool work_is_canceling(struct work_struct *work)

{

    unsigned long data = atomic_long_read(&work->data);

    return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);

}

(where)

WORK_STRUCT_PWQ = 0x4, WORK_OFFQ_CANCELING = 0x10
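
For the 32-bit configuration implied by these values (WORK_OFFQ_CANCELING = 0x10), the include/linux/workqueue.h constants of this era work out roughly as follows, which is how work->data ends up as 0xFFFFFFE0 after clear_work_data():

WORK_OFFQ_FLAG_BASE   = 4                                          /* = WORK_STRUCT_COLOR_SHIFT */
WORK_OFFQ_CANCELING   = 1 << WORK_OFFQ_FLAG_BASE                   = 0x10
WORK_OFFQ_POOL_SHIFT  = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS  = 5
WORK_OFFQ_POOL_BITS   = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT       = 27
WORK_OFFQ_POOL_NONE   = (1 << WORK_OFFQ_POOL_BITS) - 1             = 0x07FFFFFF
WORK_STRUCT_NO_POOL   = WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT = 0xFFFFFFE0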


In-depth Analysis


work->data is set to WORK_STRUCT_NO_POOL inside clear_work_data(), which is called from __cancel_work_timer():

static void clear_work_data(struct work_struct *work)

{

    smp_wmb(); /* see set_work_pool_and_clear_pending() */

    set_work_data(work, WORK_STRUCT_NO_POOL, 0);

}


static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)

{

    static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);

    unsigned long flags;

    int ret;

    do {

        ret = try_to_grab_pending(work, is_dwork, &flags);

        if (unlikely(ret == -ENOENT)) {

               struct cwt_wait cwait;

               init_wait(&cwait.wait);

               cwait.wait.func = cwt_wakefn;

               cwait.work = work;

              prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,

              TASK_UNINTERRUPTIBLE);

              if (work_is_canceling(work))

                         schedule();

              finish_wait(&cancel_waitq, &cwait.wait);

      }

    } while (unlikely(ret < 0));

/* tell other tasks trying to grab @work to back off */ 

    mark_work_canceling(work);

    local_irq_restore(flags);

    flush_work(work); 

    clear_work_data(work); // <<-- 
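
For completeness, mark_work_canceling() in kernel/workqueue.c of this era is what sets the WORK_OFFQ_CANCELING bit that the work_is_canceling() check in the patch above tests for (a sketch):

static void mark_work_canceling(struct work_struct *work)
{
	unsigned long pool_id = get_work_pool_id(work);

	/* pack the CANCELING flag together with the pool id */
	pool_id <<= WORK_OFFQ_POOL_SHIFT;
	set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
}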


I can restore the callstack using T32 as follows:

[<c001df40>] do_page_fault+0x338/0x3f8 

[<c0008544>] do_DataAbort+0x38/0x98 

[<c0015058>] __dabt_svc+0x38/0x60 

[<c031a5a0>] tty_wakeup+0xc/0x64 

[<c06f82ec>] gs_start_io+0x94/0xf4 

[<c06f8698>] gserial_connect+0xe0/0x180

[<c06f7578>] acm_set_alt+0x88/0x1a8 

[<c06f3c5c>] composite_setup+0xd34/0x1520 

[<c070d154>] android_setup+0x1f4/0x1fc 

[<c03fc6e8>] forward_to_driver+0x64/0x100 

[<c03fd418>] musb_g_ep0_irq+0x7d8/0x1c18 

[<c03fb094>] musb_interrupt+0x94/0xc78 

[<c04024f0>] generic_interrupt+0xc34/0x1218 

[<c009b020>] handle_irq_event_percpu+0xe0/0x2e4

[<c009b268>] handle_irq_event+0x44/0x64

[<c009df54>] handle_fasteoi_irq+0xe8/0x1a4 

[<c009a7dc>] __handle_domain_irq+0x104/0x264 

[<c0008668>] gic_handle_irq+0x2c/0x64 

[<c00153a8>] __irq_usr+0x48/0x60


The kernel panic occurs because R0 holds 0x0, and R0 is the (struct tty_struct *tty) argument.

NSR:C031A594|E1A0C00D tty_wakeup: cpy r12,r13

NSR:C031A598|E92DD830 push {r4-r5,r11-r12,r14,pc}

NSR:C031A59C|E24CB004 sub r11,r12,#0x4 ; r11,r12,#4

NSR:C031A5A0|E59031CC               ldr     r3,[r0,#0x1CC]  //<<-- data abort


Let me take some time to look into tty_wakeup() further. The faulting load at tty + 0x1CC corresponds to the tty->flags access in the test_bit() check below.

void tty_wakeup(struct tty_struct *tty)

{

    struct tty_ldisc *ld;


    if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) { // <<-- kernel panic

        ld = tty_ldisc_ref(tty);


        if (ld)

        {

           if (ld->ops->write_wakeup) ld->ops->write_wakeup(tty);

           tty_ldisc_deref(ld);

        }

    }


    wake_up_interruptible_poll(&tty->write_wait, POLLOUT);

}


As the kernel log below indicates, right before the kernel panic occurs gs_close() is called from the mdl_sock_host task.

Inside gs_close(), port->port.tty is updated to NULL.

[ 6285.082083 / 06-01 11:43:40.318] -(4)[981:mdl_sock_host]gs_close: ttyGS1 (d3681900,d7e1ca80) ...


<< Code fragment >> 

static void gs_close(struct tty_struct *tty, struct file *file)

{

    struct gs_port *port = tty->driver_data;

    struct gserial *gser;


//snip

    if (gser == NULL)

        gs_buf_free(&port->port_write_buf);

    else

        gs_buf_clear(&port->port_write_buf);

    tty->driver_data = NULL;

    port->port.tty = NULL; // <<--

    port->openclose = false;


Right after gs_close() is called, the acm driver calls tty_wakeup() during the enumeration operation, which causes the kernel panic.


With the following patch, the kernel crash was fixed.

diff --git a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c

index 27a7cdc..4b88415 100644

--- a/drivers/usb/gadget/u_serial.c

+++ b/drivers/usb/gadget/u_serial.c

@@ -784,8 +784,11 @@ static int gs_start_io(struct gs_port *port)

    port->n_read = 0;

    started = gs_start_rx(port);


+   if (!port->port_usb)

+      return -EIO;

+

/* unblock any pending writes into our circular buffer */


-    if (started) {

+    if (started && port->port.tty) {

                tty_wakeup(port->port.tty);

         } else {

           gs_free_requests(ep, head, &port->read_allocated);


With this change, the call to tty_wakeup() is only made when port->port.tty points to a valid address.

A very interesting kernel panic occurs with the following callstack.

crash> bt -I C0ED8B64 -S E853FD20  e5752c00

PID: 1787   TASK: e5752c00  CPU: 4   COMMAND: "python"

bt: WARNING:  stack address:0xe853fa38, program counter:0xc0ee5b60

 #0 [<c0ed8b64>] (panic) from [<c0125038>]

 #1 [<c0125038>] (__stack_chk_fail) from [<c032b6cc>]

 #2 [<c032b6cc>] (sock_has_perm) from [<c0327d00>]

 #3 [<c0327d00>] (security_socket_recvmsg) from [<c0ceb1c8>]

 #4 [<c0ceb1c8>] (sock_recvmsg) from [<c0cec474>]

 #5 [<c0cec474>] (___sys_recvmsg) from [<c0ced5b4>]

 #6 [<c0ced5b4>] (__sys_recvmsg) from [<c0106820>]


[1]: sock_has_perm() is first called from security_socket_recvmsg(). Stack: 0xE853FDD8

[2]: A set of registers is pushed onto the stack. Stack: 0xE853FDBC (0xE853FDD8 - 0x1C)

[3]: The stack is extended for local variables. Stack: 0xE853FD68 (0xE853FDBC - 0x54)

[4]: R7 is holding 0xC1809948 where __stack_chk_guard is located.

(where: T32)

NSP:C032B6D4|C1809948            dcd     0xC1809948       ; __stack_chk_guard

(where: binary utility)

c032b6d4:       c1809948        .word   0xc1809948


crash> p &__stack_chk_guard

$3 = (unsigned long *) 0xc1809948 <__stack_chk_guard>


[5]: R3 now holds the stack canary value 0xdc8cb01f, where R3 = 0xdc8cb01f = *0xC1809948

crash> rd 0xC1809948

c1809948:  dc8cb01f    


[6]: The stack canary value (0xdc8cb01f: R3) is stored at 0xE853FDB4 (= 0xE853FD68 + 0x4C = SP + 0x4C)

[7]: To read the stack canary back, 0xE853FDB4 is accessed when the "ldr r2, [sp, #76] ; 0x4c" instruction is executed.

In this case, R2 is updated to 0xdc8cb01f.


[8]: R3 should have been 0xdc8cb01f instead of 0x0. The weird debug signature is that R7 holds 0xC1800048, when it should have been 0xC1809948.

After the following instruction is executed, R7 is never changed within sock_has_perm().

0xc032b638 <sock_has_perm+8>: ldr r7, [pc, #148]

[9]: kernel panic occurs


#Code

0xc032b630 <sock_has_perm>:     push    {r4, r5, r6, r7, r8, r9, lr} //<<--[2]

0xc032b634 <sock_has_perm+0x4>: sub     sp, sp, #84     ; 0x54  //<<--[3]

0xc032b638 <sock_has_perm+0x8>: ldr     r7, [pc, #148]  ; 0xc032b6d4 <sock_has_perm+0xa4> //<<--[4]

0xc032b63c <sock_has_perm+0xc>: add     r6, sp, #28

0xc032b640 <sock_has_perm+0x10>:        mov     r9, r0

0xc032b644 <sock_has_perm+0x14>:        ldr     r4, [r1, #420]  ; 0x1a4

0xc032b648 <sock_has_perm+0x18>:        mov     r5, r1

0xc032b64c <sock_has_perm+0x1c>:        mov     r8, r2

0xc032b650 <sock_has_perm+0x20>:        ldr     r3, [r7]  //<<--[5]

0xc032b654 <sock_has_perm+36>:  mov     r1, #0

0xc032b658 <sock_has_perm+40>:  mov     r2, #48 ; 0x30

0xc032b65c <sock_has_perm+44>:  mov     r0, r6

0xc032b660 <sock_has_perm+48>:  str     r3, [sp, #76]   ; 0x4c //<<--[6]

// snip

0xc032b6b8 <sock_has_perm+0x88>:        ldr     r2, [sp, #76]   ; 0x4c  //<<--[7]

0xc032b6bc <sock_has_perm+0x8c>:        ldr     r3, [r7]  //<<--[8]

0xc032b6c0 <sock_has_perm+0x90>:        cmp     r2, r3

0xc032b6c4 <sock_has_perm+0x94>:        beq     0xc032b6cc <sock_has_perm+156>

0xc032b6c8 <sock_has_perm+0x98>:        bl      0xc0125028 <__stack_chk_fail>  //<<--[9]


#Stack dump

NSD:E853FD5C| 00 00 00 00  0x0

NSD:E853FD60| 00 00 00 00  0x0

NSD:E853FD64| CC B6 32 C0  0xC032B6CC         \\vmlinux\hooks\sock_has_perm+0x9C

NSD:E853FD68| 74 FD 53 E8  0xE853FD74  //<<--[3]

NSD:E853FD6C| A0 FD 53 E8  0xE853FDA0

//snip

NSD:E853FDAC| 00 00 00 00  0x0

NSD:E853FDB0| 00 00 00 00  0x0

NSD:E853FDB4| 1F B0 8C DC  0xDC8CB01F  //<<--[6] location where stack canary magic is saved.

NSD:E853FDB8| 00 0A 82 ED  0xED820A00

NSD:E853FDBC| 00 9C C9 C3  0xC3C99C00  //<<--[R4]  //<<--[2] New Stack

NSD:E853FDC0| 00 00 01 00  0x10000    //<<--[R5]

NSD:E853FDC4| 7C FF 53 E8  0xE853FF7C   //<<--[R6]

NSD:E853FDC8| 00 00 00 00  0x0

NSD:E853FDCC| 00 2C 75 E5  0xE5752C00

NSD:E853FDD0| 00 00 00 00  0x0 //<<--[R9]

NSD:E853FDD4| 00 7D 32 C0  0xC0327D00  \\vmlinux\security\security_socket_recvmsg+0x14 //<<--[R14]

NSD:E853FDD8| 00 00 00 00  0x0  // <<--[1]


Please remember that calls to __stack_chk_fail are inserted into the assembly code of other kernel APIs as well, as a stack-corruption sanity check.

0xc0100974 <do_one_initcall+404>:       bl      0xc0125028 <__stack_chk_fail>

0xc0100d8c <name_to_dev_t+828>: bl      0xc0125028 <__stack_chk_fail>

0xc0107854 <__show_regs+700>:   bl      0xc0125028 <__stack_chk_fail>

0xc01219c0 <sha384_neon_final+108>:     bl      0xc0125028 <__stack_chk_fail>

0xc0133c34 <sys_newuname+352>:  bl      0xc0125028 <__stack_chk_fail>

0xc0133d68 <sys_sethostname+276>:       bl      0xc0125028 <__stack_chk_fail>


In the case of this coredump, the Linux kernel is compiled with the -fstack-protector option.

658 ifdef CONFIG_CC_STACKPROTECTOR_REGULAR

 659   stackp-flag := -fstack-protector

 660   ifeq ($(call cc-option, $(stackp-flag)),)

 661     $(warning Cannot use CONFIG_CC_STACKPROTECTOR_REGULAR: \

 662              -fstack-protector not supported by compiler)

 663   endif
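
To summarize the mechanism, here is a conceptual sketch (not actual kernel source; the function and buffer names are made up) of what -fstack-protector makes the compiler emit around a protected function such as sock_has_perm():

extern unsigned long __stack_chk_guard;
extern void __stack_chk_fail(void);

int protected_function(void)
{
	unsigned long canary = __stack_chk_guard; /* ldr r3, [r7] ; str r3, [sp, #0x4c] */
	char buf[48];                             /* locals that an overflow would smash */

	/* ... function body writes into buf ... */

	if (canary != __stack_chk_guard)          /* ldr r2, [sp,#76] ; ldr r3, [r7] ; cmp r2, r3 */
		__stack_chk_fail();               /* never returns: panics on stack corruption */
	return 0;
}

In this coredump, the reload through R7 used 0xC1800048 instead of 0xC1809948, so the comparison failed even though the on-stack canary at SP + 0x4C (0xDC8CB01F) was intact.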



Kernel panic occurs on a defective device under the following call trace.

crash> bt -I C01002D8 -S  E7AABC08 0xE1804200

PID: 2285   TASK: e1804200  CPU: 5   COMMAND: "python"

bt: WARNING:  stack address:0xe7aabd80, program counter:0xc0ee5b60

 #0 [<c01002d8>] (do_DataAbort) from [<c010ad58>]

    pc : [<c01d7308>]    lr : [<c01d72ec>]    psr: 60020193

    sp : e7aabcf8  ip : c193e69c  fp : edf34bf4

    r10: 00000000  r9 : 0000001f  r8 : 00000002

    r7 : c1938280  r6 : c1938200  r5 : 00000010  r4 : ef4bddb4

    r3 : ef4bddb4  r2 : 00000100  r1 : 00000000  r0 : ef4bdda0

    Flags: nZCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM

 #1 [<c010ad58>] (__dabt_svc) from [<c01d72ec>]

 #2 [<c01d7308>] (rmqueue_bulk.constprop.11) from [<c01d7540>]  //<<-- kernel panic

 #3 [<c01d7540>] (get_page_from_freelist) from [<c01d79c4>]

 #4 [<c01d79c4>] (__alloc_pages_nodemask) from [<c01f7bf4>]

 #5 [<c01f7bf4>] (handle_mm_fault) from [<c011525c>]

 #6 [<c011525c>] (do_page_fault) from [<c01002d8>]

 #7 [<c01002d8>] (do_DataAbort) from [<c010b03c>]


The data abort is raised since page.lru->next (R2) holds the invalid address 0x100.

0xc01d72f4 <rmqueue_bulk.constprop.11+0x58>:    cmp     r10, #0

0xc01d72f8 <rmqueue_bulk.constprop.11+0x5c>:    add     r3, r0, #20

0xc01d72fc <rmqueue_bulk.constprop.11+0x60>:    ldreq   r2, [r4]

0xc01d7300 <rmqueue_bulk.constprop.11+0x64>:    ldrne   r2, [r4, #4]

0xc01d7304 <rmqueue_bulk.constprop.11+0x68>:    strne   r3, [r4, #4]

0xc01d7308 <rmqueue_bulk.constprop.11+0x6c>:    streq   r3, [r2, #4]  //<<-- data abort
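
The faulting store corresponds to the first statement of __list_add() (include/linux/list.h), which list_add(&page->lru, list) expands to; a sketch with the register mapping:

static inline void __list_add(struct list_head *new,
			      struct list_head *prev,
			      struct list_head *next)
{
	next->prev = new;   /* streq r3, [r2, #4]  <<-- next = R2 = 0x100, new = R3 = &page->lru */
	new->next = next;
	new->prev = prev;
	prev->next = new;
}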

crash> struct page.lru  0xEF4BDDA0  -px

    lru = {

      next = 0x100,  //<<--

      prev = 0x200

    }


After reviewing the code, I figured out that this page belongs to the pcp (per-CPU page frame cache of the buddy system, holding order-0 pages).

static int rmqueue_bulk(struct zone *zone, unsigned int order,

   unsigned long count, struct list_head *list,

   int migratetype, bool cold)

{

 int i;


 spin_lock(&zone->lock);

 for (i = 0; i < count; ++i) {

  struct page *page;


//snip

  if (likely(!cold))

   list_add(&page->lru, list);  //<<--

  else

   list_add_tail(&page->lru, list);


To find the pcp address for CPU5, the following commands are used.

crash> p contig_page_data.node_zones[1].pageset

$5 = (struct per_cpu_pageset *) 0xc177ebdc


crash> struct per_cpu_pages  EDF34BDC

struct per_cpu_pages {

  count = 0x1,

  high = 0xba,

  batch = 0x1f,

  lists = {{

      next = 0xef51fc74,  //<<--MIGRATE_UNMOVABLE

      prev = 0xef51fc74

    }, {

      next = 0xedf34bf0, //<<--MIGRATE_RECLAIMABLE

      prev = 0xedf34bf0

    }, {

      next = 0xef4bdcd4,//<<--MIGRATE_MOVABLE

      prev = 0xef4bddf4

    }, {

      next = 0xedf34c00, //<<--MIGRATE_PCPTYPES

      prev = 0xedf34c00

    }}

}


(where) 0xEDF34BDC = 0xc177ebdc+0x2c7b6000

crash> p  __per_cpu_offset[5]

$7 = 0x2c7b6000


By the way, the list starting at 0xef4bdcd4 is found to be corrupted as follows.

crash> list 0x0 0xef4bdcd4

ef4bdcd4

ef4bdcf4

ef4bdd14

ef4bdd34

ef4bdd54

ef4bdd74

ef4bddb4

100

(where)

 #0 [<c01002d8>] (do_DataAbort) from [<c010ad58>]

    pc : [<c01d7308>]    lr : [<c01d72ec>]    psr: 60020193

    sp : e7aabcf8  ip : c193e69c  fp : edf34bf4

    r10: 00000000  r9 : 0000001f  r8 : 00000002

    r7 : c1938280  r6 : c1938200  r5 : 00000010  r4 : ef4bddb4

    r3 : ef4bddb4  r2 : 00000100  r1 : 00000000  r0 : ef4bdda0

    Flags: nZCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM

 #1 [<c010ad58>] (__dabt_svc) from [<c01d72ec>]

 #2 [<c01d7308>] (rmqueue_bulk.constprop.11) from [<c01d7540>]  //<<-- kernel panic

 #3 [<c01d7540>] (get_page_from_freelist) from [<c01d79c4>]


After the device was reassembled with another PMIC, the crash disappeared.

