[Kernel][Crash] 워크큐(workqueue) 락업(2) at [0321]

#커널 크래시 디버깅 및 TroubleShooting

자 여기까지 커널 크래시가 발행한 원인까지 확인했습니다.

그럼 CPU2에 워커풀에서 등록된 워커쓰레드를 살펴봐야 겠습니다.

워커쓰레드들은 워커 풀에 등록할 때 struct worker_pool.workers 멤버 링크드 리스트로 등록합니다.

1 (struct worker_pool *) (struct worker_pool*)0xFFFFFFC63A339000 = 0xFFFFFFC63A339000 -> (

2 (spinlock_t) lock = ((struct raw_spinlock) rlock = ((arch_spinlock_t) raw_lock = ((u16) owner =

3 (int) cpu = 2,

4 (int) node = 0,

5 (int) id = 4,

6 (unsigned int) flags = 1,

// ....

7 (struct mutex) attach_mutex = ((atomic_t) count = ((int) counter = 1), (spinlock_t) wait_lock =

8 (struct list_head) workers = (

9 (struct list_head *) next = 0xFFFFFFC4DE83A7D0 -> (

10 (struct list_head *) next = 0xFFFFFFC50C460CD0 -> (

11 (struct list_head *) next = 0xFFFFFFC4E26A30D0 -> (

12 (struct list_head *) next = 0xFFFFFFC63A339300 -> (

13 (struct list_head *) next = 0xFFFFFFC4DE83A7D0,

자 위에서 7번째부터 13번째까지 링크트 리스트로 연결된 주소가 보이죠?

struct worker_pool.workers.next에 등록된 리스트 주소는 해당 워크의 struct worker.node 와 같습니다.

struct worker {

/* on idle list while idle, on busy hash table while busy */

union {

struct list_head entry; /* L: while idle */

struct hlist_node hentry; /* L: while busy */

};

struct work_struct *current_work; /* L: work being processed */

// ..

struct task_struct *task; /* I: worker task */

struct worker_pool *pool; /* I: the associated pool */

/* L: for rescuers */

struct list_head node; /* A: anchored at pool->workers */

container_of 매크로를 써서 이 자료구조의 상관 관계를 다음과 같이 표시할 수 있습니다.

container_of(struct worker_pool.workers.next, struct worker, node)

참고로, Trace32로 다음과 같이 offsetof와 container_of 명령어를 매크로로 등록해서 쓸 수 있습니다.

sYmbol.NEW.MACRO offsetof(type,member) ((int)(&((type*)0)->member))

sYmbol.NEW.MACRO container_of(ptr,type,member) ((type *)((char *)(ptr)-offsetof(type,member)))

이 명령어를 활용해서 워커풀에 등록된 워커 쓰레드 정보를 확인할께요. 다음 자료 구조 주소를 참고하고요.

9 (struct list_head *) next = 0xFFFFFFC4DE83A7D0 -> (

10 (struct list_head *) next = 0xFFFFFFC50C460CD0 -> (

11 (struct list_head *) next = 0xFFFFFFC4E26A30D0 -> (

12 (struct list_head *) next = 0xFFFFFFC63A339300 -> (

13 (struct list_head *) next = 0xFFFFFFC4DE83A7D0,

9줄에 있는 주소부터 확인할게요.

v.v %t %h %d %s container_of(0xFFFFFFC4DE83A7D0,struct worker,node)

1 (struct worker *) container_of(0xFFFFFFC4DE83A7D0,struct worker,node) = 0xFFFFF

2 (struct list_head) entry = ((struct list_head *) next = 0x0 = , (struct list_

3 (struct hlist_node) hentry = ((struct hlist_node *) next = 0x0 = , (struct hl

4 (struct work_struct *) current_work = 0xFFFFFFC62CFE67F8 = __efistub__end+0x2

5 (work_func_t) current_func = 0xFFFFFF97D7172254 = pm_runtime_work,

6 (struct pool_workqueue *) current_pwq = 0xFFFFFFC63A340600 = __efistub__end+0

7 (bool) desc_valid = FALSE,

8 (struct list_head) scheduled = ((struct list_head *) next = 0xFFFFFFC4DE83A7B

9 (struct task_struct *) task = 0xFFFFFFC541030080

10 (struct worker_pool *) pool = 0xFFFFFFC63A339000

11 (struct list_head) node = ((struct list_head *)

12 (long unsigned int) last_active = 4302072797 = 0x00000001006C6BDD,

13 (unsigned int) flags = 0 = 0x0,

14 (int) id = 2 = 0x2,

15 (char [24]) desc = "",

16 (struct workqueue_struct *) rescue_wq = 0x0 = )

가장 중요한 정보부터 확인하면 해당 워커 쓰레드의 태스크 디스크립터는 0xFFFFFFC541030080이고,

가장 마지막에 실행된 시간은 last_active은 4302072797입니다. 이 값은 jiffies 이겠죠.

9 (struct task_struct *) task = 0xFFFFFFC541030080

12 (long unsigned int) last_active = 4302072797 = 0x00000001006C6BDD,

태스크 디스크립터는 0xFFFFFFC541030080 정보로 이 프로세스 이름과 콜스택을 알아볼까요?

(struct task_struct *) (struct task_struct*)0xFFFFFFC541030080 = 0xFFFFFFC541030080 -> (

(struct thread_info) thread_info = ((long unsigned int) flags = 0, (mm_segment_t) addr_limit = 1

(long int) state = 2,

(void *) stack = 0xFFFFFFC5F4704000,

(atomic_t) usage = ((int) counter = 2),

//...

(char [16]) comm = "kworker/2:2",

(struct nameidata *) nameidata = 0x0,

"kworker/2:2" 프로세스 콜스택은 다음과 같군요.

-000|__switch_to(prev = 0xFFFFFFC541030080, next = 0xFFFFFFC60E561180)

-001|context_switch(inline)

-001|__schedule(?)

-002|schedule()

-003|schedule_timeout(timeout = 9223372036854775807)

-004|do_wait_for_common(inline)

-004|__wait_for_common(inline)

-004|wait_for_common(x = 0xFFFFFFC5F4707B30, timeout = 9223372036854775807, ?)

-005|wait_for_completion(x = 0xFFFFFFC5F4707B30)

-006|destroy_work_on_stack(inline)

-006|flush_work(?)

-007|ufshcd_suspend(hba = 0xFFFFFFC623C0AB70, pm_op = UFS_RUNTIME_PM)

-008|ufshcd_runtime_suspend(hba = 0xFFFFFFC623C0AB70)

-009|ufshcd_pltfrm_runtime_suspend(dev = 0xFFFFFFC62CFE6690)

-010|pm_generic_runtime_suspend(dev = 0xFFFFFFC62CFE6690)

-011|__rpm_callback(inline)

-011|rpm_callback(cb = 0xFFFFFF97D716DCE0, dev = 0xFFFFFFC62CFE6690)

-012|rpm_suspend(dev = 0xFFFFFFC62CFE6690, rpmflags = 10)

-013|rpm_idle(dev = 0xFFFFFFC62CFE6690, rpmflags = 2)

-014|spin_unlock_irq(inline)

-014|pm_runtime_work(work = 0xFFFFFFC62CFE67F8)

-015|__read_once_size(inline)

-015|static_key_count(inline)

-015|static_key_false(inline)

-015|trace_workqueue_execute_end(inline)

-015|process_one_work(worker = 0xFFFFFFC4DE83A780, work = 0xFFFFFFC62CFE67F8)

-016|__read_once_size(inline)

-016|list_empty(inline)

-016|worker_thread(__worker = 0xFFFFFFC4DE83A780)

-017|kthread(_create = 0xFFFFFFC4DA494D00)

-018|ret_from_fork(asm)

위 콜스택을 보면 process_one_work -> pm_runtime_work 흐름으로 워크가 실행된 후

디바이스 드라이버에 등록된 suspend 콜백 함수를 호출합니다. 이후 flush_work 함수를 호출합니다.

1 bool flush_work(struct work_struct *work)

2 {

3 struct wq_barrier barr;

5 lock_map_acquire(&work->lockdep_map);

6 lock_map_release(&work->lockdep_map);

8 if (start_flush_work(work, &barr)) {

9 wait_for_completion(&barr.done);

10 destroy_work_on_stack(&barr.work);

11 return true;

그런데 세번째 줄 코드에 다음과 같은 변수 선언문을 볼 수 있습니다. 이 코드는 뭘 의미할까요?

3 struct wq_barrier barr;

이 구조체를 배리어 워크라고 합니다. 현재 워크(struct work_struct)가 워커 쓰레드에서 실행 중일 때,

flush_work 함수를 호출하면 지금 처리 중인 워크 실행을 마무리 할 수 있습니다. 참고하시고요.

이번에는 가장 중요한 코드 중 하나인 8번과 9번 줄 코드입니다.

8 if (start_flush_work(work, &barr)) {

9 wait_for_completion(&barr.done);

해당 코드인 flush_work 함수를 9번줄 코드에서 wait_for_completion(&barr.done) 함수를 호출합니다.

이 함수를 호출하면 complete(&barr.done) 가 실행될 때 까지 기다립니다.

static void wq_barrier_func(struct work_struct *work)

{

struct wq_barrier *barr = container_of(work, struct wq_barrier, work);

complete(&barr->done);

}

달리 설명드리면 배리어 워크가 워커 쓰레드에서 실행될 때 까지 기다리는 루틴입니다.

워크큐에서 상당히 많이 쓰는 배리어 워크는 다음 시간에 다룰 예정이니 기다려 주세요.

그럼 위 콜스택에서 &barr.done 정체가 뭔지 볼까요?

-005|wait_for_completion(x = 0xFFFFFFC5F4707B30)

wait_for_completion 함수 선언부를 보면 해당 파라미터 x = 0xFFFFFFC5F4707B30 구조체는 struct completion입니다. 그래서 이 구조체로 디버깅 정보를 확인하면 다음과 같습니다.

(struct completion *) (struct completion *)0xFFFFFFC5F4707B30 = 0xFFFFFFC5F4707B30 -> (

(unsigned int) done = 0x0,

(wait_queue_head_t) wait = (

(spinlock_t) lock = (

(struct raw_spinlock) rlock = ((arch_spinlock_t) raw_lock = ((u16) owner = 0x1, (u16) next =

(struct list_head) task_list = (

(struct list_head *) next = 0xFFFFFFC5F4707A88 -> (

(struct list_head *) next = 0xFFFFFFC5F4707B50 -> (

(struct list_head *) next = 0xFFFFFFC5F4707A88,

struct completion->wait.task_list에 2개의 링크드 리스트가 보이는군요.

여기에 등록된 링크드 리스트는 struct __wait_queue.task_list에 등록된 링크드 리스트와 같습니다.

그래서 container_of(struct completion->wait.task_list.next,struct __wait_queue, task_list) 조건을 만족합니다.

(where)

struct __wait_queue {

unsigned int flags;

void *private;

wait_queue_func_t func;

struct list_head task_list;

};

typedef struct __wait_queue wait_queue_t;

이 조건에 따라 다음 명령어를 입력하겠습니다.

v.v %all container_of(0xFFFFFFC5F4707A88,struct __wait_queue,task_list)

v.v %all container_of(0xFFFFFFC5F4707B50,struct __wait_queue,task_list)

다음 정보를 태스크 디스크립터가 0xFFFFFFC541030080인 프로세스가 종료되기를 기다리고 있습니다.

v.v %h %t container_of(0xFFFFFFC5F4707A88,struct __wait_queue,task_list)

(struct __wait_queue *) container_of(0xFFFFFFC5F4707A88,struct __wait_queue,task_list) = 0xFFFFFFC

(unsigned int) flags = 1 = 0x1 = '....',

(void *) private = 0xFFFFFFC541030080

(wait_queue_func_t) func = 0xFFFFFF97D6AE571C = default_wake_function -> ,

(struct list_head) task_list = ((struct list_head *) next = 0xFFFFFFC5F4707B50

0xFFFFFFC541030080 태스크 디스크립터는 "kworker/2:2" 프로세스 것이고요. 문제는 다음 wait queue에 등록된 인스턴스 입니다. 0xFFFFFFC5F4707B50 주소를 눈여겨 보세요.

(struct __wait_queue *) container_of(0xFFFFFFC5F4707B50,struct __wait_queue,task_list) = 0xFFFFFFC

(unsigned int) flags = 65537 = 0x00010001

(void *) private = 0xFFFFFFC6FFFFFFFF

(wait_queue_func_t) func = 0xFFFFFFFFFFFFFFFF

(struct list_head) task_list = ((struct list_head *) next = 0xFFFFFFC5F4707A88

확인해보니 안타깝게도 쓰레기 값이군요.

(where)

(struct completion *) (struct completion *)0xFFFFFFC5F4707B30 = 0xFFFFFFC5F4707B30 -> (

(unsigned int) done = 0x0,

(wait_queue_head_t) wait = (

(spinlock_t) lock = (

(struct raw_spinlock) rlock = ((arch_spinlock_t) raw_lock = ((u16) owner = 0x1, (u16) next =

(struct list_head) task_list = (

(struct list_head *) next = 0xFFFFFFC5F4707A88 -> (

(struct list_head *) next = 0xFFFFFFC5F4707B50 -> ( //<<--

여기까지 디버깅 정보를 종합하면 8번 줄에 이미 wait_queue에 등록된 프로세스 때문에 0xFFFFFFC5F4707A88에 해당하는 wait queue가 처리되지 못하고 있습니다.

1 (struct completion *) (struct completion *)0xFFFFFFC5F4707B30 = 0xFFFFFFC5F4707B30 -> (

2 (unsigned int) done = 0x0,

3 (wait_queue_head_t) wait = (

4 (spinlock_t) lock = (

5 (struct raw_spinlock) rlock = ((arch_spinlock_t) raw_lock = ((u16) owner = 0x1, (u16) next =

6 (struct list_head) task_list = (

7 (struct list_head *) next = 0xFFFFFFC5F4707A88 -> (

8 (struct list_head *) next = 0xFFFFFFC5F4707B50 -> (

여기까지 모은 디버깅 정보를 정리하면 다른 CPU 워커 풀에 있는 배리어 워크가 실행되지 않았습니다.

그래서 CPU2의 워커 풀이 Stuck 되어 워크큐 락업으로 감지된 거죠.

-000|__switch_to(prev = 0xFFFFFFC541030080, next = 0xFFFFFFC60E561180)

-001|context_switch(inline)

-001|__schedule(?)

-002|schedule()

-003|schedule_timeout(timeout = 9223372036854775807)

-004|do_wait_for_common(inline)

-004|__wait_for_common(inline)

-004|wait_for_common(x = 0xFFFFFFC5F4707B30, timeout = 9223372036854775807, ?)

-005|wait_for_completion(x = 0xFFFFFFC5F4707B30)

-006|destroy_work_on_stack(inline)

-006|flush_work(?)

-007|ufshcd_suspend(hba = 0xFFFFFFC623C0AB70, pm_op = UFS_RUNTIME_PM)

-008|ufshcd_runtime_suspend(hba = 0xFFFFFFC623C0AB70)

-009|ufshcd_pltfrm_runtime_suspend(dev = 0xFFFFFFC62CFE6690)

-010|pm_generic_runtime_suspend(dev = 0xFFFFFFC62CFE6690)

-011|__rpm_callback(inline)

-011|rpm_callback(cb = 0xFFFFFF97D716DCE0, dev = 0xFFFFFFC62CFE6690)

-012|rpm_suspend(dev = 0xFFFFFFC62CFE6690, rpmflags = 10)

-013|rpm_idle(dev = 0xFFFFFFC62CFE6690, rpmflags = 2)

-014|spin_unlock_irq(inline)

-014|pm_runtime_work(work = 0xFFFFFFC62CFE67F8)

-015|__read_once_size(inline)

-015|static_key_count(inline)

-015|static_key_false(inline)

-015|trace_workqueue_execute_end(inline)

-015|process_one_work(worker = 0xFFFFFFC4DE83A780, work = 0xFFFFFFC62CFE67F8)

-016|__read_once_size(inline)

-016|list_empty(inline)

-016|worker_thread(__worker = 0xFFFFFFC4DE83A780)

-017|kthread(_create = 0xFFFFFFC4DA494D00)

-018|ret_from_fork(asm)

이번에는 다른 워커 풀에서 Pending된 배리어 워크를 찾아야 합니다.

# Reference: For more information on 'Linux Kernel';

디버깅을 통해 배우는 리눅스 커널의 구조와 원리. 1

http://www.yes24.com/Product/Goods/90085976

https://wikibook.co.kr/linux-kernel-1/

디버깅을 통해 배우는 리눅스 커널의 구조와 원리. 2

http://www.yes24.com/Product/Goods/90087307

https://wikibook.co.kr/linux-kernel-2/

저작자표시

'Core BSP 분석 > 커널 트러블슈팅' 카테고리의 다른 글

[Linux][Kernel] 커널 크래시란 (0)	2023.05.07
[Kernel][Crash] 워크큐(workqueue) 락업(3) at [0321] (0)	2023.05.07
[Kernel][Crash][0320] 워크큐(workqueue) 락업(1) (0)	2023.05.07
[Kernel][Panic] panic@ttwu_do_activate (메모리 불량 보드) (0)	2023.05.07
[Kernel][Stability] tcp_v4_rcv -> __stack_chk_fail (0)	2023.05.07

RISC-V and Arm Linux Kernel Hacks

[Kernel][Crash] 워크큐(workqueue) 락업(2) at [0321]

'Core BSP 분석 > 커널 트러블슈팅' 카테고리의 다른 글

티스토리툴바

[Kernel][Crash] 워크큐(workqueue) 락업(2) at [0321]

'Core BSP 분석 > 커널 트러블슈팅' 카테고리의 다른 글

'Core BSP 분석/커널 트러블슈팅' Related Articles

티스토리툴바