The call stack at the time of the kernel crash is as follows.
The process name is Debugger, but it is a user process.
-001|__dabt_svc(asm)
-->|exception
-002|__list_del(inline)
-002|list_del(inline)
-002|buffered_rmqueue(inline)
-002|get_page_from_freelist(?, ?, order = 0, ?, high_zoneidx = 0, alloc_flags = 449, pref
-003|__alloc_pages_nodemask(gfp_mask = 33685722, order = 0, zonelist = 0xC1731380, nodema
-004|__alloc_zeroed_user_highpage(inline)
-004|alloc_zeroed_user_highpage_movable(inline)
-004|do_anonymous_page(inline)
-004|handle_pte_fault(inline)
-004|__handle_mm_fault(inline)
-004|handle_mm_fault(mm = 0xEDDD8A80, vma = 0xD910C948, address = 2739523584, flags = 169
-005|__do_page_fault(inline)
-005|do_page_fault(addr = 0, fsr = 748335104, regs = 0xC6D61FB0)
-006|do_DataAbort(addr = 2739523584, fsr = 2071, regs = 0xC6D61FB0)
-007|__dabt_usr(asm)
-->|exception
-008|NUR:0xA806E848(asm)
---|end of frame
The exception occurred while buffered_rmqueue() was deleting an entry from a linked list.
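For reference, __list_del(), where the exception actually fired, looks roughly like this in the upstream kernel (include/linux/list.h). If the free list is corrupted so that next or prev holds a garbage address, one of the two stores below is what raises the data abort:

static inline void __list_del(struct list_head *prev, struct list_head *next)
{
	next->prev = prev;	/* faults if next is a bogus pointer */
	prev->next = next;	/* faults if prev is a bogus pointer */
}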
First, let's follow the flow of the call stack. Looking at the stack, we can spot the do_DataAbort() function.
The do_DataAbort() function we are analyzing here is called during demand paging from user space, which makes it slightly different in nature from a data abort in kernel space. This is the path where the kernel actually allocates memory when user space writes to memory it has allocated but never touched before. Don't confuse the two cases.
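As a rough user-space illustration (a hypothetical snippet, not taken from the crashed process), this is the pattern that drives the call stack above; the kernel defers mapping a physical page until the first write:

#include <stdlib.h>

int main(void)
{
	/* The allocation may only reserve virtual address space;
	 * no physical page needs to be mapped yet. */
	char *p = malloc(1024 * 1024);

	if (!p)
		return 1;

	/* The first write to an unmapped page raises a translation
	 * fault, and the kernel allocates a zeroed page through
	 * do_DataAbort() -> ... -> do_anonymous_page(). */
	p[0] = 1;

	free(p);
	return 0;
}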
do_DataAbort()
[1]: Jumps to do_page_fault(), which the inf->fn function pointer points to
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
	struct siginfo info;

	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) // <<--[1]
		return;
(where)
(register struct fsr_info *) inf = 0xC161514C = fsr_info[7] -> (
(int (*)()) fn = 0xC0114428 = do_page_fault -> , // <<--
(int) sig = 11 = 0x0B = '....',
(int) code = 196609 = 0x00030001 = '....',
(char *) name = 0xC11DEC42 = kallsyms_token_index+0x3ED2 -> "page translation fault")
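Where does the index 7 into fsr_info[] come from? A rough sketch of fsr_fs(), based on arch/arm/mm/fault.h of this kernel era, applied to the fsr value seen above:

/* The fault status (FS) field of the ARM DFSR is split across
 * bits [3:0] and bit 10. */
#define FSR_FS4		(1 << 10)
#define FSR_FS3_0	(15)

static inline int fsr_fs(unsigned int fsr)
{
	return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6;
}

/* fsr = 2071 = 0x817: 0x817 & 0xF = 7 and bit 10 is clear, so
 * fsr_fs() returns 7 -> fsr_info[7] = "page translation fault",
 * matching the (where) output above. */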
The execution flow of do_page_fault() is as follows. Let's trace through the parts of the source code labeled [1]~[4] below.
[1]: Using the regs info, returns right away if the fault was not taken in user mode
[2]: If we are in atomic context, IRQs are disabled, or there is no user context (mm), jumps to no_context so that __do_kernel_fault() ends up being called
[3]: If the fault came from ARM user mode, ORs FAULT_FLAG_USER into flags
[4]: Calls __do_page_fault()
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	int fault, sig, code;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	if (notify_page_fault(regs, fsr)) // <<--[1]
		return 0;

	tsk = current;
	mm = tsk->mm;
//snip
	/*
	 * If we're in an interrupt, or have no irqs, or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || irqs_disabled() || !mm) // <<--[2]
		goto no_context;

	if (user_mode(regs)) // <<--[3]
		flags |= FAULT_FLAG_USER;
	if (fsr & FSR_WRITE)
		flags |= FAULT_FLAG_WRITE;

//snip

	fault = __do_page_fault(mm, addr, fsr, flags, tsk); // <<--[4]
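For reference, user_mode() at [3] just inspects the mode bits of the CPSR saved in pt_regs. Roughly, from arch/arm/include/asm/ptrace.h:

/* USR mode is 0b10000, so the low nibble of the saved CPSR is 0 only
 * when the fault came from user mode. Since this abort arrived via
 * __dabt_usr, FAULT_FLAG_USER gets set here. */
#define user_mode(regs)	(((regs)->ARM_cpsr & 0xf) == 0)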
Next, let's analyze the __do_page_fault() function.
[1]: Calls handle_mm_fault()
static int __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
		unsigned int flags, struct task_struct *tsk)
{
	struct vm_area_struct *vma;
	int fault;

	vma = find_vma(mm, addr);
	fault = VM_FAULT_BADMAP;
	if (unlikely(!vma))
		goto out;
	if (unlikely(vma->vm_start > addr))
		goto check_stack;

	/*
	 * Ok, we have a good vm_area for this
	 * memory access, so we can handle it.
	 */
good_area:
	if (access_error(fsr, vma)) {
		fault = VM_FAULT_BADACCESS;
		goto out;
	}

	return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); // <<--[1]
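Note that at [1] the faulting address is page-aligned with PAGE_MASK before being handed to handle_mm_fault(). A quick sketch, assuming a 4 KB page size:

#define PAGE_SHIFT	12
#define PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))	/* 0xFFFFF000 on 32-bit ARM */

/* e.g. 0xA349D123 & PAGE_MASK = 0xA349D000. The address 2739523584
 * (0xA349D000) in frame -004 of the call stack is the already-masked
 * value. */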
The function execution flow follows the order 1~3 below.
1 handle_mm_fault()
2 __handle_mm_fault()
3 handle_pte_fault()
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, unsigned int flags)
{
	int ret;

	__set_current_state(TASK_RUNNING);
//snip
	ret = __handle_mm_fault(mm, vma, address, flags); // <<--
static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, unsigned int flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

//snip
	 * A regular pmd is established and it can't morph into a huge pmd
	 * from under us anymore at this point because we hold the mmap_sem
	 * read mode and khugepaged takes it in write mode. So now it's
	 * safe to run pte_offset_map().
	 */
	pte = pte_offset_map(pmd, address);

	return handle_pte_fault(mm, vma, address, pte, pmd, flags); // <<--
}
static int handle_pte_fault(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, pmd_t *pmd, unsigned int flags)
{
	pte_t entry;
	spinlock_t *ptl;

	entry = ACCESS_ONCE(*pte);
//snip
		return do_anonymous_page(mm, vma, address, // <<--
					pte, pmd, flags);
	}
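The snip above hides the conditions that route us to do_anonymous_page(). Roughly, in upstream kernels of this era, the anonymous path is taken when the PTE has never been populated and the VMA has no fault handler (the do_fault()/do_swap_page() names below are from upstream and shown only as a sketch):

	if (!pte_present(entry)) {
		if (pte_none(entry)) {
			if (vma->vm_ops && vma->vm_ops->fault)
				return do_fault(mm, vma, address, pte,
						pmd, flags, entry);
			/* anonymous mapping touched for the first time */
			return do_anonymous_page(mm, vma, address,
						pte, pmd, flags);
		}
		return do_swap_page(mm, vma, address,
					pte, pmd, flags, entry);
	}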
Now let's look at the do_anonymous_page() function.
[1]: Calls alloc_zeroed_user_highpage_movable()
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		unsigned int flags)
{
	struct mem_cgroup *memcg;
	struct page *page;
	spinlock_t *ptl;
	pte_t entry;

	pte_unmap(page_table);

//snip

	/* Allocate our own private page. */
	if (unlikely(anon_vma_prepare(vma)))
		goto oom;
	page = alloc_zeroed_user_highpage_movable(vma, address); // <<--[1]
	if (!page)
		goto oom;
The macro functions are called in the numbered order below.
1 alloc_zeroed_user_highpage_movable()
2 __alloc_zeroed_user_highpage()
3 alloc_pages_vma()
4 alloc_pages()
5 alloc_pages_node()
To sum up, alloc_page_vma() eventually boils down to a call to __alloc_pages_nodemask().
static inline struct page *
alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
		unsigned long vaddr)
{
	return __alloc_zeroed_user_highpage(__GFP_MOVABLE | __GFP_CMA, vma, vaddr);
}
static inline struct page *
__alloc_zeroed_user_highpage(gfp_t movableflags,
		struct vm_area_struct *vma,
		unsigned long vaddr)
{
	struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags, // <<--
			vma, vaddr);

	if (page)
		clear_user_highpage(page, vaddr);

	return page;
}
#define alloc_pages(gfp_mask, order) \
	alloc_pages_node(numa_node_id(), gfp_mask, order)
#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \
	alloc_pages(gfp_mask, order)
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
		unsigned int order)
{
	/* Unknown node is current node */
	if (nid < 0)
		nid = numa_node_id();

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); // <<--
}
static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist)
{
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
}
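Before diving into __alloc_pages_nodemask(), it is worth decoding gfp_mask = 33685722 seen in frame -003 of the call stack. A rough decoding, assuming the usual flag values of this kernel era (the __GFP_CMA bit is a vendor extension, so its position is an assumption):

/* 33685722 = 0x020200DA */
#define __GFP_HIGHMEM	0x02u
#define __GFP_MOVABLE	0x08u
#define __GFP_WAIT	0x10u
#define __GFP_IO	0x40u
#define __GFP_FS	0x80u
#define __GFP_HARDWALL	0x20000u
#define __GFP_CMA	0x2000000u	/* assumed vendor bit */

/* 0x020200DA = __GFP_HIGHMEM | __GFP_MOVABLE | __GFP_WAIT | __GFP_IO
 *            | __GFP_FS | __GFP_HARDWALL | __GFP_CMA
 *            = GFP_HIGHUSER | __GFP_MOVABLE | __GFP_CMA,
 * which lines up with __alloc_zeroed_user_highpage(__GFP_MOVABLE |
 * __GFP_CMA, ...) shown earlier. */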
Now let's examine how __alloc_pages_nodemask() works.
[1]: Determines the migratetype
[2]: Calls get_page_from_freelist()
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist, nodemask_t *nodemask)
{
#ifdef CONFIG_ZONE_MOVABLE_CMA
	enum zone_type high_zoneidx = gfp_zone(gfp_mask & ~__GFP_MOVABLE);
#else
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
#endif
	struct zone *preferred_zone;
	struct zoneref *preferred_zoneref;
	struct page *page = NULL;
	int migratetype = gfpflags_to_migratetype(gfp_mask); // <<--[1]
	unsigned int cpuset_mems_cookie;
#ifdef CONFIG_ZONE_MOVABLE_CMA
	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
#else
	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
#endif
	int classzone_idx;
#ifdef __LOG_PAGE_ALLOC_ORDER__
	struct stack_trace trace;
	unsigned long entries[6] = {0};
#endif

	gfp_mask &= gfp_allowed_mask;

	lockdep_trace_alloc(gfp_mask);

	might_sleep_if(gfp_mask & __GFP_WAIT);

	if (should_fail_alloc_page(gfp_mask, order))
		return NULL;

//snip
	/* The preferred zone is used for statistics later */
	preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx,
				nodemask ? : &cpuset_current_mems_allowed,
				&preferred_zone);
	if (!preferred_zone)
		goto out;
	classzone_idx = zonelist_zone_idx(preferred_zoneref);

	/* First allocation attempt */
	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, // <<--[2]
			zonelist, high_zoneidx, alloc_flags,
			preferred_zone, classzone_idx, migratetype);
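get_page_from_freelist() walks the zonelist and, for this order-0 request, lands in buffered_rmqueue(), which pops a page off the per-CPU free list. A simplified sketch of that path (based on mm/page_alloc.c of this era, not the exact vendor source):

	/* order-0 branch inside buffered_rmqueue(); zone, migratetype,
	 * flags and page come from the enclosing function */
	if (likely(order == 0)) {
		struct per_cpu_pages *pcp;
		struct list_head *list;

		local_irq_save(flags);
		pcp = &this_cpu_ptr(zone->pageset)->pcp;
		list = &pcp->lists[migratetype];
		//snip
		page = list_entry(list->next, struct page, lru);
		list_del(&page->lru);	/* <<-- frame -002: __list_del() faults here */
		pcp->count--;
	}

Incidentally, alloc_flags = 449 (0x1C1) in frame -002 is consistent with ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR plus ALLOC_CMA, assuming the typical flag values of this era. So the crash comes down to a corrupted list_head on a free list: the data abort fires the moment __list_del() writes through the bogus next/prev pointers.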