본문 바로가기

리눅스 커널의 구조와 원리/14. 메모리 관리

[Linux kernel] Quick Review of the OOM (Out of Memory) Killer Routine

Log at the moment of OOM

[  126.809058] sysrq: Manual OOM execution
[  126.809184] kworker/0:2 invoked oom-killer: gfp_mask=0x6000c0(GFP_KERNEL), nodemask=(null), order=-1, oom_score_adj=0
[  126.809217] kworker/0:2 cpuset=/ mems_allowed=0
...
[  126.810045] lowmem_reserve[]: 0 0 1204 1204
[  126.810093] HighMem free:1054428kB min:512kB low:7216kB high:13920kB active_anon:22608kB inactive_anon:9220kB active_file:61020kB inactive_file:75840kB unevictable:16kB writepending:0kB present:1232896kB managed:1232896kB mlocked:16kB kernel_stack:0kB pagetables:1740kB bounce:0kB free_pcp:3040kB local_pcp:392kB free_cma:0kB
[  126.810152] lowmem_reserve[]: 0 0 0 0
[  126.810187] DMA: 2*4kB (EC) 5*8kB (UEC) 5*16kB (UMEC) 5*32kB (UMEC) 3*64kB (UEC) 4*128kB (UMEC) 0*256kB 3*512kB (UEC) 1*1024kB (C) 3*2048kB (UM) 160*4096kB (MC) = 665056kB
[  126.810289] HighMem: 73*4kB (U) 17*8kB (U) 3*16kB (U) 2*32kB (U) 3*64kB (U) 6*128kB (U) 1*256kB (M) 4*512kB (UM) 4*1024kB (UM) 1*2048kB (U) 255*4096kB (M) = 1054428kB

Callstack with 'echo f > /proc/sysrq-trigger'

kworker/0:2-1150    [000] ....  3471.541617: <stack trace>
 => show_free_areas+0x8/0x7a8
 => show_mem+0x3c/0xd4
 => dump_header+0x68/0x204
 => oom_kill_process+0x1f8/0x200
 => out_of_memory+0xf0/0x330
 => moom_callback+0x6c/0xb0
 => process_one_work+0x1f4/0x4d8
 => worker_thread+0x50/0x480
 => kthread+0x148/0x158
 => ret_from_fork+0x10/0x30


Message-printing routine:
 
mm/oom_kill.c
/*
 * dump_header - print the diagnostic report for an OOM-killer invocation.
 * @oc: OOM context; its gfp_mask, order, nodemask and memcg fields are read.
 * @p:  task handed to dump_oom_summary(); when NULL the summary is skipped.
 *
 * Output is produced in this order:
 *   1. the "<comm> invoked oom-killer" line for the current task,
 *   2. a warning when CONFIG_COMPACTION is off and oc->order is non-zero,
 *   3. a stack dump,
 *   4. memory state: memcg info for a memcg OOM, otherwise show_mem()
 *      (plus the unreclaimable-slab dump when should_dump_unreclaim_slab()
 *      says so),
 *   5. the task list, if sysctl_oom_dump_tasks is set,
 *   6. the OOM summary for @p, if @p is non-NULL.
 */
static void dump_header(struct oom_control *oc, struct task_struct *p)
{
	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
		current->signal->oom_score_adj);

	if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) {
		pr_warn("COMPACTION is disabled!!!\n");
	}

	dump_stack();

	if (!is_memcg_oom(oc)) {
		/* Not a memcg OOM: dump memory state filtered by the OOM nodemask. */
		show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
		if (should_dump_unreclaim_slab()) {
			dump_unreclaimable_slab();
		}
	} else {
		/* memcg OOM: report the memory cgroup's own accounting. */
		mem_cgroup_print_oom_meminfo(oc->memcg);
	}

	if (sysctl_oom_dump_tasks) {
		dump_tasks(oc);
	}

	if (p) {
		dump_oom_summary(oc, p);
	}
}

'show_free_areas' function

https://elixir.bootlin.com/linux/v5.15.100/source/mm/page_alloc.c
void show_free_areas(unsigned int filter, nodemask_t *nodemask)
{
unsigned long free_pcp = 0;
int cpu;
struct zone *zone;
pg_data_t *pgdat;

for_each_populated_zone(zone) {
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;

for_each_online_cpu(cpu)
free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
}

printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
" unevictable:%lu dirty:%lu writeback:%lu\n"
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
" kernel_misc_reclaimable:%lu\n"
" free:%lu free_pcp:%lu free_cma:%lu\n",
global_node_page_state(NR_ACTIVE_ANON),
global_node_page_state(NR_INACTIVE_ANON),
global_node_page_state(NR_ISOLATED_ANON),
global_node_page_state(NR_ACTIVE_FILE),
global_node_page_state(NR_INACTIVE_FILE),
global_node_page_state(NR_ISOLATED_FILE),
global_node_page_state(NR_UNEVICTABLE),
global_node_page_state(NR_FILE_DIRTY),
global_node_page_state(NR_WRITEBACK),
global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
global_node_page_state(NR_FILE_MAPPED),
global_node_page_state(NR_SHMEM),
global_node_page_state(NR_PAGETABLE),
global_zone_page_state(NR_BOUNCE),
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
global_zone_page_state(NR_FREE_PAGES),
free_pcp,
global_zone_page_state(NR_FREE_CMA_PAGES));

'global_node_page_state' function

include/linux/vmstat.h#L200
/*
 * global_node_page_state - read the global value of a node statistic.
 * @item: node_stat_item to read.
 *
 * Warns once if vmstat_item_in_bytes(@item) is true, then returns the value
 * obtained from global_node_page_state_pages(@item).
 * NOTE(review): byte-counted items presumably should call
 * global_node_page_state_pages() directly -- confirm against callers.
 */
static inline unsigned long global_node_page_state(enum node_stat_item item)
{
	unsigned long pages;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	pages = global_node_page_state_pages(item);
	return pages;
}

'global_node_page_state_pages' function

include/linux/vmstat.h#L190
/*
 * global_node_page_state_pages - read one global node counter.
 * @item: index into vm_node_stat[].
 *
 * Atomically reads vm_node_stat[@item]. With CONFIG_SMP a negative reading
 * is reported as 0; without CONFIG_SMP the raw value is returned
 * (converted to unsigned long).
 * NOTE(review): the negative reading is presumably a transient artifact of
 * per-CPU counter batching -- confirm.
 */
static inline
unsigned long global_node_page_state_pages(enum node_stat_item item)
{
	long count = atomic_long_read(&vm_node_stat[item]);

#ifdef CONFIG_SMP
	/* Never expose a negative counter to callers. */
	if (count < 0)
		return 0;
#endif
	return count;
}

'enum node_stat_item'
include/linux/mmzone.h 
/*
 * Per-node statistics items. vm_node_stat[] is indexed by these values, so
 * the enumerator order determines each counter's slot. The *_BASE names are
 * aliases for the first item of the group that follows them.
 */
enum node_stat_item {
NR_LRU_BASE,
NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
NR_ACTIVE_ANON, /* must match order of LRU_[IN]ACTIVE */
NR_INACTIVE_FILE, /* must match order of LRU_[IN]ACTIVE */
NR_ACTIVE_FILE, /* must match order of LRU_[IN]ACTIVE */
NR_UNEVICTABLE, /* must match order of LRU_[IN]ACTIVE */
/* NOTE(review): the _B suffix presumably marks counters kept in bytes
   (cf. vmstat_item_in_bytes()) -- confirm */
NR_SLAB_RECLAIMABLE_B,
NR_SLAB_UNRECLAIMABLE_B,
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
WORKINGSET_NODES,
WORKINGSET_REFAULT_BASE,
WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE,
WORKINGSET_REFAULT_FILE,
WORKINGSET_ACTIVATE_BASE,
WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE,
WORKINGSET_ACTIVATE_FILE,
WORKINGSET_RESTORE_BASE,
WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE,
WORKINGSET_RESTORE_FILE,
WORKINGSET_NODERECLAIM,
NR_ANON_MAPPED, /* Mapped anonymous pages */
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
   only modified from process context */
NR_FILE_PAGES,
NR_FILE_DIRTY,
NR_WRITEBACK,
NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
NR_SHMEM_THPS,
NR_SHMEM_PMDMAPPED,
NR_FILE_THPS,
NR_FILE_PMDMAPPED,
NR_ANON_THPS,
NR_VMSCAN_WRITE,
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */
NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */
NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
NR_KERNEL_STACK_KB, /* measured in KiB */
/* Present only with CONFIG_SHADOW_CALL_STACK; shifts the indices of all later items. */
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
NR_KERNEL_SCS_KB, /* measured in KiB */
#endif
NR_PAGETABLE, /* used for pagetables */
/* Present only with CONFIG_SWAP. */
#ifdef CONFIG_SWAP
NR_SWAPCACHE,
#endif
NR_VM_NODE_STAT_ITEMS /* last entry: number of items defined above for this config */
};

'vm_node_stat' with TRACE32

  vm_node_stat = (
    [0] = (counter = 52435),  // NR_INACTIVE_ANON
    [1] = (counter = 266), // NR_ACTIVE_ANON
    [2] = (counter = 115288),
    [3] = (counter = 27257),
    [4] = (counter = 17555),
    [5] = (counter = 7484),
    [6] = (counter = 7500),
    [7] = (counter = 0),
    [8] = (counter = 0),
    [9] = (counter = 0),
    [10] = (counter = 0),
    [11] = (counter = 0),
    [12] = (counter = 0),
    [13] = (counter = 0),
    [14] = (counter = 0),
    [15] = (counter = 0),
    [16] = (counter = 0),
    [17] = (counter = 49938),
    [18] = (counter = 42295),
    [19] = (counter = 162865),
    [20] = (counter = 81),
    [21] = (counter = 0),
    [22] = (counter = 0),
    [23] = (counter = 20320),
    [24] = (counter = 0),
    [25] = (counter = 0),
    [26] = (counter = 0),
    [27] = (counter = 0),
    [28] = (counter = 0),
    [29] = (counter = 0),
    [30] = (counter = 0),
    [31] = (counter = 32378),
    [32] = (counter = 31663),
    [33] = (counter = 0),
    [34] = (counter = 0),
    [35] = (counter = 0),
    [36] = (counter = 3920),
    [37] = (counter = 1400),