Case Study 4.1: Reading /proc/self/maps — Your Process's Memory Layout
A complete NASM program that reads and parses its own memory map
Overview
The /proc/self/maps file is one of Linux's most useful debugging interfaces. It shows the complete virtual address space of the calling process: every mapped region, its permissions, the backing file (if any), and the region name. Understanding this output is essential for debugging memory issues, understanding security mitigations, and reasoning about where data actually lives.
This case study extends the basic readmaps program from Chapter 4 into a more useful tool: one that categorizes regions, counts them by type, and prints a summary.
The /proc/self/maps Format
Each line of /proc/self/maps has this format:
start-end perms offset dev inode pathname
Example:
55f4a1234000-55f4a1235000 r-xp 00000000 08:02 1234567 /path/to/executable
Fields:
- start-end: virtual address range in hex
- perms: 4-character permissions: r/-, w/-, x/-, then s (shared) or p (private/copy-on-write)
- offset: offset into the backing file
- dev: device number (major:minor)
- inode: inode number (0 for anonymous mappings)
- pathname: file path, or [stack], [heap], [vdso], [vvar], [vsyscall], or empty
A More Complete Implementation
; maps_analyzer.asm
; Reads /proc/self/maps and categorizes each region
; Reports: total regions and executable regions
; (anonymous-region and stack counting are declared but not yet implemented)
;
; Build:
; nasm -f elf64 maps_analyzer.asm -o maps_analyzer.o
; ld maps_analyzer.o -o maps_analyzer
section .data

; NUL-terminated path handed to sys_open.
proc_path:          db "/proc/self/maps", 0

; ---- Output strings -------------------------------------------------
; Every *_len constant is computed as `$ - label`, so it must remain on
; the line immediately following the string it measures.

header:             db "=== Memory Map Analysis ===", 10
header_len          equ $ - header

label_total:        db "Total regions : "
label_total_len     equ $ - label_total

label_exec:         db "Executable : "
label_exec_len      equ $ - label_exec

; The two labels below and `newline` are not referenced by the code in
; this listing.
label_anon:         db "Anonymous : "
label_anon_len      equ $ - label_anon

label_stack:        db "Stack : "
label_stack_len     equ $ - label_stack

newline:            db 10
section .bss

; ---- File and line-buffer state (read and written by read_line) -----
mapfd:              resq 1              ; value returned by sys_open
buffer:             resb 64 * 1024      ; 64KB read buffer (65536 bytes)
read_pos:           resq 1              ; pointer to the start of the next line
read_end:           resq 1              ; pointer one past the last valid byte

; ---- Region counters --------------------------------------------------
total_count:        resq 1              ; lines (regions) seen
exec_count:         resq 1              ; regions whose perms contain 'x'
anon_count:         resq 1              ; only zeroed in this listing
stack_count:        resq 1              ; only zeroed in this listing

; ---- Scratch space for print_uint64 -----------------------------------
num_buf:            resb 24             ; decimal digits plus trailing newline
section .text
global _start

; ------------------------------------------------------------
; print_string: write a run of bytes to stdout
;   In:    rsi = address of the first byte
;          rdx = number of bytes to write
;   Out:   rax = value returned by sys_write
;   Clobb: rax, rdi, and rcx/r11 (overwritten by the syscall instruction)
; ------------------------------------------------------------
print_string:
        mov     eax, 1                  ; sys_write; 32-bit write zero-extends into rax
        mov     edi, eax                ; fd 1 = stdout
        syscall
        ret
; ------------------------------------------------------------
; print_cstr: write a NUL-terminated string to stdout
;   In:    rdi = address of the string
;   Out:   rax = value returned by sys_write (via print_string)
;   Clobb: rax, rcx, rdx, rsi, rdi, r11
; The terminating NUL itself is not written.
; ------------------------------------------------------------
print_cstr:
        xor     edx, edx                ; rdx = number of bytes before the NUL so far
.measure:
        cmp     byte [rdi + rdx], 0
        je      .emit
        inc     rdx
        jmp     .measure
.emit:
        mov     rsi, rdi                ; print_string expects the address in rsi
        call    print_string            ; rdx already holds the length
        ret
; ============================================================
; print_uint64: print an unsigned 64-bit integer in decimal,
;               followed by a newline, to stdout
;   In:    rdi = value to print
;   Out:   rax = value returned by sys_write (via print_string)
;   Clobb: rax, rcx, rdx, rsi, rdi, r11
;
; Digits are produced least-significant first, so they are stored
; backwards from the end of num_buf. The last byte of num_buf holds
; the newline; a 64-bit value needs at most 20 digits, which fits in
; the 24-byte buffer.
; ============================================================
print_uint64:
        mov     rax, rdi                        ; rax = value still to convert
        lea     rsi, [rel num_buf + 23]         ; rsi = write cursor (last byte)
        mov     byte [rsi], 10                  ; trailing newline
        mov     ecx, 10                         ; divisor

        ; do { emit one digit } while (value != 0)
        ; Running the body at least once also handles value == 0 ("0").
.next_digit:
        xor     edx, edx                        ; rdx:rax / rcx needs rdx = 0
        div     rcx                             ; rax = quotient, rdx = remainder
        add     dl, '0'
        dec     rsi
        mov     [rsi], dl
        test    rax, rax
        jnz     .next_digit

        ; rsi = first digit; length = (num_buf + 24) - rsi, newline included
        lea     rdx, [rel num_buf + 24]
        sub     rdx, rsi
        call    print_string
        ret
; ============================================================
; read_line: return the next newline-terminated line of the file
;            open on [mapfd], refilling `buffer` as needed
;   In:    (none; uses buffer, read_pos, read_end, mapfd)
;   Out:   rsi = address of the first byte of the line
;          rcx = line length in bytes (newline not included)
;          CF  = 0 if a complete line was found
;          CF  = 1 on EOF or read error; rsi/rcx then describe any
;                unterminated data left in the buffer (rcx may be 0)
;   Clobb: rax, rdx, rdi, r11, flags (rbx, r12, r13 are preserved)
;
; Register roles inside the routine:
;   r13 = start of the line currently being assembled
;   r12 = scan cursor (every byte in [r13, r12) is known not to be '\n')
;   rbx = read_end, then the length of the partial line during a refill
;
; A line longer than the whole buffer leaves no room to read into;
; sys_read is then asked for 0 bytes, returns 0, and the call ends
; exactly like EOF.
; Assumes the direction flag is clear (forward string operations).
; ============================================================
READ_BUF_SIZE   equ 65536               ; must equal the size reserved for `buffer`

read_line:
        push    rbx
        push    r12
        push    r13

        mov     r13, [rel read_pos]     ; r13 = line start
        mov     r12, r13                ; r12 = scan cursor

.scan_loop:
        mov     rbx, [rel read_end]
        cmp     r12, rbx                ; unscanned data left in the buffer?
        jb      .check_newline

        ; ---- Refill ---------------------------------------------------
        ; Slide the partial line [r13, read_end) down to the start of
        ; the buffer so the new data can be appended after it. The copy
        ; runs forward with destination <= source, so overlap is safe.
        ; With nothing left over (rbx = 0) rep movsb does nothing.
        sub     rbx, r13                ; rbx = bytes of partial line
        lea     rdi, [rel buffer]
        mov     rsi, r13
        mov     rcx, rbx
        rep movsb

        lea     r13, [rel buffer]       ; line now starts at the buffer start
        lea     r12, [r13 + rbx]        ; cursor = first byte not yet scanned
        mov     [rel read_pos], r13     ; keep the saved state consistent
        mov     [rel read_end], r12     ; even if the read below fails

        xor     eax, eax                ; sys_read
        mov     rdi, [rel mapfd]
        mov     rsi, r12                ; append after the partial line
        lea     rdx, [rel buffer + READ_BUF_SIZE]
        sub     rdx, r12                ; rdx = free space left in buffer
        syscall
        test    rax, rax
        jle     .eof                    ; 0 = EOF, negative = error

        add     rax, r12                ; rax = new end of valid data
        mov     [rel read_end], rax
        ; r12 stays where it is: the bytes before it were already scanned.

.check_newline:
        cmp     byte [r12], 10          ; newline?
        je      .found_newline
        inc     r12
        jmp     .scan_loop

.found_newline:
        mov     rsi, r13                ; return: line start
        mov     rcx, r12
        sub     rcx, r13                ; return: length without the newline
        inc     r12                     ; step past the newline
        mov     [rel read_pos], r12     ; next call starts here
        clc                             ; CF = 0 (success)
        jmp     .done

.eof:
        mov     rsi, r13                ; return whatever partial data exists
        mov     rcx, r12
        sub     rcx, r13
        stc                             ; CF = 1; set AFTER sub, which rewrites CF

.done:
        pop     r13                     ; pop and ret leave the flags untouched
        pop     r12
        pop     rbx
        ret
; ============================================================
; has_exec_permission: test whether the permissions field of one
;                      /proc/self/maps line contains a given character
;   In:    rsi = address of the line (need not be NUL-terminated)
;          rcx = line length in bytes
;          dil = character to look for (the caller passes 'x')
;   Out:   rax = 1 if the character occurs in the permissions field,
;          rax = 0 otherwise (also when the line has no space or
;                  ends before the permissions field)
;   Clobb: rax, rdx, r8, flags
;
; Line format: "start-end perms offset dev inode pathname".
; The width of the "start-end" field depends on the addresses, so the
; permissions field is located by searching for the first space rather
; than by a fixed offset. It is the (up to) 4 bytes after that space.
; No byte at or beyond rsi + rcx is ever read.
; ============================================================
has_exec_permission:
        xor     edx, edx                ; rdx = index into the line

        ; Find the first space (end of the address range).
.find_space:
        cmp     rdx, rcx                ; bounds check before touching memory
        jae     .not_found
        cmp     byte [rsi + rdx], ' '
        je      .at_space
        inc     rdx
        jmp     .find_space

.at_space:
        inc     rdx                     ; rdx = index of first permission char
        lea     r8, [rdx + 4]           ; r8 = index one past the 4th perm char
        cmp     r8, rcx
        cmova   r8, rcx                 ; clamp to the end of the line

        ; Scan the permission characters in [rdx, r8).
.check_perms:
        cmp     rdx, r8
        jae     .not_found
        cmp     [rsi + rdx], dil
        je      .found_perm
        inc     rdx
        jmp     .check_perms

.found_perm:
        mov     eax, 1
        ret

.not_found:
        xor     eax, eax
        ret
; ------------------------------------------------------------
; _start: program entry point
;   1. zero the counters and mark the read buffer as empty
;   2. open /proc/self/maps (exit status 1 if that fails)
;   3. count every line, and every line whose permissions contain 'x'
;   4. close the file, print the summary, exit with status 0
; ------------------------------------------------------------
_start:
        ; Counters start at zero.
        xor     eax, eax
        mov     [rel total_count], rax
        mov     [rel exec_count], rax
        mov     [rel anon_count], rax
        mov     [rel stack_count], rax

        ; Empty buffer: read_pos == read_end == start of buffer.
        lea     rax, [rel buffer]
        mov     [rel read_pos], rax
        mov     [rel read_end], rax

        ; fd = open("/proc/self/maps", O_RDONLY, 0)
        lea     rdi, [rel proc_path]
        xor     esi, esi                ; O_RDONLY
        xor     edx, edx                ; mode (unused without O_CREAT)
        mov     eax, 2                  ; sys_open
        syscall
        mov     [rel mapfd], rax
        test    rax, rax
        js      .exit_error             ; negative return = failure

.process_lines:
        call    read_line               ; rsi = line, rcx = length
        jc      .done_reading           ; CF = 1: EOF

        inc     qword [rel total_count] ; one line = one mapped region

        mov     dil, 'x'
        call    has_exec_permission     ; rax = 1 if perms contain 'x'
        test    rax, rax
        jz      .process_lines
        inc     qword [rel exec_count]

        ; Anonymous-region counting is not implemented here; a full
        ; version would parse the inode / pathname fields of the line.
        jmp     .process_lines

.done_reading:
        ; close(fd)
        mov     eax, 3                  ; sys_close
        mov     rdi, [rel mapfd]
        syscall

        ; Header line
        lea     rsi, [rel header]
        mov     edx, header_len
        call    print_string

        ; "Total regions : <n>"
        lea     rsi, [rel label_total]
        mov     edx, label_total_len
        call    print_string
        mov     rdi, [rel total_count]
        call    print_uint64

        ; "Executable : <n>"
        lea     rsi, [rel label_exec]
        mov     edx, label_exec_len
        call    print_string
        mov     rdi, [rel exec_count]
        call    print_uint64

        ; exit(0)
        mov     eax, 60                 ; sys_exit
        xor     edi, edi
        syscall

.exit_error:
        ; exit(1)
        mov     eax, 60                 ; sys_exit
        mov     edi, 1
        syscall
Key Observations When Running
Running this on a typical Linux system, the output might look like:
=== Memory Map Analysis ===
Total regions : 14
Executable : 3
The 3 executable regions are typically:
1. The program's own .text section
2. libc.so.6's code section
3. The vDSO (virtual dynamic shared object)
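The 14 total regions include: program text, program rodata, program data/bss, libc text, libc rodata, libc data, ld.so (dynamic linker) text/rodata/data, the stack, vvar, and vdso. These figures describe a dynamically linked process. Built exactly as shown above (assembled with nasm and linked with ld, with no libc), the program is a static executable, so no libc or ld.so mappings exist and the counts will be smaller: expect only the program's own segments plus entries such as [stack], [vvar], [vdso], and, on some kernels, [vsyscall].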
What Makes This Program Interesting
It reads its own memory map while running. When you see the entry for the program itself, that's the code you're currently executing reading information about itself from the kernel. The entry for [stack] shows the memory region that contains the current call stack — including the return addresses and saved registers pushed when _start calls read_line and the other helper routines. (The kernel code that services reads of /proc/self/maps runs on a separate kernel stack, which does not appear in this map.)
It uses only system calls. No printf, no fgets, no strlen from libc. The entire program — file I/O, string printing, integer conversion — is implemented using raw system calls.
File I/O in assembly. The read_line function demonstrates buffered file reading: sys_read returns up to N bytes; we scan for newlines within the buffer and compact when the line spans a buffer boundary.
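The rep movsb instruction in the compaction code is the x86-64 string move instruction. It copies RCX bytes from [RSI] to [RDI], advancing both pointers. This is the assembly-level memmove for small copies. Because it copies forward (with the direction flag clear), it is safe for overlapping regions only when the destination lies below the source — which is the case here, since the partial line is always moved down to the start of the buffer.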
Extending the Program
Some extensions worth implementing as exercises:
- Parse and print the address ranges: Extract the hexadecimal start and end addresses from each line and print them formatted.
- Calculate total mapped size: Sum the sizes of all regions (end - start for each line).
- Find and print the stack range: When the pathname field contains [stack], print the start and end address as "Stack: 0x... - 0x...".
- Count private vs shared mappings: The 4th permission character is p (private/CoW) or s (shared). Count how many regions of each type exist.
These extensions require parsing ASCII hex strings in assembly — a practical skill covered in Chapter 14 (string processing).