Last Updated: 2023-10-02 14:19:01 Monday
-- TOC --
测试用C代码如下:
#include <stdio.h>
/* Global with an explicit initializer. */
int a = 1;
/* Global with no initializer. */
int b;
/* Returns c + d, where both are function-scope static variables. */
int f(){
/* c has an explicit initializer. */
static int c = 3;
/* d has no initializer; the article states further down that such variables start at 0. */
static int d;
return c+d;
}
/* Prints the sum of the two globals, then the value returned by f(), and returns 0. */
int main(){
printf("a+b=%d\n", a+b);
printf("c+d=%d\n", f());
return 0;
}
不开优化,汇编如下:
a:
# Global a: 4 bytes holding the initializer 1 (from `int a = 1;`).
.long 1
b:
# Global b: declared without an initializer in the C source.
# `.zero 4` emits four zero-filled bytes, so the stored value is the same
# as `.long 0` would give. What distinguishes b from a is the section it
# lives in; section directives are not shown in this listing, but the
# readelf/nm output later in this article places b in .bss.
.zero 4
f()::c:
# Function-scope static c from f(), initialized to 3.
# The C++-style name `f()::c` reveals that this listing was produced by a
# C++ compiler compiling the C code.
.long 3
f():
# Frame setup: save the caller's rbp and make rbp the frame base.
push rbp
mov rbp, rsp
# edx = c (RIP-relative load of the initialized static).
mov edx, DWORD PTR f()::c[rip]
# eax = d. d has no initializer, and this listing shows no label for
# f()::d; this load is the only line that refers to it.
# NOTE(review): presumably its storage is reserved by directives the
# listing omits (e.g. .local/.comm) - confirm with the raw `-S` output.
mov eax, DWORD PTR f()::d[rip]
# eax = c + d; main reads the result from eax after the call.
add eax, edx
pop rbp
ret
# Format strings for the two printf calls in main.
.LC0:
.string "a+b=%d\n"
.LC1:
.string "c+d=%d\n"
main:
push rbp
mov rbp, rsp
# esi = a + b (second printf argument).
mov edx, DWORD PTR a[rip]
mov eax, DWORD PTR b[rip]
add eax, edx
mov esi, eax
# edi = address of the first format string (first printf argument).
mov edi, OFFSET FLAT:.LC0
# printf is variadic: al = 0 means no vector registers carry arguments.
mov eax, 0
call printf
# No eax setup before this call, unlike the C-compiler listing further down.
call f()
# esi = value returned by f(), edi = address of the second format string.
mov esi, eax
mov edi, OFFSET FLAT:.LC1
mov eax, 0
call printf
# Return value of main: 0.
mov eax, 0
pop rbp
ret
前面几篇x64汇编学习,我都不小心使用了C++编译器编译C代码!...
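用C++编译器编译C代码得到的汇编,与用C编译器得到的汇编,基本一致,主要是符号命名等表达格式上的区别(例如f()::c变成了c.1,f()变成了f)。另一个可见的差异是:C编译器的版本在call f之前多了一条mov eax, 0,这是因为C代码中int f()没有给出参数原型,编译器按可变参数函数的调用约定,用al给出传参所用的向量寄存器个数(这里是0)。下面是C编译器得到的汇编: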
# Globals: a holds the initializer 1; b has no initializer and is four zero bytes.
a:
.long 1
b:
.zero 4
f:
push rbp
mov rbp, rsp
# The function-scope statics appear under the names c.1 and d.0 in this build.
# edx = c, eax = d.
mov edx, DWORD PTR c.1[rip]
mov eax, DWORD PTR d.0[rip]
# eax = c + d; main reads the result from eax after the call.
add eax, edx
pop rbp
ret
# Format strings for the two printf calls in main.
.LC0:
.string "a+b=%d\n"
.LC1:
.string "c+d=%d\n"
main:
push rbp
mov rbp, rsp
# esi = a + b (second printf argument).
mov edx, DWORD PTR a[rip]
mov eax, DWORD PTR b[rip]
add eax, edx
mov esi, eax
mov edi, OFFSET FLAT:.LC0
# printf is variadic: al = 0 means no vector registers carry arguments.
mov eax, 0
call printf
# Extra eax = 0 before calling f; the C++ listing above has no such line.
# NOTE(review): presumably because `int f()` has no prototype in C, so the
# call is set up like a variadic one - confirm against the System V AMD64 ABI.
mov eax, 0
call f
# esi = value returned by f, edi = address of the second format string.
mov esi, eax
mov edi, OFFSET FLAT:.LC1
mov eax, 0
call printf
# Return value of main: 0.
mov eax, 0
pop rbp
ret
# Storage for f's static c, initialized to 3. No label for d.0 appears in this listing.
c.1:
.long 3
使用C编译器并开启-O3优化后,汇编如下:
# f at -O3: the two loads and the add are gone; the function returns the constant 3.
f:
mov eax, 3
ret
# Format strings for the two printf calls in main.
.LC0:
.string "a+b=%d\n"
.LC1:
.string "c+d=%d\n"
main:
# Reserve 8 bytes of stack; no rbp frame is set up in this version.
# NOTE(review): presumably this keeps rsp 16-byte aligned at the calls - confirm.
sub rsp, 8
mov edi, OFFSET FLAT:.LC0
# esi = b + a, with a added straight from memory (no intermediate register copy).
mov esi, DWORD PTR b[rip]
# eax = 0 for the variadic printf call; xor replaces the unoptimized `mov eax, 0`.
xor eax, eax
add esi, DWORD PTR a[rip]
call printf
# f is not called here: its result is passed as the immediate 3.
mov esi, 3
mov edi, OFFSET FLAT:.LC1
xor eax, eax
call printf
# Return value of main: 0.
xor eax, eax
add rsp, 8
ret
# Only the globals a and b remain as data; no c or d labels appear in this listing.
b:
.zero 4
a:
.long 1
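优化将无效的计算去掉,减少寄存器对数据的中转。具体来说:static变量c和d在f之外不可见,且从未被修改,编译器在编译期就算出c+d=3,f直接返回3,main中也不再调用f,而是直接把3作为参数(mov esi, 3),c和d不再出现在汇编中;而a和b是全局符号,可能被其它编译单元修改,所以a+b仍然在运行时从内存读取后计算。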
未初始化的global和static变量,编译器保证它们的初始值为0,它们位于object文件的.bss section。global变量是全局符号,static变量是局部符号。
$ readelf -s t2.o
Symbol table '.symtab' contains 13 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
1: 0000000000000000 0 FILE LOCAL DEFAULT ABS t2.c
2: 0000000000000000 0 SECTION LOCAL DEFAULT 1 .text
3: 0000000000000000 0 SECTION LOCAL DEFAULT 3 .data
4: 0000000000000000 0 SECTION LOCAL DEFAULT 4 .bss
5: 0000000000000004 4 OBJECT LOCAL DEFAULT 3 c.1
6: 0000000000000004 4 OBJECT LOCAL DEFAULT 4 d.0
7: 0000000000000000 0 SECTION LOCAL DEFAULT 5 .rodata
8: 0000000000000000 4 OBJECT GLOBAL DEFAULT 3 a
9: 0000000000000000 4 OBJECT GLOBAL DEFAULT 4 b
10: 0000000000000000 20 FUNC GLOBAL DEFAULT 1 f
11: 0000000000000014 69 FUNC GLOBAL DEFAULT 1 main
12: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND printf
修饰为c.1和d.0,是因为有可能在当前的编译单元中的别的函数中,存在重名的static变量。
$ nm t2.o
0000000000000000 D a
0000000000000000 B b
0000000000000004 d c.1
0000000000000004 b d.0
0000000000000000 T f
0000000000000014 T main
U printf
nm这个命令看符号表,大写字母表示全局符号,小写表示局部符号。字母本身表示符号所在的位置:D/d为.data,B/b为.bss,T为.text,U表示未定义(需要在链接时解析,如printf)。
下面是这段代码object文件的反汇编:
$ objdump -M intel -d t2.o
t2.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <f>:
0: 55 push rbp
1: 48 89 e5 mov rbp,rsp
4: 8b 15 00 00 00 00 mov edx,DWORD PTR [rip+0x0] # a <f+0xa>
a: 8b 05 00 00 00 00 mov eax,DWORD PTR [rip+0x0] # 10 <f+0x10>
10: 01 d0 add eax,edx
12: 5d pop rbp
13: c3 ret
0000000000000014 <main>:
14: 55 push rbp
15: 48 89 e5 mov rbp,rsp
18: 8b 15 00 00 00 00 mov edx,DWORD PTR [rip+0x0] # 1e <main+0xa>
1e: 8b 05 00 00 00 00 mov eax,DWORD PTR [rip+0x0] # 24 <main+0x10>
24: 01 d0 add eax,edx
26: 89 c6 mov esi,eax
28: bf 00 00 00 00 mov edi,0x0
2d: b8 00 00 00 00 mov eax,0x0
32: e8 00 00 00 00 call 37 <main+0x23>
37: b8 00 00 00 00 mov eax,0x0
3c: e8 00 00 00 00 call 41 <main+0x2d>
41: 89 c6 mov esi,eax
43: bf 00 00 00 00 mov edi,0x0
48: b8 00 00 00 00 mov eax,0x0
4d: e8 00 00 00 00 call 52 <main+0x3e>
52: b8 00 00 00 00 mov eax,0x0
57: 5d pop rbp
58: c3 ret
.text section对应的重定位section信息:
$ readelf -r t2.o
Relocation section '.rela.text' at offset 0x2c8 contains 9 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000006 000300000002 R_X86_64_PC32 0000000000000000 .data + 0
00000000000c 000400000002 R_X86_64_PC32 0000000000000000 .bss + 0
00000000001a 000800000002 R_X86_64_PC32 0000000000000000 a - 4
000000000020 000900000002 R_X86_64_PC32 0000000000000000 b - 4
000000000029 00070000000a R_X86_64_32 0000000000000000 .rodata + 0
000000000033 000c00000004 R_X86_64_PLT32 0000000000000000 printf - 4
00000000003d 000a00000004 R_X86_64_PLT32 0000000000000000 f - 4
000000000044 00070000000a R_X86_64_32 0000000000000000 .rodata + 8
00000000004e 000c00000004 R_X86_64_PLT32 0000000000000000 printf - 4
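R_X86_64_PC32表示相对寻址修正S+A-P,其中S是符号的地址,A是Addend,P是被修正位置(即重定位表中Offset所指位置)的地址。
S+A-P的值计算出来后,存放在被修正的位置,作为指令中的32位偏移量;运行时,CPU用这个偏移量加上rip得到目标地址。Addend存在的意义是:运行时rip的值是下一条指令的地址,而S-P是相对于P的偏移;这里被修正的4字节位于指令末尾,所以rip=P+4,需要的偏移量是S-(P+4),即Addend为-4(如a - 4、b - 4)。c.1和d.0的重定位是相对于section符号的,它们在各自section中的偏移都是4,4+(-4)=0,所以显示为.data + 0和.bss + 0。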
R_X86_64_32表示绝对寻址修正S+A
这是.rodata区域的内容:
$ readelf -x5 t2.o
Hex dump of section '.rodata':
0x00000000 612b623d 25640a00 632b643d 25640a00 a+b=%d..c+d=%d..
两个字符串,地址分别对应.rodata+0和.rodata+8。
本文链接:https://cs.pynote.net/hd/asm/202302101/
-- EOF --
-- MORE --