x64汇编学习(5)-- Global & Static Variables

Last Updated: 2023-10-02 14:19:01 Monday

-- TOC --

测试用C代码如下:

#include <stdio.h>

/* Global with an explicit initializer: starts at 1. */
int a = 1;
/* Global without an initializer: starts at 0. */
int b;

/* Returns c + d, the sum of two function-local static variables.
 * c has an explicit initializer (3); d has none and therefore starts at 0.
 * Neither variable is written anywhere in this program. */
int f(){
    static int c = 3;
    static int d;
    return c+d;
}

/* Prints the sum of the two globals, then the sum of the two statics
 * (obtained by calling f()), and returns 0. */
int main(){
    printf("a+b=%d\n", a+b);
    printf("c+d=%d\n", f());
    return 0;
}

不开优化,汇编如下:

a:
        # Initialized global `a`: 4 bytes holding the value 1.
        .long   1
b:
        # Global `b`, which has no initializer in the C source.
        # `.zero 4` emits 4 zero-valued bytes, so the stored value is the
        # same as `.long 0` would give; it is not "uninitialized" space.
        # NOTE(review): this listing does not show section directives; the
        # readelf output later in the article places `b` in .bss -- confirm
        # against the unfiltered compiler output.
        .zero   4
f()::c:
        # Initialized function-local static `c`: 4 bytes holding the value 3.
        # The function-qualified label `f()::c` reveals that this C source
        # was compiled with a C++ compiler.
        .long   3
f():
        # int f(): returns c + d in eax.
        # Frame-pointer prologue (unoptimized build).
        push    rbp
        mov     rbp, rsp
        # edx = c, loaded RIP-relative.
        mov     edx, DWORD PTR f()::c[rip]
        # eax = d. The static `d` has no initializer and no definition label
        # in this listing; this load is the only place where it appears.
        # NOTE(review): presumably its storage is reserved by a directive
        # that the listing omits -- confirm against the unfiltered output.
        mov     eax, DWORD PTR f()::d[rip]
        # eax = d + c, the return value.
        add     eax, edx
        pop     rbp
        ret
.LC0:
        # Format string for the first printf call in main.
        .string "a+b=%d\n"
.LC1:
        # Format string for the second printf call in main.
        .string "c+d=%d\n"
main:
        # int main(): prints a+b, then the value returned by f(), returns 0.
        push    rbp
        mov     rbp, rsp
        # esi = a + b, the second printf argument.
        mov     edx, DWORD PTR a[rip]
        mov     eax, DWORD PTR b[rip]
        add     eax, edx
        mov     esi, eax
        # edi = address of .LC0, the first printf argument.
        mov     edi, OFFSET FLAT:.LC0
        # printf is variadic: al carries the number of vector registers
        # used for arguments, which is 0 here.
        mov     eax, 0
        call    printf
        # esi = value returned by f() in eax.
        call    f()
        mov     esi, eax
        mov     edi, OFFSET FLAT:.LC1
        mov     eax, 0
        call    printf
        # Return value of main: 0.
        mov     eax, 0
        pop     rbp
        ret

前面几篇x64汇编学习,我都不小心使用了C++编译器编译C代码!...

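用C++编译器编译C代码得到的汇编,与用C编译器得到的汇编,基本一致,主要是符号名的表达格式不同(如`f()`与`f`,`f()::c`与`c.1`)。另有一处指令上的区别:C编译器在`call f`之前多了一条`mov eax, 0`,因为在C语言(C23之前)中`int f()`是没有原型的声明,编译器不知道f是否为可变参数函数,就按可变参数函数的调用约定把al清零;而在C++中`f()`等价于`f(void)`,不需要这一步。下面是C编译器得到的汇编: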

a:
        # Initialized global `a`: 4 bytes holding the value 1.
        .long   1
b:
        # Global `b` without an initializer: `.zero 4` emits 4 zero bytes.
        .zero   4
f:
        # int f(): returns c + d in eax. In this C build the two statics
        # are referenced under the suffixed names c.1 and d.0.
        push    rbp
        mov     rbp, rsp
        # edx = c
        mov     edx, DWORD PTR c.1[rip]
        # eax = d (d.0 has no definition label anywhere in this listing)
        mov     eax, DWORD PTR d.0[rip]
        # eax = d + c, the return value.
        add     eax, edx
        pop     rbp
        ret
.LC0:
        # Format string for the first printf call in main.
        .string "a+b=%d\n"
.LC1:
        # Format string for the second printf call in main.
        .string "c+d=%d\n"
main:
        # int main(): prints a+b, then the value returned by f, returns 0.
        push    rbp
        mov     rbp, rsp
        # esi = a + b, the second printf argument.
        mov     edx, DWORD PTR a[rip]
        mov     eax, DWORD PTR b[rip]
        add     eax, edx
        mov     esi, eax
        # edi = address of .LC0, the first printf argument.
        mov     edi, OFFSET FLAT:.LC0
        # printf is variadic: al = number of vector registers used (0).
        mov     eax, 0
        call    printf
        # eax is zeroed before calling f as well; this instruction is absent
        # from the C++ build of the same source.
        # NOTE(review): presumably because `int f()` declares no prototype
        # in C, so the call is set up like a variadic one -- confirm.
        mov     eax, 0
        call    f
        # esi = value returned by f in eax.
        mov     esi, eax
        mov     edi, OFFSET FLAT:.LC1
        mov     eax, 0
        call    printf
        # Return value of main: 0.
        mov     eax, 0
        pop     rbp
        ret
c.1:
        # Initialized static `c` of f: 4 bytes holding the value 3.
        .long   3

-O3

f:
        # c + d has been folded to the constant 3; no load of c or d and
        # no frame-pointer prologue remain.
        mov     eax, 3
        ret
.LC0:
        # Format string for the first printf call in main.
        .string "a+b=%d\n"
.LC1:
        # Format string for the second printf call in main.
        .string "c+d=%d\n"
main:
        # No frame pointer in the optimized build. rsp is lowered by 8 here
        # and restored before ret; under the System V AMD64 ABI this makes
        # rsp 16-byte aligned at the two call sites.
        sub     rsp, 8
        mov     edi, OFFSET FLAT:.LC0
        # esi = b, then `a` is added straight from memory further down.
        mov     esi, DWORD PTR b[rip]
        # al = 0 for the variadic printf call.
        xor     eax, eax
        add     esi, DWORD PTR a[rip]
        call    printf
        # There is no call to f: its result is passed as the immediate 3.
        mov     esi, 3
        mov     edi, OFFSET FLAT:.LC1
        xor     eax, eax
        call    printf
        # Return value of main: 0.
        xor     eax, eax
        add     rsp, 8
        ret
b:
        # Global `b` without an initializer: `.zero 4` emits 4 zero bytes.
        .zero   4
a:
        # Initialized global `a`: 4 bytes holding the value 1.
        .long   1

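-O3优化后,由于c和d是static变量,且没有任何代码修改它们,编译器直接把f的返回值折叠为常量3(c+d=3+0),main中对f的调用也被替换成`mov esi, 3`,c和d的存储空间被完全消除。a和b是全局符号,可能被其它编译单元修改,所以仍然要从内存读取,但省去了栈帧的建立和寄存器对数据的中转。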


未初始化的global和static变量,C标准保证它们的初始值为0,它们位于object文件的.bss section(.bss在文件中不占空间,程序加载时被清零);已初始化的a和c则位于.data section。global变量是全局符号(GLOBAL),static变量是局部符号(LOCAL)。

$ readelf -s t2.o

Symbol table '.symtab' contains 13 entries:
   Num:    Value          Size Type    Bind   Vis      Ndx Name
     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS t2.c
     2: 0000000000000000     0 SECTION LOCAL  DEFAULT    1 .text
     3: 0000000000000000     0 SECTION LOCAL  DEFAULT    3 .data
     4: 0000000000000000     0 SECTION LOCAL  DEFAULT    4 .bss
     5: 0000000000000004     4 OBJECT  LOCAL  DEFAULT    3 c.1
     6: 0000000000000004     4 OBJECT  LOCAL  DEFAULT    4 d.0
     7: 0000000000000000     0 SECTION LOCAL  DEFAULT    5 .rodata
     8: 0000000000000000     4 OBJECT  GLOBAL DEFAULT    3 a
     9: 0000000000000000     4 OBJECT  GLOBAL DEFAULT    4 b
    10: 0000000000000000    20 FUNC    GLOBAL DEFAULT    1 f
    11: 0000000000000014    69 FUNC    GLOBAL DEFAULT    1 main
    12: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT  UND printf

修饰为c.1和d.0,是因为有可能在当前的编译单元中的别的函数中,存在重名的static变量。

$ nm t2.o
0000000000000000 D a
0000000000000000 B b
0000000000000004 d c.1
0000000000000004 b d.0
0000000000000000 T f
0000000000000014 T main
                 U printf

nm命令用来查看符号表,第二列的字母表示符号类型:D/d表示符号在.data,B/b表示在.bss,T表示在.text,U表示未定义(需要在链接时解析)。一般大写字母表示全局符号,小写表示局部符号。


下面是这段代码object文件的反汇编:

$ objdump -M intel -d t2.o

t2.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <f>:
   0:   55                      push   rbp
   1:   48 89 e5                mov    rbp,rsp
   4:   8b 15 00 00 00 00       mov    edx,DWORD PTR [rip+0x0]        # a <f+0xa>
   a:   8b 05 00 00 00 00       mov    eax,DWORD PTR [rip+0x0]        # 10 <f+0x10>
  10:   01 d0                   add    eax,edx
  12:   5d                      pop    rbp
  13:   c3                      ret

0000000000000014 <main>:
  14:   55                      push   rbp
  15:   48 89 e5                mov    rbp,rsp
  18:   8b 15 00 00 00 00       mov    edx,DWORD PTR [rip+0x0]        # 1e <main+0xa>
  1e:   8b 05 00 00 00 00       mov    eax,DWORD PTR [rip+0x0]        # 24 <main+0x10>
  24:   01 d0                   add    eax,edx
  26:   89 c6                   mov    esi,eax
  28:   bf 00 00 00 00          mov    edi,0x0
  2d:   b8 00 00 00 00          mov    eax,0x0
  32:   e8 00 00 00 00          call   37 <main+0x23>
  37:   b8 00 00 00 00          mov    eax,0x0
  3c:   e8 00 00 00 00          call   41 <main+0x2d>
  41:   89 c6                   mov    esi,eax
  43:   bf 00 00 00 00          mov    edi,0x0
  48:   b8 00 00 00 00          mov    eax,0x0
  4d:   e8 00 00 00 00          call   52 <main+0x3e>
  52:   b8 00 00 00 00          mov    eax,0x0
  57:   5d                      pop    rbp
  58:   c3                      ret

.text section对应的重定位section信息:

$ readelf -r t2.o

Relocation section '.rela.text' at offset 0x2c8 contains 9 entries:
  Offset          Info           Type           Sym. Value    Sym. Name + Addend
000000000006  000300000002 R_X86_64_PC32     0000000000000000 .data + 0
00000000000c  000400000002 R_X86_64_PC32     0000000000000000 .bss + 0
00000000001a  000800000002 R_X86_64_PC32     0000000000000000 a - 4
000000000020  000900000002 R_X86_64_PC32     0000000000000000 b - 4
000000000029  00070000000a R_X86_64_32       0000000000000000 .rodata + 0
000000000033  000c00000004 R_X86_64_PLT32    0000000000000000 printf - 4
00000000003d  000a00000004 R_X86_64_PLT32    0000000000000000 f - 4
000000000044  00070000000a R_X86_64_32       0000000000000000 .rodata + 8
00000000004e  000c00000004 R_X86_64_PLT32    0000000000000000 printf - 4

R_X86_64_PC32表示相对寻址修正S+A-P

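其中S是符号的地址,A是Addend,P是被修正位置(即重定位项的Offset)的地址。S+A-P值计算出来后,这个值存放在重定位的位置,作为指令中的32位位移,运行时指令用这个值+rip得到目标地址!Addend存在的意义是:运行时rip的值是下一条指令的地址,而S-P是相对于P的偏移;这里4字节的位移正好位于指令的末尾,下一条指令的地址就是P+4,所以A=-4(如`a - 4`)。c.1和d.0是通过section符号来引用的,它们在各自section内的偏移是4,4-4=0,所以显示为`.data + 0`和`.bss + 0`。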

R_X86_64_32表示绝对寻址修正S+A

这是.rodata区域的内容:

$ readelf -x5 t2.o

Hex dump of section '.rodata':
  0x00000000 612b623d 25640a00 632b643d 25640a00 a+b=%d..c+d=%d..

两个字符串,地址分别对应.rodata+0和.rodata+8。

本文链接:https://cs.pynote.net/hd/asm/202302101/

-- EOF --

-- MORE --