实战x86-64汇编(Linux)

Last Updated: 2023-11-23 15:21:37 Thursday

-- TOC --

本文主体内容来自:https://cs.lmu.edu/~ray/notes/gasexamples/,略有修改,带上我自己的学习笔记,以及用Intel语法的重写。有一些x64 register方面的基础知识,参考x86-64汇编基础

Hello World

# -------------------------------------------------------------------------
# Writes "Hello, world" to the console using only system calls. Runs on 64-bit Linux only.
# Syntax: GAS, AT&T. No C library is used; the entry point is _start.
# To assemble and run:
#
#     gcc -c hello.s && ld hello.o && ./a.out
#
# or
#
#     gcc -nostdlib hello.s && ./a.out
# -------------------------------------------------------------------------

        .global _start

        .section .rodata
message:
        .ascii  "Hello, world\n"
        .set    message_len, . - message # byte count, computed by the assembler

        .text
_start:
        # write(1, message, message_len)
        mov     $1, %eax                # system call 1 is write
        mov     $1, %edi                # file handle 1 is stdout
        lea     message(%rip), %rsi     # address of string to output; RIP-relative
                                        # so the link also works when gcc makes a PIE
        mov     $message_len, %edx      # number of bytes
        syscall                         # invoke operating system to do the write

        # exit(0)
        mov     $60, %eax               # system call 60 is exit
        xor     %edi, %edi              # we want return code 0
        syscall                         # invoke operating system to exit
系统调用号(例如上面的1和60)定义在Linux内核源码的如下文件中:
root/arch/x86/entry/syscalls/syscall_32.tbl(32位)
root/arch/x86/entry/syscalls/syscall_64.tbl(64位)

还可以直接使用as编译:

$ as hello.s -o hello.o
$ ld hello.o -o hello
$ ./hello
Hello, world

用Intel语法重写,有一处关键,就是在提取地址的时候,要加上offset关键词。同时,下面的代码,还增加了对len的处理,通过exit调用,返回len的值。

.intel_syntax noprefix

# Prints msg with the write system call, then exits with status len
# (the message length). Uses raw Linux x86-64 system calls only.

.data
msg:
    .ascii "Hello asm\n"  # 10 chars
    len = . - msg         # length computed by the assembler; defined before use

.global _start
.text
_start:
    mov rax, 1  # write
    mov rdi, 1  # stdout
    # can also be written as:  offset flat: msg
    mov rsi, offset msg  # offset is key: load the address of msg
    mov rdx, len         # byte count taken from len, not a hard-coded 10
    syscall

    mov rax, 60   # exit
    mov rdi, len  # exit status = len
    syscall

Working with the C Library

# -------------------------------------------------------------------------
# Writes "Hola, mundo" to the console using a C library. Runs on Linux or any other system
# that does not use underscores for symbols in its C library. To assemble and run:
#
#     gcc hola.s && ./a.out
# -------------------------------------------------------------------------

        .global main

        .text
main:                                   # This is called by C library's startup code
        # NOTE: no stack adjustment is made in this version, so %rsp is not
        # 16-byte aligned at the call below.
        lea     message(%rip), %rdi     # First integer (or pointer) parameter in %rdi;
                                        # RIP-relative so it links as PIE too
        call    puts                    # puts(message)
        xor     %eax, %eax              # main returns 0 instead of puts's result
        ret                             # Return to C library code
message:
        .asciz "Hola, mundo"            # asciz puts a 0 byte at the end

按照calling convention的规定,在call之前,rsp必须16字节对齐,但是此例没有对齐,编译后还能正常运行。用Intel语法重写,增加保持16字节对齐:

.intel_syntax noprefix

# Prints a fixed string with puts() and returns 0 from main.

.section .rodata
msg:
    .asciz "Hello IIT"

.text
.global main
main:
    sub  rsp, 8            # or push rbp: makes rsp 16-byte aligned at the call
    lea  rdi, [rip + msg]  # 1st argument: address of msg (RIP-relative, links as PIE too)
    call puts
    xor  eax, eax          # return 0 instead of leaking puts's result
    add  rsp, 8            # or pop rbp
    ret

Calling Conventions for 64-bit C Code

x86-64 Call Conventions

打印fibonacci数的汇编代码:

# ----------------------------------------------------------------------
# A 64-bit Linux application that writes the first 90 Fibonacci numbers.  It
# needs to be linked with a C library.
#
# Assemble and Link:
#     gcc fib.s
# --------------------------------------------------------------------

        .global main

        .text
main:
        push    %rbx                    # we have to save this since we use it

        mov     $90, %ecx               # ecx will countdown to 0
        xor     %eax, %eax              # rax will hold the current number
        mov     $1, %ebx                # rbx will hold the next number, originally 1
print:
        # We need to call printf, but we are using rax, rbx, and rcx.  printf
        # may destroy rax and rcx so we will save these before the call and
        # restore them afterwards.  rbx is callee-saved, so printf preserves it.

        push    %rax                    # caller-save register
        push    %rcx                    # caller-save register

        lea     format(%rip), %rdi      # set 1st parameter (format), RIP-relative
        mov     %rax, %rsi              # set 2nd parameter (current_number)
        xor     %eax, %eax              # because printf is varargs: no vector args

        # Stack is already aligned because we pushed three 8 byte registers
        call    printf                  # printf(format, current_number)

        pop     %rcx                    # restore caller-save register
        pop     %rax                    # restore caller-save register

        mov     %rax, %rdx              # save the current number
        mov     %rbx, %rax              # next number is now current
        add     %rdx, %rbx              # get the new next number
        dec     %ecx                    # count down
        jnz     print                   # if not done counting, do some more

        xor     %eax, %eax              # main returns 0
        pop     %rbx                    # restore rbx before returning
        ret

        .section .rodata
format:
        .asciz  "%20ld\n"

用Intel语法重写,使用不同的寄存器,通过判断CF作为结束条件,增加了按十六进制打印输出:

.intel_syntax noprefix

# Prints every Fibonacci number that fits in an unsigned 64-bit register,
# one per line as "ordinal: decimal 0xHEX". Stops when the next addition
# sets the carry flag.
#
# Register roles (callee-saved, so they survive the printf calls):
#   r12 = previous number, r13 = number being printed, r14 = line ordinal

.section .rodata
fmt:
    .asciz "%4d:%24lu 0x%lX\n"

.text
.global main
main:
    push r12
    push r13
    push r14              # three pushes leave rsp 16-byte aligned for the calls
    xor r12d, r12d
    mov r13d, 1
    mov r14d, 1
    # show zero
    lea rdi, [rip + fmt]
    mov rsi, r14
    xor edx, edx
    xor ecx, ecx
    xor eax, eax          # no float param
    call printf
.Lshow:  # start from 1
    inc r14
    lea rdi, [rip + fmt]
    mov rsi, r14
    mov rdx, r13
    mov rcx, r13
    xor eax, eax          # no float param
    call printf
    # advance: (r12, r13) = (r13, r12 + r13)
    mov rdx, r12
    mov r12, r13
    add r13, rdx
    jnc .Lshow            # carry set: the sum no longer fits in 64 bits, stop
    xor eax, eax          # main returns 0
    pop r14
    pop r13
    pop r12
    ret

Command Line Arguments

打印main的argv。

# -----------------------------------------------------------------------------
# A 64-bit program that displays its commandline arguments, one per line.
#
# On entry, %rdi will contain argc and %rsi will contain argv.
#
# Register roles inside the loop:
#   %r12 = number of arguments still to print
#   %r13 = pointer to the current argv slot
# -----------------------------------------------------------------------------

        .global main

        .text
main:
        push    %r12                    # callee-saved, used as the counter
        push    %r13                    # callee-saved, used as the argv cursor
        sub     $8, %rsp                # must align stack before call

        mov     %rdi, %r12              # r12 = argc
        mov     %rsi, %r13              # r13 = argv
.Lnext_arg:
        mov     (%r13), %rdi            # the argument string to display
        call    puts                    # print it

        add     $8, %r13                # point to next argument
        dec     %r12                    # count down
        jnz     .Lnext_arg              # if not done counting keep going

        add     $8, %rsp                # undo the alignment adjustment
        pop     %r13
        pop     %r12
        ret

用Intel语法改写,功能升级:字符串转数字的x64汇编实现

calc power

# -----------------------------------------------------------------------------
# A 64-bit command line application to compute x^y.
#
# Syntax: power x y
# x and y are integers
#
# Exit status: 0 on success, 1 when the arguments are rejected.
# -----------------------------------------------------------------------------

        .global main

        .text
main:
        push    %r12                    # save callee-save registers
        push    %r13
        push    %r14
        # By pushing 3 registers our stack is already aligned for calls

        cmp     $3, %edi                # must have exactly two arguments
                                        # (argc is a 32-bit int, so compare %edi)
        jne     error1

        mov     %rsi, %r12              # argv

# We will use r13d to count down from the exponent to zero, r14d to hold the
# value of the base, and eax to hold the running product.

        mov     16(%r12), %rdi          # argv[2]
        call    atoi                    # y in eax
        cmp     $0, %eax                # disallow negative exponents
        jl      error2
        mov     %eax, %r13d             # y in r13d

        mov     8(%r12), %rdi           # argv[1]
        call    atoi                    # x in eax
        mov     %eax, %r14d             # x in r14d

        mov     $1, %eax                # start with answer = 1
check:
        test    %r13d, %r13d            # we're counting y downto 0
        jz      gotit                   # done
        imul    %r14d, %eax             # multiply in another x
        dec     %r13d
        jmp     check
gotit:                                  # print report on success
        lea     answer(%rip), %rdi
        movslq  %eax, %rsi
        xor     %eax, %eax              # printf is varargs: no vector args
        call    printf
        xor     %eax, %eax              # exit status 0
        jmp     done
error1:                                 # print error message
        lea     badArgumentCount(%rip), %rdi
        call    puts
        mov     $1, %eax                # exit status 1
        jmp     done
error2:                                 # print error message
        lea     negativeExponent(%rip), %rdi
        call    puts
        mov     $1, %eax                # exit status 1
done:                                   # restore saved registers (eax is kept)
        pop     %r14
        pop     %r13
        pop     %r12
        ret

        .section .rodata
answer:
        .asciz  "%d\n"
badArgumentCount:
        .asciz  "Requires exactly two arguments\n"
negativeExponent:
        .asciz  "The exponent may not be negative\n"

用Intel语法重写:

.intel_syntax noprefix

# Computes base^exponent from the two command line arguments and prints it
# as a signed 64-bit number. The product is not checked for overflow.
#
# Register roles (callee-saved, so they survive the atoi calls):
#   r12 = argv, r13 = remaining exponent, r14 = base
# Exit status: 0 on success, 1 when the arguments are rejected.

.section .rodata
param_err:
    .string "I needs 2 parameters."
exp_err:
    .string "exponent may not be negative."
answer:
    .string "the power is: %ld\n"


.global main
.text
main:
    push r12
    push r13
    push r14                    # three pushes leave rsp 16-byte aligned for calls
    cmp  edi, 3
    jne  error1
    # rsi is caller-saved and may be overwritten by atoi: keep argv in r12
    mov  r12, rsi
    mov  rdi, [r12+16]          # argv[2]
    call atoi
    cmp  eax, 0
    jl   error2
    mov  r13d, eax              # exponent: atoi returns a 32-bit int, so
                                # zero-extend eax rather than trusting rax
    mov  rdi, [r12+8]           # argv[1]
    call atoi
    movsxd r14, eax             # base: sign-extend the int result to 64 bits
    # start from 1
    mov  eax, 1
check:
    test r13, r13
    jz   gotit
    imul rax, r14
    dec  r13
    jmp  check
gotit:
    lea  rdi, [rip + answer]
    mov  rsi, rax
    xor  eax, eax               # printf is varargs: no vector args
    call printf
    xor  eax, eax               # exit status 0
    jmp  done
error1:
    lea  rdi, [rip + param_err]
    call puts
    mov  eax, 1                 # exit status 1
    jmp  done
error2:
    lea  rdi, [rip + exp_err]
    call puts
    mov  eax, 1                 # exit status 1
done:
    pop  r14
    pop  r13
    pop  r12
    ret

编译和运行输出:

$ gcc pow.s -o pow
$ ./pow 2 63
the power is: -9223372036854775808

Floating Point Instructions

用汇编写个计算浮点数array的sum的接口,然后用C语言调用。现在浮点数计算,都是用SSE相关指令,使用xmm寄存器。

# -----------------------------------------------------------------------------
# A 64-bit function that returns the sum of the elements in a floating-point
# array. The function has prototype:
#
#   double sum(double array[], unsigned length)
#
# ABI:   System V AMD64
# In:    %rdi = array, %esi = length (32-bit; the upper half of %rsi is
#        not defined by the caller, so only %esi is read)
# Out:   %xmm0 = sum, 0.0 when length is 0
# Clobb: %rdi, %esi, flags
# -----------------------------------------------------------------------------

        .global sum
        .text
sum:
        xorpd   %xmm0, %xmm0            # initialize the sum to 0
        test    %esi, %esi              # special case for length = 0
        je      .Ldone
.Lnext:
        addsd   (%rdi), %xmm0           # add in the current array element
        add     $8, %rdi                # move to next array element
        dec     %esi                    # count down
        jnz     .Lnext                  # if not done counting, continue
.Ldone:
        ret                             # return value already in xmm0

Intel语法版本:

.intel_syntax noprefix

# double sum(double array[], unsigned length)
# ABI:   System V AMD64
# In:    rdi = array, esi = length (32-bit; the upper half of rsi is not
#        defined by the caller, so only esi is read)
# Out:   xmm0 = sum, 0.0 when length is 0
# Clobb: rdi, esi, flags

.global sum
.text
sum:
    xorpd xmm0, xmm0      # sum = 0.0
    test esi, esi         # length == 0: nothing to add
    jz .Ldone
.Lnext:
    addsd xmm0, [rdi]     # sum += *array
    add rdi, 8            # array++
    dec esi
    jnz .Lnext
.Ldone:
    ret                   # result is already in xmm0

C代码:

#include <stdio.h>

/* Implemented in assembly: adds up the first `length` elements of the array. */
double sum(double[], unsigned);

int main() {
    double test[] = {
        40.5, 26.7, 21.9, 1.5, -40.5, -23.4
    };
    /* Prefix lengths to sum, in the order they are printed. */
    const unsigned lengths[] = {6, 2, 0, 3};
    const unsigned cases = sizeof lengths / sizeof lengths[0];

    for (unsigned k = 0; k < cases; k++) {
        printf("%20.7f\n", sum(test, lengths[k]));
    }
    return 0;
}

编译运行:

$ gcc callsum.c sum.s -o cs
$ ./cs
          26.7000000
          67.2000000
           0.0000000
          89.1000000

Data Sections

计算命令行上输入数的平均数。

# -----------------------------------------------------------------------------
# 64-bit program that treats all its command line arguments as integers and
# displays their average as a floating point number. This program uses a data
# section to store intermediate results, not that it has to, but only to
# illustrate how data sections are used.
#
# Register roles (callee-saved, so they survive the atoi calls):
#   %r12 = index of the argument being converted, %r13 = argv
# -----------------------------------------------------------------------------

        .globl  main

        .text
main:
        push    %r12
        push    %r13
        sub     $8, %rsp                # two pushes + 8 bytes: %rsp is 16-byte
                                        # aligned at every call below

        dec     %edi                    # argc-1, since we don't count program name
                                        # (argc is an int; this zero-extends into %rdi)
        jz      nothingToAverage
        mov     %rdi, count(%rip)       # save number of real arguments
        mov     %rdi, %r12              # r12 = index of last argument
        mov     %rsi, %r13              # r13 = argv
accumulate:
        mov     (%r13,%r12,8), %rdi     # argv[r12]
        call    atoi                    # now eax has the int value of arg
        cltq                            # sign-extend eax into rax before the 64-bit add
        add     %rax, sum(%rip)         # accumulate sum as we go
        dec     %r12                    # count down
        jnz     accumulate              # more arguments?
average:
        cvtsi2sdq sum(%rip), %xmm0      # q suffix: convert the full 64-bit value
        cvtsi2sdq count(%rip), %xmm1
        divsd   %xmm1, %xmm0            # xmm0 is sum/count
        lea     format(%rip), %rdi      # 1st arg to printf
        mov     $1, %eax                # printf is varargs, there is 1 non-int argument
        call    printf                  # printf(format, sum/count)
        jmp     .Ldone

nothingToAverage:
        lea     error(%rip), %rdi
        xor     %eax, %eax              # no vector args
        call    printf
.Ldone:
        xor     %eax, %eax              # main returns 0
        add     $8, %rsp                # restore stack pointer
        pop     %r13
        pop     %r12
        ret

        .data
count:  .quad   0
sum:    .quad   0
format: .asciz  "%g\n"
error:  .asciz  "There are no command line arguments to average\n"

Intel语法重写版:

.intel_syntax noprefix

# Treats every command line argument as an integer and prints their average.
# The running sum and the argument count are kept in the .data section.
#
# Register roles (callee-saved, so they survive the atoi calls):
#   r12 = index of the argument being converted, r13 = argv
#   r14 is not used; pushing it keeps rsp 16-byte aligned at the calls

.section .rodata
nojob_msg:
    .string "nothing needs to be done."
avg_msg:
    .string "avg: %f\n"


.data
count: .quad 0
sum:   .quad 0


.global main
.text
main:
    push r12
    push r13
    push r14
    dec edi                     # argc-1; argc is an int, this zero-extends into rdi
    jz nojob

    mov [rip + count], rdi      # number of real arguments
    mov r12, rdi
    mov r13, rsi
acc:
    mov rdi, [r13+r12*8]        # argv[r12]
    call atoi
    cdqe                        # sign-extend the int result before the 64-bit add
    add [rip + sum], rax
    dec r12
    jnz acc

    cvtsi2sd xmm0, QWORD PTR [rip + sum]
    cvtsi2sd xmm1, QWORD PTR [rip + count]
    divsd xmm0, xmm1            # xmm0 = sum / count
    lea rdi, [rip + avg_msg]
    mov eax, 1                  # printf is varargs: one vector argument
    call printf
    jmp done
nojob:
    lea rdi, [rip + nojob_msg]
    call puts
done:
    xor eax, eax                # main returns 0
    pop r14
    pop r13
    pop r12
    ret

运行效果:

$ gcc avg.s -o avg
$ ./avg 1 2 3 4 5
avg: 3.000000

Recursion

汇编也可以call自己。

# ----------------------------------------------------------------------------
# A 64-bit recursive implementation of the function
#
#     uint64_t factorial(unsigned n)
#
# implemented recursively
#
# ABI:   System V AMD64
# In:    %edi = n (32-bit; the upper half of %rdi is not defined by the caller)
# Out:   %rax = n!, wrapping modulo 2^64 on overflow
# Clobb: %rdi, flags
# ----------------------------------------------------------------------------

        .globl  factorial

        .text
factorial:
        mov     %edi, %edi              # zero-extend n so the 64-bit ops below are safe
        cmp     $1, %rdi                # n <= 1?
        ja      .Lrecurse               # if not, go do a recursive call
        mov     $1, %eax                # otherwise return 1
        ret
.Lrecurse:
        push    %rdi                    # save n on stack (also aligns %rsp!)
        dec     %rdi                    # n-1
        call    factorial               # factorial(n-1), result goes in %rax
        pop     %rdi                    # restore n
        imul    %rdi, %rax              # n * factorial(n-1), stored in %rax
        ret

Intel syntax version:

.intel_syntax noprefix

# uint64_t factorial(unsigned n), recursive
# ABI:   System V AMD64
# In:    edi = n (32-bit; the upper half of rdi is not defined by the caller)
# Out:   rax = n!, wrapping modulo 2^64 on overflow
# Clobb: rdi, flags

.global factorial
.text
factorial:
    mov edi, edi        # zero-extend n so the 64-bit ops below are safe
    cmp rdi, 1
    ja .Lrecur          # n > 1: recurse
    mov eax, 1          # n <= 1: result is 1
    ret
.Lrecur:
    push rdi            # keep n across the call (also aligns rsp)
    dec rdi
    call factorial      # rax = factorial(n-1)
    pop rdi
    imul rax, rdi       # rax = n * factorial(n-1)
    ret

caller in C:

#include <stdio.h>
#include <inttypes.h>

/* Implemented in assembly. */
uint64_t factorial(unsigned n);

/* Prints factorial(0) through factorial(9), one per line. */
int main() {
    for (unsigned i = 0; i < 10; i++) {
        /* PRIu64 is the matching conversion for uint64_t on every platform. */
        printf("factorial(%2u) = %" PRIu64 "\n", i, factorial(i));
    }
    return 0;
}

运行效果:

$ gcc call_recur.c recur.s -o recur
$ ./recur
factorial( 0) = 1
factorial( 1) = 1
factorial( 2) = 2
factorial( 3) = 6
factorial( 4) = 24
factorial( 5) = 120
factorial( 6) = 720
factorial( 7) = 5040
factorial( 8) = 40320
factorial( 9) = 362880

本文链接:https://cs.pynote.net/hd/asm/202212093/

-- EOF --

-- MORE --