Last Updated: 2023-11-23 15:21:37 Thursday
-- TOC --
本文主体内容来自:https://cs.lmu.edu/~ray/notes/gasexamples/,略有修改,带上我自己的学习笔记,以及用Intel语法的重写。有一些x64 register方面的基础知识,参考x86-64汇编基础。
# -------------------------------------------------------------------------
# hello.s -- print "Hello, world" using raw Linux system calls (no libc).
# Syntax: GAS, AT&T.  Target: 64-bit Linux only.
#
# Assemble and run, either:
#
#     gcc -c hello.s && ld hello.o && ./a.out
#
# or:
#
#     gcc -nostdlib hello.s && ./a.out
# -------------------------------------------------------------------------
        .global _start

        .text
_start:
        # write(1, message, 13)
        mov     $1, %rax                # rax = system call number 1 (write)
        mov     $1, %rdi                # arg 1: file descriptor 1 (stdout)
        mov     $message, %rsi          # arg 2: address of the bytes to write
        mov     $13, %rdx               # arg 3: number of bytes
        syscall

        # exit(0)
        mov     $60, %rax               # rax = system call number 60 (exit)
        xor     %edi, %edi              # arg 1: status 0 (a 32-bit write clears all of rdi)
        syscall

message:                                # the string bytes sit in .text, right after the code
        .ascii  "Hello, world\n"
- `#` 是注释。
- `rax` 存放system call number。
- `mov` 根据后面register的位数,来确定自己的操作位数。(还有些x64汇编指令也具有这个特性)
- `syscall`:system call。在32位的i386时代,用 `int $0x80` 中断进入系统调用,性能较差。不同的调用指令,system call number不一样,所使用的register也不一样!编号表在内核源码中:
  - `root/arch/x86/entry/syscalls/syscall_32.tbl`
  - `root/arch/x86/entry/syscalls/syscall_64.tbl`
- `call`:library call。
- `_start`,这是进程真正的入口,用 `.global` 定义为全局符号。ld默认会搜索这个symbol作为入口,main在它里面调用。一般的C代码看不到这个入口,它在stdlib的启动代码中。
- `message` 是一个local符号,它是代码的一部分,虽然只是只读数据。(代码和数据混在一起的情况,就是这样)
- `.ascii` 用来定义ASCII字符串。
- `gcc -nostdlib hello.s`,如果不指定 `-nostdlib`,会出现 `multiple definition of _start` 的错误,而且,此时ld会寻找main入口,显然stdlib中的_start被使用了。
- 还可以直接使用 `as` 编译:
$ as hello.s -o hello.o
$ ld hello.o -o hello
$ ./hello
Hello, world
用Intel语法重写,有一处关键,就是在提取地址的时候,要加上offset关键词。同时,下面的代码,还增加了对len的处理,通过exit调用,返回len的值。
# Intel-syntax rewrite of the raw-syscall hello program (64-bit Linux).
# Writes msg to stdout, then exits with status `len`, so the length computed
# by the assembler can be checked with `echo $?`.
        .intel_syntax noprefix

        .data
msg:
        .ascii  "Hello asm\n"
len = . - msg                           # byte count of msg, computed at assembly time

        .global _start

        .text
_start:
        # write(1, msg, len)
        mov     rax, 1                  # system call 1 = write
        mov     rdi, 1                  # file descriptor 1 = stdout
        # `offset` takes the address of msg; this is the key difference from
        # the AT&T version.  `offset flat: msg` is an equivalent spelling.
        mov     rsi, offset msg
        mov     rdx, len                # use the computed length instead of a hard-coded 10
        syscall

        # exit(len)
        mov     rax, 60                 # system call 60 = exit
        mov     rdi, len
        syscall
- 注意 `offset` 前缀!大小写都OK!dsdc89。

# -------------------------------------------------------------------------
# hola.s -- print "Hola, mundo" through the C library's puts().
# Syntax: GAS, AT&T.  Runs on Linux, or on any other system that
# does not use underscores for symbols in its C library.
#
# Assemble and run:
#
#     gcc hola.s && ./a.out
# -------------------------------------------------------------------------
        .global main

        .text
main:                                   # entered from the C library's startup code
        mov     $message, %rdi          # 1st integer (or pointer) argument goes in rdi
        # NOTE: nothing adjusts rsp before this call, so the stack is not
        # 16-byte aligned here as the System V calling convention asks.
        call    puts                    # puts(message)
        ret                             # back to the C library

message:
        .asciz  "Hola, mundo"           # .asciz appends the terminating 0 byte
- `does not use underscores for symbols in its C library`。
- `.asciz`,自动在字符串后面加上 `\0`,结束字符串。
- `ret` 指令从栈中弹出返回地址给rip寄存器。(返回后由caller清栈,即对rsp做加法)。
- 按照calling convention的规定,在call之前,rsp必须16字节对齐,但是此例没有对齐(进入main时栈上已压入返回地址,rsp除以16余8),编译后还能正常运行。

用Intel语法重写,增加保持16字节对齐:
# Intel-syntax version of the puts() example that keeps rsp 16-byte
# aligned at the call.
        .intel_syntax noprefix

        .section .rodata
msg:
        .asciz  "Hello IIT"

        .text
        .global main
main:
        sub     rsp, 8                  # pad the stack so it is aligned at the call (push rbp would also work)
        mov     rdi, offset msg         # arg 1: address of the string
        call    puts
        add     rsp, 8                  # drop the padding (pop rbp if rbp was pushed instead)
        ret
- `.section .rodata`,定义只读数据区。

打印fibonacci数的汇编代码:
# ----------------------------------------------------------------------
# fib.s -- print the first 90 Fibonacci numbers, one per line.
# Syntax: GAS, AT&T.  64-bit Linux; it needs to be linked with a C
# library because it calls printf.
#
# Assemble and link:
#
#     gcc fib.s
# ----------------------------------------------------------------------
        .global main

        .text
main:
        push    %rbx                    # rbx is used below, so the caller's value is saved first

        mov     $90, %ecx               # ecx = how many numbers are left to print
        xor     %eax, %eax              # rax = current number, starts at 0
        xor     %ebx, %ebx
        inc     %rbx                    # rbx = next number, starts at 1

print:
        # printf may overwrite rax and rcx, so both are kept on the stack
        # across the call and restored afterwards.
        push    %rax
        push    %rcx

        mov     $format, %rdi           # arg 1: format string
        mov     %rax, %rsi              # arg 2: current number
        xor     %eax, %eax              # variadic call: no vector-register arguments

        # Three 8-byte registers have been pushed, so the stack is aligned here.
        call    printf                  # printf(format, current)

        pop     %rcx
        pop     %rax

        mov     %rax, %rdx              # rdx = old current
        mov     %rbx, %rax              # current = next
        add     %rdx, %rbx              # next = old current + old next
        dec     %ecx                    # one fewer number to print
        jnz     print                   # loop until the counter reaches 0

        pop     %rbx                    # give the caller its rbx back
        ret

format:
        .asciz  "%20ld\n"
- `xor ecx, ecx`。

用Intel语法重写,使用不同的寄存器,通过判断CF作为结束条件,增加了按十六进制打印输出:
# Intel-syntax Fibonacci printer (64-bit Linux, System V ABI, links with libc).
# Prints lines of the form "index: value 0xHEX", starting with the value 0 at
# index 1, and stops as soon as adding the next pair carries out of 64 bits.
#
# Register roles (all callee-saved, so they survive the printf calls):
#   r12 = previous number, r13 = current number, r14 = line index
        .intel_syntax noprefix

        .section .rodata
fmt:
        .asciz  "%4d:%24lu 0x%lX\n"

        .text
        .global main
main:
        push    r12
        push    r13
        push    r14                     # three pushes also leave rsp 16-byte aligned for the calls

        xor     r12d, r12d              # previous = 0
        mov     r13d, 1                 # current  = 1
        mov     r14d, 1                 # index    = 1

        # First line: the number 0.
        mov     rdi, offset fmt
        mov     rsi, r14
        mov     edx, 0
        mov     ecx, 0
        xor     eax, eax                # variadic call: no vector-register arguments
        call    printf

show:                                   # every later line prints r13
        inc     r14
        mov     rdi, offset fmt
        mov     rsi, r14                # %4d   : index
        mov     rdx, r13                # %24lu : value in decimal
        mov     rcx, r13                # %lX   : the same value in hex
        xor     eax, eax                # variadic call: no vector-register arguments
        call    printf

        # Advance the pair: (previous, current) = (current, previous + current).
        mov     rdx, r12
        mov     r12, r13
        add     r13, rdx
        jnc     show                    # CF set means the sum no longer fits in 64 bits: stop

        xor     eax, eax                # return 0 from main instead of printf's leftover result
        pop     r14
        pop     r13
        pop     r12
        ret
- bb12215,上述代码用到了r12,r13和r14。(尝试了r10和r11,这两个寄存器是caller-saved,一样会被printf调用破坏,通过gdb跟踪代码查看寄存器可观察到)

打印main的argv。
# -----------------------------------------------------------------------------
# Print every command-line argument, one per line.
# Syntax: GAS, AT&T.  64-bit; calls puts from the C library.
#
# On entry, %rdi will contain argc and %rsi will contain argv.
# -----------------------------------------------------------------------------
        .global main

        .text
main:
        push    %rdi                    # keep the remaining count across the call to puts
        push    %rsi                    # keep the argv cursor across the call to puts
        sub     $8, %rsp                # pad: the stack must be aligned before the call

        mov     (%rsi), %rdi            # rdi = *argv, the string to display
        call    puts

        add     $8, %rsp                # drop the padding
        pop     %rsi
        pop     %rdi

        add     $8, %rsi                # step to the next argv entry
        dec     %rdi                    # one fewer argument left
        jnz     main                    # loop by jumping back to main until the count is 0
        ret
- argv是 `char**` 类型,因此 `(%rsi)` 才是指向参数字符串的开始地址。
- `add $8, %rsi`,这是在内存中向高地址偏移。

用Intel语法改写,功能升级:字符串转数字的x64汇编实现
# -----------------------------------------------------------------------------
# power.s -- a 64-bit command line application to compute x^y.
# Syntax: GAS, AT&T.  Calls atoi, printf and puts from the C library.
#
# Syntax: power x y
# x and y are integers
# -----------------------------------------------------------------------------
        .global main

        .text
main:
        push    %r12                    # callee-saved registers used below
        push    %r13
        push    %r14
        # Three registers pushed: the stack is now aligned for the calls below.

        cmp     $3, %rdi                # argc must be 3: program name plus x and y
        jne     error1

        mov     %rsi, %r12              # r12 = argv, kept across the calls

        # r13d counts the exponent down to zero, r14d holds the base and
        # eax holds the running product.
        mov     16(%r12), %rdi          # argv[2]
        call    atoi                    # eax = y
        test    %eax, %eax
        js      error2                  # negative exponents are rejected
        mov     %eax, %r13d             # r13d = y

        mov     8(%r12), %rdi           # argv[1]
        call    atoi                    # eax = x
        mov     %eax, %r14d             # r14d = x

        mov     $1, %eax                # product starts at 1
check:
        test    %r13d, %r13d            # any multiplications left?
        jz      gotit
        imul    %r14d, %eax             # product *= x
        dec     %r13d
        jmp     check

gotit:                                  # success: print the result
        mov     $answer, %rdi
        movslq  %eax, %rsi              # sign-extend the 32-bit product into rsi
        xor     %eax, %eax              # variadic call: no vector-register arguments
        call    printf
        jmp     done

error1:                                 # wrong number of arguments
        mov     $badArgumentCount, %edi
        call    puts
        jmp     done

error2:                                 # negative exponent
        mov     $negativeExponent, %edi
        call    puts

done:                                   # restore the saved registers and return
        pop     %r14
        pop     %r13
        pop     %r12
        ret

answer:
        .asciz  "%d\n"
badArgumentCount:
        .asciz  "Requires exactly two arguments\n"
negativeExponent:
        .asciz  "The exponent may not be negative\n"
- `call atoi`,命令行传进来的都是string。
- `call printf` 之前的xor是否可以不需要?应该不行,它用来告诉variadic printf,没有浮点数参数。(自己写测试代码,用gcc编译后检查汇编,可以看到,有几个浮点数,eax就是几)

用Intel语法重写:
# Intel-syntax x^y calculator (64-bit Linux, System V ABI, links with libc).
# Usage: pow x y   -- x and y are decimal integers, y must not be negative.
# The product is accumulated in 64 bits and wraps silently on overflow.
# Exit status: 0 on success, 1 on a usage error.
#
# Register roles (callee-saved, so they survive the libc calls):
#   r12 = argv, r13 = remaining exponent, r14 = base
        .intel_syntax noprefix

        .section .rodata
param_err:
        .string "I needs 2 parameters."
exp_err:
        .string "exponent may not be negative."
answer:
        .string "the power is: %ld\n"

        .global main

        .text
main:
        push    r12
        push    r13
        push    r14                     # three pushes also leave rsp 16-byte aligned for the calls

        cmp     edi, 3                  # argc must be 3: program name plus x and y
        jne     error1

        mov     r12, rsi                # rsi is caller-saved: keep argv where atoi cannot clobber it

        mov     rdi, [r12+16]           # argv[2]
        call    atoi
        test    eax, eax
        js      error2                  # negative exponent
        movsxd  r13, eax                # atoi returns a 32-bit int: widen eax explicitly

        mov     rdi, [r12+8]            # argv[1]
        call    atoi
        movsxd  r14, eax                # base, sign-extended to 64 bits

        mov     eax, 1                  # product starts at 1
check:
        test    r13, r13                # any multiplications left?
        jz      gotit
        imul    rax, r14                # product *= base
        dec     r13
        jmp     check

gotit:
        mov     rdi, offset answer
        mov     rsi, rax
        xor     eax, eax                # variadic call: no vector-register arguments
        call    printf
        xor     eax, eax                # exit status 0
        jmp     done

error1:
        mov     rdi, offset param_err
        call    puts
        mov     eax, 1                  # exit status 1
        jmp     done

error2:
        mov     rdi, offset exp_err
        call    puts
        mov     eax, 1                  # exit status 1

done:
        pop     r14
        pop     r13
        pop     r12
        ret
编译和运行输出:
$ gcc pow.s -o pow
$ ./pow 2 63
the power is: -9223372036854775808
用汇编写个计算浮点数array的sum的接口,然后用C语言调用。现在浮点数计算,都是用SSE相关指令,使用xmm寄存器。
# -----------------------------------------------------------------------------
# double sum(double[] array, unsigned length)
#
# Returns the sum of the first `length` elements of `array`; 0.0 when
# length is 0.
#
# ABI:   System V AMD64, AT&T syntax
# In:    rdi = array, esi = length (a 32-bit argument: only esi is read)
# Out:   xmm0 = sum
# Clobb: rdi, rsi, xmm0, flags
# -----------------------------------------------------------------------------
        .global sum

        .text
sum:
        xorpd   %xmm0, %xmm0            # running sum = 0.0
        test    %esi, %esi              # special case: length == 0
        jz      done
next:
        addsd   (%rdi), %xmm0           # sum += current element
        add     $8, %rdi                # advance to the next double
        dec     %esi                    # one fewer element left
        jnz     next
done:
        ret                             # the result is already in xmm0
Intel语法版本:
# double sum(double[] array, unsigned length) -- Intel-syntax version.
# System V AMD64.  In: rdi = array, esi = length (a 32-bit argument, so only
# esi is read).  Out: xmm0 = sum, 0.0 when length is 0.
        .intel_syntax noprefix
        .global sum

        .text
sum:
        xorpd   xmm0, xmm0              # running sum = 0.0
        test    esi, esi                # special case: length == 0
        jz      done
next:
        addsd   xmm0, [rdi]             # sum += current element
        add     rdi, 8                  # advance to the next double
        dec     esi                     # one fewer element left
        jnz     next
done:
        ret                             # the result is already in xmm0
C代码:
#include <stdio.h>
double sum(double[], unsigned);

/* Call the assembly sum() on several prefixes of one fixed array. */
int main(void) {
    double test[] = {40.5, 26.7, 21.9, 1.5, -40.5, -23.4};
    const unsigned counts[] = {6, 2, 0, 3};   /* prefix lengths, in print order */

    for (unsigned i = 0; i < sizeof counts / sizeof counts[0]; i++) {
        printf("%20.7f\n", sum(test, counts[i]));
    }
    return 0;
}
编译运行:
$ gcc callsum.c sum.s -o cs
$ ./cs
26.7000000
67.2000000
0.0000000
89.1000000
计算命令行上输入数的平均数。
# -----------------------------------------------------------------------------
# average.s -- 64-bit program that treats all its command line arguments as
# integers and displays their average as a floating point number.
# Syntax: GAS, AT&T.  System V ABI; calls atoi and printf from the C library.
#
# The running sum and the count live in a data section, not because they have
# to, but only to illustrate how data sections are used.
# -----------------------------------------------------------------------------
        .globl  main

        .text
main:
        dec     %rdi                    # argc-1, since the program name is not counted
        jz      nothingToAverage
        mov     %rdi, count             # save the number of real arguments

accumulate:                             # rdi walks from argc-1 down to 1
        push    %rdi                    # save registers across the call to atoi
        push    %rsi
        sub     $8, %rsp                # two pushes leave rsp 8 off; pad to 16-byte alignment
        mov     (%rsi,%rdi,8), %rdi     # argv[rdi]
        call    atoi                    # eax = integer value of the argument
        add     $8, %rsp                # drop the padding
        pop     %rsi
        pop     %rdi
        cltq                            # atoi returns a 32-bit int: sign-extend eax into rax
        add     %rax, sum               # accumulate the sum as we go
        dec     %rdi                    # count down
        jnz     accumulate

average:
        cvtsi2sdq sum, %xmm0            # q suffix: sum and count are 64-bit (.quad) values
        cvtsi2sdq count, %xmm1
        divsd   %xmm1, %xmm0            # xmm0 = sum / count
        mov     $format, %rdi           # arg 1: format string
        mov     $1, %rax                # variadic call: one vector-register argument (xmm0)
        sub     $8, %rsp                # align the stack pointer for the call
        call    printf                  # printf(format, sum/count)
        add     $8, %rsp                # restore the stack pointer
        xor     %eax, %eax              # return 0 from main
        ret

nothingToAverage:
        mov     $error, %rdi
        xor     %eax, %eax              # variadic call: no vector-register arguments
        sub     $8, %rsp                # align the stack pointer for the call
        call    printf
        add     $8, %rsp                # restore the stack pointer
        xor     %eax, %eax              # return 0 from main
        ret

        .data
count:  .quad   0
sum:    .quad   0
format: .asciz  "%g\n"
error:  .asciz  "There are no command line arguments to average\n"
- `mov (%rsi,%rdi,8), %rdi`,%rsi是起始地址,%rdi是下标(index),8表示单位长度,就像C代码中的指针+1,是移动sizeof(type)的长度。
- `cvtsi2sd`,convert scalar int to scalar double。
- `call printf` 前后的sub和add必须要有,否则segmentation fault。

Intel语法重制版:
# Intel-syntax version of the command-line average program
# (64-bit Linux, System V ABI, links with libc).
# Converts every argument with atoi, accumulates the total in `sum`, and
# prints sum/count as a floating-point number.  Always returns 0.
#
# Register roles (callee-saved, so they survive the libc calls):
#   r12 = index of the argument being converted (argc-1 down to 1)
#   r13 = argv
        .intel_syntax noprefix

        .section .rodata
nojob_msg:
        .string "nothing needs to be done."
avg_msg:
        .string "avg: %f\n"

        .data
count:  .quad   0
sum:    .quad   0

        .global main

        .text
main:
        push    r12
        push    r13
        push    r14                     # r14 is not used; the third push keeps rsp 16-byte aligned for the calls

        dec     rdi                     # argc-1: the program name is not averaged
        jz      nojob
        mov     QWORD PTR [count], rdi  # remember how many numbers there are
        mov     r12, rdi
        mov     r13, rsi
acc:
        mov     rdi, [r13+r12*8]        # argv[r12]
        call    atoi
        cdqe                            # atoi returns a 32-bit int: sign-extend eax into rax
        add     QWORD PTR [sum], rax
        dec     r12
        jnz     acc

        cvtsi2sd xmm0, QWORD PTR [sum]
        cvtsi2sd xmm1, QWORD PTR [count]
        divsd   xmm0, xmm1              # xmm0 = sum / count
        mov     rdi, offset avg_msg
        mov     eax, 1                  # variadic call: one vector-register argument (xmm0)
        call    printf
        xor     eax, eax                # return 0 from main
        jmp     done

nojob:
        mov     rdi, offset nojob_msg
        call    puts
        xor     eax, eax                # return 0 from main

done:
        pop     r14
        pop     r13
        pop     r12
        ret
运行效果:
$ gcc avg.s -o avg
$ ./avg 1 2 3 4 5
avg: 3.000000
汇编也可以call自己。
# ----------------------------------------------------------------------------
# uint64_t factorial(unsigned n)
#
# Recursive factorial: returns 1 when n <= 1, otherwise n * factorial(n-1).
#
# ABI:   System V AMD64, AT&T syntax
# In:    edi = n (a 32-bit argument)
# Out:   rax = n!
# Stack: one saved copy of n plus a return address per recursion level
# ----------------------------------------------------------------------------
        .globl  factorial

        .text
factorial:
        mov     %edi, %edi              # n is 32-bit: clear the unspecified upper half of rdi
        cmp     $1, %rdi                # n <= 1?
        ja      L1                      # if not, go do a recursive call
        mov     $1, %rax                # otherwise return 1
        ret
L1:
        push    %rdi                    # save n on the stack (this also aligns rsp for the call)
        dec     %rdi                    # n-1
        call    factorial               # rax = factorial(n-1)
        pop     %rdi                    # restore n
        imul    %rdi, %rax              # rax = n * factorial(n-1)
        ret
Intel syntax version:
# uint64_t factorial(unsigned n) -- Intel-syntax version, System V AMD64.
# Recursive: returns 1 when n <= 1, otherwise n * factorial(n-1).
# In: edi = n (a 32-bit argument).  Out: rax = n!.
        .intel_syntax noprefix
        .global factorial

        .text
factorial:
        mov     edi, edi                # n is 32-bit: clear the unspecified upper half of rdi
        cmp     rdi, 1                  # n <= 1?
        ja      recur
        mov     rax, 1                  # base case
        ret
recur:
        push    rdi                     # save n (this also aligns rsp for the call)
        dec     rdi                     # n-1
        call    factorial               # rax = factorial(n-1)
        pop     rdi                     # restore n
        imul    rax, rdi                # rax = n * factorial(n-1)
        ret
caller in C:
#include <stdio.h>
#include <inttypes.h>
uint64_t factorial(unsigned n);

/* Print factorial(0) through factorial(9) using the assembly implementation. */
int main(void) {
    for (unsigned i = 0; i < 10; i++) {
        /* PRIu64 is the conversion that matches uint64_t; %lu only matches
           where unsigned long happens to be 64 bits wide. */
        printf("factorial(%2u) = %" PRIu64 "\n", i, factorial(i));
    }
    return 0;
}
运行效果:
$ gcc call_recur.c recur.s -o recur
$ ./recur
factorial( 0) = 1
factorial( 1) = 1
factorial( 2) = 2
factorial( 3) = 6
factorial( 4) = 24
factorial( 5) = 120
factorial( 6) = 720
factorial( 7) = 5040
factorial( 8) = 40320
factorial( 9) = 362880
本文链接:https://cs.pynote.net/hd/asm/202212093/
-- EOF --
-- MORE --