写在前面
我们将首先从 afl-as 开始，这里是AFL的源码插桩部分。理论上来说应该从编译入手会更递进一些，但是插桩是发生在编译的过程中的，而且AFL的编译内容较多，先了解插桩部分会有助于编译部分的逻辑理解，也符合Fuzz的实际流程。
备注：AFL的源码分析网上已有很多公开文章，文章内容难免会有重合。我们希望大家能在这一系列中融入自己的思考，切实保证自己能体会到AFL的优秀之处。此外，如果大家有什么建议，欢迎交流。

afl-as.c

0. 效果

我们首先来观察一下使用 afl-gcc 编译的文件的样子。使用的源码如下：

#include <stdio.h>

/* Print "[+] Z is <z>" where z is a + b when a is strictly greater than b,
   and b - a otherwise. No trailing newline is written. */
void show(int a, int b){
	const int result = (a > b) ? (a + b) : (b - a);

	printf("[+] Z is %d", result);
}

/* Exercise both branches of show(): the first call has a <= b, the second
   has a > b. */
int main(){
  show(1, 3);
  show(7, 2);

  return 0;
}

使用 afl-gcc 对以上源码进行编译：

AFL_KEEP_ASSEMBLY=1 ./afl-gcc -g -o example example.c
# 这里使用 AFL_KEEP_ASSEMBLY 环境变量来保留中间生成的.s汇编文件

查看生成的中间 .s 文件：

	.file	"example.c"
	.text
.Ltext0:
	.file 0 "/home/v4ler1an/Documents/AFL_debug/AFL/cmake-build-debug/tmp" "example.c"
	.section	.rodata.str1.1,"aMS",@progbits,1
.LC0:
	.string	"[+] Z is %d"
	.text
	.p2align 4
	.globl	show
	.type	show, @function
show:
.LVL0:
.LFB23:
	.file 1 "example.c"
	.loc 1 3 24 view -0
	.cfi_startproc
	.loc 1 3 24 is_stmt 0 view .LVU1

/* --- AFL TRAMPOLINE (64-BIT) --- */

.align 4

leaq -(128+24)(%rsp), %rsp
movq %rdx,  0(%rsp)
movq %rcx,  8(%rsp)
movq %rax, 16(%rsp)
movq $0x00001fa9, %rcx
call __afl_maybe_log
movq 16(%rsp), %rax
movq  8(%rsp), %rcx
movq  0(%rsp), %rdx
leaq (128+24)(%rsp), %rsp

/* --- END --- */

	endbr64
	.loc 1 5 2 is_stmt 1 view .LVU2
.LVL1:
	.loc 1 6 2 view .LVU3
	.loc 1 7 2 view .LVU4
	.loc 1 8 2 view .LVU5
	.loc 1 9 6 is_stmt 0 view .LVU6
	movl	%esi, %edx
	leal	(%rdi,%rsi), %eax
	subl	%edi, %edx
	cmpl	%esi, %edi
.LBB12:

其中的 AFL TRAMPOLINE 部分就是 afl-as.h 文件中的桩代码。我们反汇编生成的二进制文件，首先看下main函数：

然后是show函数：

然后Bindiff看下两个程序的对比：

我们可以明显看到非原生代码 _afl_maybe_log() ，这就是AFL插入的桩代码，我们这里只在show函数中设置了一个if语句，所以插桩逻辑十分简单，只在main函数和show函数中各进行了一次插桩。该过程由 afl-as.c 中的逻辑来完成，其核心作用就是探测、反馈程序此时的状态，这会修改程序的原执行流。我们会在后续详细解释桩代码。

1. 文件描述

afl-as是AFL使用的汇编器，这里做成wrapper主要目的是为了进行插桩，AFL的插桩逻辑都在该文件中完成，而桩代码位于 afl-as.h 头文件中。

2. 文件架构

文件涉及的头文件调用关系如下：

与前面的 afl-gcc.c 文件基本相同，但多了对 afl-as.h 的包含，此外还多了几个与时间和进程相关的头文件。

afl-as.c 文件主要包含三个函数：main、edit_params 、add_instrumentation：

3. 源码分析

1. 部分关键变量

/* File-scope state filled in by main() and edit_params() and consumed by
   add_instrumentation(). */

static u8** as_params;          /* Parameters passed to the real 'as'   */

static u8*  input_file;         /* Originally specified input file      */
static u8*  modified_file;      /* Instrumented file for the real 'as'  */

static u8   be_quiet,           /* Quiet mode (no stderr output)        */
            clang_mode,         /* Running in clang mode?               */
            pass_thru,          /* Just pass data through?              */
            just_version,       /* Just show version?                   */
            sanitizer;          /* Using ASAN / MSAN                    */

/* as_par_cnt starts at 1 because slot 0 of as_params is reserved for the
   assembler binary itself; edit_params() appends from index 1 onwards. */

static u32  inst_ratio = 100,   /* Instrumentation probability (%)      */
            as_par_cnt = 1;     /* Number of params to 'as'             */

as_params 与 afl-gcc.c 中的 cc_params 一样，用来保存处理后要传递给真正的 as 的参数；input_file 是传入的待汇编的输入文件（.s 汇编文件）；modified_file 是经过插桩、最终交给真正的 as 处理的汇编文件；接下来的几个 u8 类型变量是一些模式参数；inst_ratio 是插桩百分比，该变量可以控制插桩密度，需要注意的是插桩越多，编译速度越慢；as_par_cnt 是最终传递给as的所有参数的总量。

2. main函数

main函数主要作为程序入口，进行一些基本处理，其调用的函数关系如下：

主要是调用 edit_params 和 add_instrumentation 函数完成程序的主要功能。在此之外，还调用了一些系统库中的函数进行辅助处理。下面通过源码来梳理它的处理流程：

/* Main entry point */

int main(int argc, char** argv) {
	... ...
  u8* inst_ratio_str = getenv("AFL_INST_RATIO");

	... ...
  clang_mode = !!getenv(CLANG_ENV_VAR);

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
 
  } else be_quiet = 1;

  if (argc < 2) {
	... ...
  }

  gettimeofday(&tv, &tz);
  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
  srandom(rand_seed);
  edit_params(argc, argv);

  if (inst_ratio_str) {
    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) 
      FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
  }

  if (getenv(AS_LOOP_ENV_VAR))
    FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

  setenv(AS_LOOP_ENV_VAR, "1", 1);

  /* When compiling with ASAN, we don't have a particularly elegant way to skip
     ASAN-specific branches. But we can probabilistically compensate for
     that... */

  if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
    sanitizer = 1;
    inst_ratio /= 3;
  }

  if (!just_version) add_instrumentation();

  if (!(pid = fork())) {
		
    // 打印处理完之后的参数
    printf("\n");
    for (int i =0 ; i < sizeof(as_params); i++){
        printf("as_params[%d]:%s\n", i, as_params[i]);
    }
    execvp(as_params[0], (char**)as_params);
    FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
  }

  if (pid < 0) PFATAL("fork() failed");
  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
  if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);
  exit(WEXITSTATUS(status));
}

获取环境变量 AFL_INST_RATIO，赋值给 inst_ratio_str，该环境变量主要控制检测每个分支的概率，取值为0到100%，设置为0时则只检测函数入口的跳转，而不会检测函数分支的跳转；
通过对当前时间（秒、微秒）和当前进程 id 进行异或（tv_sec ^ tv_usec ^ getpid()）得到一个种子值，传给 srandom() 函数作为随机数种子；
调用 edit_params(argc, argv) 函数进行参数处理；
判断 inst_ratio_str 是否进行了设置：如果设置了，则用 sscanf 将其解析到 inst_ratio 中，解析失败或取值大于100时直接报错退出；如果没有设置，inst_ratio 保持默认值100；
先检查内部环境变量 AS_LOOP_ENV_VAR 是否已被设置，如果已设置，说明被调用的 as 又是 afl-as 自身（例如 PATH 中包含 '.'），为避免无限循环直接报错退出；否则将其设置为1；
读取环境变量 AFL_USE_ASAN 和 AFL_USE_MSAN，只要其中有一个被设置（无论值是什么），就设置sanitizer为1，且将inst_ratio除3。因为在进行ASAN的编译时，AFL无法识别出ASAN特定的分支，导致插入很多无意义的桩代码，所以直接暴力地将插桩概率除以3；
调用 add_instrumentation() 函数进行插桩；
fork 一个子进程来执行 execvp(as_params[0], (char**)as_params);，即由子进程去运行真正的汇编器 as（插桩本身已经在上一步由 add_instrumentation() 完成）。之所以要 fork，是因为 execvp 执行的时候，会用 as_params[0] 来完全替换掉当前进程空间中的程序，只有放在子进程中执行，父进程才能在 as 执行完成之后 unlink 掉经过插桩的 modified_file（其实就是中间产生的.s汇编文件）；
调用 waitpid(pid, &status, 0) 等待子进程执行结束；
读取环境变量 AFL_KEEP_ASSEMBLY 的值，如果没有设置这个环境变量，就 unlink 掉 modified_file(已插完桩的文件)。设置该环境变量主要是为了防止 afl-as 删掉插桩后的汇编文件，设置为1则会保留插桩后的汇编文件。

main 函数的主要功能还是处理各种环境变量和数据，对参数的处理在 edit_params函数中，插桩功能在 add_instrumentation 函数中。main 函数把程序的执行放在了 fork 出的子进程中，这样就可以“优雅”地处理中间文件。

3. edit_params函数

该函数的主要职责还是在运行真正的as之前先处理一下参数选项，最后存放在 as_params 中。此外，还会设置一下 use_64bit/modified_file 的值。

/* Examine and modify parameters to pass to 'as'. Note that the file name
   is always the last parameter passed by GCC, so we exploit this property
   to keep the code simple.

   On return, as_params holds the NULL-terminated argument vector for the
   real assembler, with the original input file replaced by modified_file.
   Also sets input_file, modified_file, use_64bit, just_version and
   pass_thru. ("... ..." marks code elided from this excerpt.) */

static void edit_params(int argc, char** argv) {

  u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
	... ...
  /* Temp directory lookup order: TMPDIR, TEMP, TMP, then "/tmp". */
  if (!tmp_dir) tmp_dir = getenv("TEMP");
  if (!tmp_dir) tmp_dir = getenv("TMP");
  if (!tmp_dir) tmp_dir = "/tmp";

  as_params = ck_alloc((argc + 32) * sizeof(u8*));
  /* Slot 0 is the assembler to run: AFL_AS if set, otherwise "as". */
  as_params[0] = afl_as ? afl_as : (u8*)"as";
  as_params[argc] = 0;

  /* Copy every argument except the last one (the input file), noting the
     requested word size along the way. */
  for (i = 1; i < argc - 1; i++) {
    if (!strcmp(argv[i], "--64")) use_64bit = 1;
    else if (!strcmp(argv[i], "--32")) use_64bit = 0;
		... ...
    as_params[as_par_cnt++] = argv[i];

  }

		... ...
  input_file = argv[argc - 1];

  if (input_file[0] == '-') {
    /* "--version": forward the flag itself and skip instrumentation. */
    if (!strcmp(input_file + 1, "-version")) {
      just_version = 1;
      modified_file = input_file;
      goto wrap_things_up;
    }

    /* Any other dash option in the last slot is an error; a bare "-" clears
       input_file, which makes add_instrumentation() read from stdin. */
    if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
      else input_file = NULL;

  } else {

    /* Input that lives outside tmp_dir, /var/tmp/ and /tmp/ is passed
       through without instrumentation. */
    if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
        strncmp(input_file, "/var/tmp/", 9) &&
        strncmp(input_file, "/tmp/", 5)) pass_thru = 1;

  }

  /* Output name: <tmp_dir>/.afl-<pid>-<time>.s */
  modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
                               (u32)time(NULL));

wrap_things_up:

  /* The (possibly instrumented) file takes the place of the original input
     as the final argument. */
  as_params[as_par_cnt++] = modified_file;
  as_params[as_par_cnt]   = NULL;
}

依次检查环境变量 TMPDIR/TEMP/TMP, 确定 tmp_dir 的路径，都没有则设置为 /tmp，获取环境变量 AFL_AS 给到 afl_as；
ck_alloc((argc + 32) * sizeof(u8*)) 为 as_params 分配内存空间；
设置真正的汇编器 as 的路径（优先使用环境变量 AFL_AS 指定的值）：as_params[0] = afl_as ? afl_as : (u8*)"as";
设置 as_params[argc] = 0; ，as_par_cnt 初始值为1；
通过一个 for 循环来检查参数中是否有 --64，如果有则设置 use_64bit=1；如果有 --32 则设置 use_64bit=0。最后，as_params[as_par_cnt++] = argv[i];设置as_params的值为argv对应的参数值，结束for循环；
设置 input_file 变量：input_file = argv[argc - 1];，把最后一个参数的值作为 input_file：
1. 如果 input_file 的首字符为-：
  1. 如果后续为 -version，则 just_version = 1, modified_file = input_file，然后跳转到wrap_things_up。这里就只是做version的查询；
  2. 如果后续不为 -version：当 - 后面还有其他字符时抛出异常；当 input_file 仅为单独的 - 时，将 input_file 置为 NULL，表示之后从标准输入读取；
2. 如果 input_file 首字符不为-，分别比较 input_file 与 tmp_dir、/var/tmp/ 、/tmp/ 的前 strlen(tmp_dir)/9/5 个字节，如果三者都不匹配（即输入文件不在临时目录中），就设置 pass_thru 为1；
设置modified_file的值为alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),(u32) time(NULL));,简单的说就是tmp_dir/.afl-pid-time.s这样的字符串。

4. add_instrumentation函数

该函数执行了插桩操作，桩代码来自于 afl-as.h 文件中。函数源码如下：

/* Copy the input assembly (input_file, or stdin when input_file is NULL) to
   modified_file line by line, writing a trampoline before selected
   instructions, and append the main payload once at the end if at least one
   trampoline was written. ("... ..." marks code elided from this excerpt.) */
static void add_instrumentation(void) {

	... ...
  if (input_file) {

    inf = fopen(input_file, "r");
    if (!inf) PFATAL("Unable to read '%s'", input_file);

  } else inf = stdin;

  /* O_EXCL | O_CREAT: the output file must not exist yet. */
  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);

  if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);

  outf = fdopen(outfd, "w");

  if (!outf) PFATAL("fdopen() failed");  

  while (fgets(line, MAX_LINE, inf)) {

    /* Deferred instrumentation: a previous label armed instrument_next, and
       this is the first following line that looks like an instruction
       (tab followed by a letter), so the trampoline goes right before it. */
    if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
        instrument_next && line[0] == '\t' && isalpha(line[1])) {

      fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
              R(MAP_SIZE));

      instrument_next = 0;
      ins_lines++;

    }

    /* The original line is always copied through. */
    fputs(line, outf);

    if (pass_thru) continue;
    /* Assembler directives ("\t."): track which section we are in. */
    if (line[0] == '\t' && line[1] == '.') {
      /* In non-clang mode, a single-digit ".p2align N" inside .text makes
         the next ".L<num>" label be skipped. */
      if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
          isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
      /* Entering a text section enables instrumentation... */
      if (!strncmp(line + 2, "text\n", 5) ||
          !strncmp(line + 2, "section\t.text", 13) ||
          !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
          !strncmp(line + 2, "section __TEXT,__text", 21)) {
        instr_ok = 1;
        continue; 
      }
      /* ...and entering any other section, .bss or .data disables it. */
      if (!strncmp(line + 2, "section\t", 8) ||
          !strncmp(line + 2, "section ", 8) ||
          !strncmp(line + 2, "bss\n", 4) ||
          !strncmp(line + 2, "data\n", 5)) {
        instr_ok = 0;
        continue;
      }
    }
    /* Skip code assembled for the other word size than the one in use. */
    if (strstr(line, ".code")) {
      if (strstr(line, ".code32")) skip_csect = use_64bit;
      if (strstr(line, ".code64")) skip_csect = !use_64bit;
    }
    /* Skip Intel-syntax regions until AT&T syntax is restored. */
    if (strstr(line, ".intel_syntax")) skip_intel = 1;
    if (strstr(line, ".att_syntax")) skip_intel = 0;
    /* Skip the region between #APP and #NO_APP markers. */
    if (line[0] == '#' || line[1] == '#') {
      if (strstr(line, "#APP")) skip_app = 1;
      if (strstr(line, "#NO_APP")) skip_app = 0;
    }
    if (skip_intel || skip_app || skip_csect || !instr_ok ||
        line[0] == '#' || line[0] == ' ') continue;
    /* Instruction lines: a "j" mnemonic whose next letter is not "m"
       (i.e. not jmp) gets a trampoline written after it, subject to
       inst_ratio. */
    if (line[0] == '\t') {
      if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
        fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
                R(MAP_SIZE));
        ins_lines++;
      }
      continue;
    }
... ...
    /* Labels: arm deferred instrumentation for the next instruction line. */
    if (strstr(line, ":")) {
      if (line[0] == '.') {
... ...
        /* Dot labels: only those with a digit at line[2], or "LBB" labels
           in clang mode, are candidates, subject to inst_ratio. A pending
           skip_next_label consumes one such label instead of arming. */
        if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
            && R(100) < inst_ratio) {
          if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
        }
      } else {
        /* Labels not starting with '.' always arm instrumentation. */
        instrument_next = 1;
      }
    }
  }

  /* The shared payload is only needed if something was instrumented. */
  if (ins_lines)
    fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
  if (input_file) fclose(inf);
  fclose(outf);
  if (!be_quiet) {
    if (!ins_lines) WARNF("No instrumentation targets found%s.",
                          pass_thru ? " (pass-thru mode)" : "");
    else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
             ins_lines, use_64bit ? "64" : "32",
             getenv("AFL_HARDEN") ? "hardened" : 
             (sanitizer ? "ASAN/MSAN" : "non-hardened"),
             inst_ratio);
   }
}

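如果 input_file 不为空，则用 fopen 打开输入文件，失败抛出异常；如果 input_file 为空，则从标准输入读取。最终获取 FILE* 指针给 inf；
以 O_WRONLY | O_EXCL | O_CREAT 方式打开 modified_file（文件已存在时会失败），获取fd赋值给 outfd，失败抛出异常；然后通过 fdopen 将 outfd 包装为 FILE* 指针 outf，失败同样抛出异常；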
在 while 循环每次迭代的开头（循环本身见下一条），首先检查 !pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&instrument_next && line[0] == '\t' && isalpha(line[1])，上述条件都满足的情况，是希望将桩代码插入到所有的label、macros、comments之后，此时直接使用 fprintf 将桩代码 trampoline_fmt_* 写入到 outf 中，并设置 instrument_next = 0，插桩计数器 ins_lines++；
设置一个while 循环，读取 inf 指向的输入文件的每一行到 line 数组中，每行最多 MAX_LINE = 8192 个字节（包含末尾的‘\0’）。从line数组里将读取到的内容写入到 outf 指向的文件，然后进入到真正的插桩逻辑。这里需要注意的是，插桩只向 .text 段插入：
1. 首先判断读入的行是否以‘\t’ 开头，本质上是在匹配.s文件中声明的段，然后判断line[1]是否为.；
  1. 检查 clang_mode和 instr_ok变量，instr_ok 变量指定了是否位于 .text 节。检查是否为 p2align 指令，如果是，则设置 skip_next_label = 1；
  2. 尝试匹配 "text\n" "section\t.text" "section\t__TEXT,__text" "section __TEXT,__text" 其中任意一个，匹配成功，设置 instr_ok = 1，表示位于 .text 段中，continue 结束本次迭代，读取下一行数据到 line 数组；
  3. 尝试匹配"section\t" "section " "bss\n" "data\n" 其中任意一个，匹配成功，设置 instr_ok = 0，表示位于其他段中，continue 结束本次迭代，读取下一行数据到 line 数组；
2. 使用4个 if 语句处理格式，来设置一些标志信息，包括 off-flavor assembly，Intel/AT&T的块处理方式、ad-hoc __asm__块的处理方式等；
3. AFL在插桩时重点关注的内容包括：^main, ^.L0, ^.LBB0_0, ^\tjnz foo （_main函数， gcc和clang下的分支标记，条件跳转分支标记），这些内容通常标志了程序的流程变化，因此AFL会重点在这些位置进行插桩；
4. 条件跳转分支插桩（jnz等），会插在分支后面以及分支目标标签位置
  1. 匹配形如\tj[^m].格式的指令，即条件跳转指令，且R(100)产生的随机数小于插桩密度inst_ratio；
  2. 使用fprintf将trampoline_fmt_64(插桩部分的指令)写入 outf 指向的文件，其中小于 MAP_SIZE 的随机数 R(MAP_SIZE) 作为格式化参数填入桩代码，成为该插桩点的标识。写入32位还是64位根据 use_64bit 变量进行判断；
  3. 插桩计数ins_lines加一，continue 结束本次迭代，进行下一次遍历；
5. label插桩，有些label会是一些branch的跳转位置
  1. 首先判断是否以.L开始，然后判断L之后是否为数字或者是否满足在clang mode下，line 为 LBB（L<num> / LBB<num>）
    1. 如果匹配到，在满足插桩密度以及未设置 skip_next_label 的情况下，instrument_next = 1，即设置成 defer mode；否则设置 skip_next_label = 0
  2. 如果只匹配到了 line 中存在 : 但是并不是以 . 开头，说明是 Function label，instrument_next = 1
6. 完成后进入 while 的下一次循环，在下一次循环的开头，对于以 deferred mode 进行插桩的位置调用 fprintf 进行插桩。
如果插桩计数器 ins_lines 不为0，就在完全拷贝 input_file 之后，根据是32位还是64位向 outf 中写入 main_payload_64 或者 main_payload_32，然后关闭 inf 和 outf 文件。

通过上面的插桩过程，我们可以看到 AFL 判断在哪些地方进行插桩的判断依据是汇编指令的前导命令。我们最后通过对比图来看一下插桩前后的不同。首先是 show 函数：

然后是 main 函数：

最后在文件结尾插入 AFL MAIN PAYLOAD:

afl-as.h

1. 文件描述

该文件包含了桩代码 instrumentation trampoline，这些代码会根据适当的场景插入到待fuzz程序中。桩代码会以经过异或的数据对的格式保存当前执行分支的标识和前一个分支的标识，表示起来就是 shm_trace_map[cur_loc ^ prev_loc]++。

桩代码包含32位和64位，除了Apple平台之外，其他平台理论上都可以成功插桩。

2. 文件架构

因为是头文件，所以主要是各结构的定义：

3. 源码分析

在进行函数源码分析前，先介绍几个bss段的变量：

.AFL_VARS:

  .lcomm   __afl_area_ptr, 8						->	共享内存地址
  .lcomm   __afl_prev_loc, 8						->	上一个插桩位置（R(MAP_SIZE)随机数的值）
  .lcomm   __afl_fork_pid, 4						->  fork生成的子进程id
  .lcomm   __afl_temp, 4								->	buffer
  .lcomm   __afl_setup_failure, 1				->	判定setup是否成功的标志位，置位表示失败，直接退出
  .comm    __afl_global_area_ptr, 8, 8	->	全局指针

1. trampoline_fmt_64

备注：这里我们分析64位的桩代码，32位只是使用的寄存器和数据对齐的偏移不同。

/* --- AFL TRAMPOLINE (64-BIT) --- */

/* Format string written by add_instrumentation(); the single %08x
   placeholder receives the location ID passed to fprintf(). */

.align 4

/* Move rsp down by 128+24 bytes: 24 bytes hold the three registers saved
   below; the extra 128 presumably steps over the caller's red zone
   (NOTE(review): confirm against the x86-64 SysV ABI). */
leaq -(128+24)(%rsp), %rsp
movq %rdx,  0(%rsp)
movq %rcx,  8(%rsp)
movq %rax, 16(%rsp)
/* rcx carries this location's ID into __afl_maybe_log. */
movq $0x%08x, %rcx
call __afl_maybe_log
/* Restore the saved registers and the stack pointer. */
movq 16(%rsp), %rax
movq  8(%rsp), %rcx
movq  0(%rsp), %rdx
leaq (128+24)(%rsp), %rsp

/* --- END --- */

首先保存 rdx、rcx、rax寄存器到栈上；
设置 rcx 的值为 fprintf() 要打印的变量内容
调用 __afl_maybe_log
恢复先前保存的寄存器

主要就是为了调用 __afl_maybe_log 进行各种设置，而 __afl_maybe_log 放在 AFL MAIN PAYLOAD 中，是一段由汇编编写的功能函数。

这里我们解释一下 movq $0x%08x, %rcx这条指令的含义。首先我们回顾 afl-as.c 中对 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, R(MAP_SIZE)) 的调用，rcx 中最终保存的值为 R(MAP_SIZE) 。R(x) 函数定义在 types.h 文件中，#define R(x) (random() % (x))。 MAP_SIZE 的定义在config.h 文件中：
#define MAP_SIZE_POW2       16
#define MAP_SIZE            (1 << MAP_SIZE_POW2)
所以这里调用 R(MAP_SIZE) 的意思就是生成一个 0 到 MAP_SIZE - 1（即 0 ~ 65535）之间的随机数。

在处理到某个分支进行插桩时，afl-as 生成一个随机数，然后运行时保存在 rcx 寄存器中，作为代码块的标识。后续介绍该标识的使用。

2. 整体流程

3. __afl_maybe_log

__afl_maybe_log:

  /* Capture the caller's arithmetic flags: lahf copies SF/ZF/AF/PF/CF into
     AH, and seto stores the overflow flag (0 or 1) in AL. */

  lahf
  seto  %al

  /* Check if SHM region is already mapped. */

  /* A zero __afl_area_ptr means the map has not been set up yet; otherwise
     rdx holds the map address for the code that follows. */

  movq  __afl_area_ptr(%rip), %rdx
  testq %rdx, %rdx
  je    __afl_setup

使用 lahf 将标志寄存器低8位放入 AH，即保存 FLAGS 中的SF、ZF、AF、PF、CF五个标志位到AH，然后使用 seto %al 将溢出标志 OF 保存到 AL；
检查 __afl_area_ptr 是否为空，该变量主要判断共享内存是否进行了设置。为空表示共享内存还没有进行映射设置；
1. 如果 __afl_area_ptr为0，跳转到 __afl_setup 进行设置
2. 如果不为0，表示已设置共享内存，继续运行

3. __afl_setup

该部分主要做 __afl_area_ptr 的检查和设置。从这里可以看出，只有在运行到第一个桩代码的时候才会进行本次的初始化。

__afl_setup:

  /* Do not retry setup if we had previous failures. */

  cmpb $0, __afl_setup_failure(%rip)
  jne __afl_return

  /* Check out if we have a global pointer on file. */

  /* __afl_global_area_ptr is reached through the GOT; load its address,
     then the pointer value it holds. */

  movq  __afl_global_area_ptr@GOTPCREL(%rip), %rdx
  movq  (%rdx), %rdx
  testq %rdx, %rdx
  je    __afl_setup_first

  /* A global pointer already exists: cache it locally and record the hit. */

  movq %rdx, __afl_area_ptr(%rip)
  jmp  __afl_store

判断 __afl_setup_failure 是否为真，如果为真表示 setup 工作已经失败了一次，不进行第二次尝试，直接跳转到 __afl_return 返回；
判断全局指针 __afl_global_area_ptr ，如果不为空则赋值给 __afl_area_ptr，跳转到 __afl_store ；如果为空则跳转到 __afl_setup_first 先去进行第一次的 setup 工作。

4. __afl_setup_first

进行第一次的 setup 工作：

__afl_setup_first:

  /* Save everything that is not yet saved and that may be touched by
     getenv() and several other libcalls we'll be relying on. */

  /* 352 bytes of spill space for the general-purpose and xmm registers
     stored below ("... ..." marks stores elided from this excerpt). */

  leaq -352(%rsp), %rsp

  movq %rax,   0(%rsp)
	... ...
  movq %r11,  64(%rsp)

  movq %xmm0,  96(%rsp)
	... ...
  movq %xmm15, 336(%rsp)

  /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */

  /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the
     original stack ptr in the callee-saved r12. */

  pushq %r12
  movq  %rsp, %r12
  subq  $16, %rsp
  andq  $0xfffffffffffffff0, %rsp

  /* getenv(.AFL_SHM_ENV): a NULL result aborts setup. */

  leaq .AFL_SHM_ENV(%rip), %rdi
call getenv@PLT

  testq %rax, %rax
  je    __afl_setup_abort

  /* Convert the variable's text to the integer SHM ID. */

  movq  %rax, %rdi
call atoi@PLT

  xorq %rdx, %rdx   /* shmat flags    */
  xorq %rsi, %rsi   /* requested addr */
  movq %rax, %rdi   /* SHM ID         */
call shmat@PLT

  /* shmat() reports failure as (void *)-1. */

  cmpq $-1, %rax
  je   __afl_setup_abort

  /* Store the address of the SHM region. */

  movq %rax, %rdx
  movq %rax, __afl_area_ptr(%rip)

  /* rdx is borrowed to address the global pointer through the GOT, then
     reloaded with the SHM address for the code that follows. */

  movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx
  movq %rax, (%rdx)
  movq %rax, %rdx

保存所有寄存器，包括 xmm 寄存器组；
rsp 对齐；
通过 getenv 获取 .AFL_SHM_ENV 所指向的环境变量（即 __AFL_SHM_ID）的值，该环境变量存放的是共享内存的id。如果获取失败，跳转到 __afl_setup_abort；获取成功则通过 atoi 将其转换为整数；
调用 shmat 启用对共享内存的访问，启用失败跳转到 __afl_setup_abort;
将 shmat 返回的共享内存地址存储在 __afl_area_ptr 和 __afl_global_area_ptr 变量中；
没有其他错误则开始进入 __afl_forkserver

5. __afl_forkserver

启动 fork server：

__afl_forkserver:

  /* Enter the fork server mode to avoid the overhead of execve() calls. We
     push rdx (area ptr) twice to keep stack alignment neat. */

  pushq %rdx
  pushq %rdx

  /* Phone home and tell the parent that we're OK. (Note that signals with
     no SA_RESTART will mess it up). If this fails, assume that the fd is
     closed because we were execve()d from an instrumented binary, or because
     the parent doesn't want to use the fork server. */

  movq $4, %rdx               /* length    */
  leaq __afl_temp(%rip), %rsi /* data      */
  movq $(198 + 1), %rdi       /* file desc */
call write@PLT

  /* Anything other than a full 4-byte write means there is no fork server
     parent listening: carry on as an ordinary process. */

  cmpq $4, %rax
  jne  __afl_fork_resume

首先通过两次调用 pushq 指令来对齐栈；
然后调用 write(199, &__afl_temp, 4) 向 FORKSRV_FD+1也就是199号描述符（这里表示的是状态管道）中写出 __afl_temp 中的4个字节，告知 afl 进程 fork server已经启动成功；
根据返回结果判断是否跳转到 __afl_fork_resume：如果 write 的返回值不为4（写入失败），说明不使用 fork server，直接跳转到 __afl_fork_resume 继续执行；
否则接下来会进入到 __afl_fork_wait_loop

6. __afl_fork_wait_loop

  __afl_fork_wait_loop:
  
    /* Wait for parent by reading from the pipe. Abort if read fails. */
  
    movq $4, %rdx               /* length    */
    leaq __afl_temp(%rip), %rsi /* data      */
    movq $ STRINGIFY(FORKSRV_FD) , %rdi             /* file desc */
  CALL_L64(read)
    cmpq $4, %rax
    jne  __afl_die
  
    /* Once woken up, create a clone of our process. This is an excellent use
       case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly
       caches getpid() results and offers no way to update the value, breaking
       abort(), raise(), and a bunch of other things :-( */
  
    /* fork() result: negative -> die, zero -> we are the child and resume
       the target, positive -> we are the fork server and hold the PID. */
  
  CALL_L64(fork)
    cmpq $0, %rax
    jl   __afl_die
    je   __afl_fork_resume
  
    /* In parent process: write PID to pipe, then wait for child. */
  
    movl %eax, __afl_fork_pid(%rip)
  
    movq $4, %rdx                   /* length    */
    leaq __afl_fork_pid(%rip), %rsi /* data      */
    movq $ STRINGIFY((FORKSRV_FD + 1)) , %rdi             /* file desc */
  CALL_L64(write)
  
    /* NOTE(review): the PID is stored with a 32-bit movl above but loaded
       here with a 64-bit movq; confirm what follows __afl_fork_pid in
       memory if the upper half of rdi matters to waitpid(). */
  
    movq $0, %rdx                   /* no flags  */
    leaq __afl_temp(%rip), %rsi     /* status    */
    movq __afl_fork_pid(%rip), %rdi /* PID       */
  CALL_L64(waitpid)
    cmpq $0, %rax
    jle  __afl_die
  
    /* Relay wait status to pipe, then loop back. */
  
    movq $4, %rdx               /* length    */
    leaq __afl_temp(%rip), %rsi /* data      */
    movq $ STRINGIFY((FORKSRV_FD + 1)) , %rdi         /* file desc */
  CALL_L64(write)
  
    jmp  __afl_fork_wait_loop

read 从 FORKSRV_FD （即198，控制管道）中读取字节到 __afl_temp 中，读取失败直接跳转到 __afl_die 中结束循环；读取成功则继续；
fork 一个子进程，原来的父进程充当 fork server 和fuzz进行通信，子进程跳转到 __afl_fork_resume继续执行 target；
父进程将子进程的 pid 赋值给 __afl_fork_pid，并写入到 FORKSRV_FD+1 状态管道，通知给fuzz；
父进程即fork server等待子进程结束，并保存其执行结果到 __afl_temp 中，然后将子进程的执行结果，从 __afl_temp 写入到状态管道，告知fuzz；
父进程不断轮询__afl_fork_wait_loop循环，不断从控制管道读取，直到fuzz端命令fork server进行新一轮测试。

7. __afl_fork_resume

__afl_fork_resume:
  
    /* In child process: close fds, resume execution. */
  
    movq $ STRINGIFY(FORKSRV_FD) , %rdi
  CALL_L64(close)
  
    movq $ STRINGIFY((FORKSRV_FD + 1)) , %rdi
  CALL_L64(close)
  
    /* Undo the two pushes of rdx made on entry to the fork server; rdx
       receives the saved area pointer again. */
  
    popq %rdx
    popq %rdx
  
    /* Restore the pre-alignment stack pointer kept in r12, then r12. */
  
    movq %r12, %rsp
    popq %r12
  
    /* Reload the spilled registers ("... ..." marks loads elided from this
       excerpt) and release the 352-byte spill area. */
  
    movq  0(%rsp), %rax
		... ...
    movq 64(%rsp), %r11
  
    movq  96(%rsp), %xmm0
		... ...
    movq 336(%rsp), %xmm15
  
    leaq 352(%rsp), %rsp
  
    jmp  __afl_store

关闭子进程的fd
恢复子进程的寄存器状态
跳转到 __afl_store 去执行

8. __afl_store

该部分是在共享内存设置完成后，进行插桩标识计算的逻辑：

__afl_store:
 
  /* Calculate and store hit for the code location specified in rcx. */
 
  /* With cur = rcx on entry and prev = __afl_prev_loc:
       rcx  = cur ^ prev            (index into the map)
       prev = prev ^ (cur ^ prev)   (which equals cur)
       prev = cur >> 1
     so no scratch register is needed to keep cur around. */
 
  xorq __afl_prev_loc(%rip), %rcx
  xorq %rcx, __afl_prev_loc(%rip)
  shrq $1, __afl_prev_loc(%rip)
 
  /* Bump the one-byte hit counter at area_ptr (rdx) + index (rcx). */
 
  incb (%rdx, %rcx, 1)

上述代码的反编译的结果如下：

这里的 rcx 其实使用的是 a4，而 a4 来自于 __afl_maybe_log 传入：

我们根据 __afl_maybe_log 的内部实现可知，rcx 表示的值是 fprintf() 插桩时产生的随机数，也就是每个桩代码的标识。

那么__afl_store 的逻辑就变成：

cur_location = <COMPILE_TIME_RANDOM>;
shared_mem[cur_location ^ prev_location]++; 
prev_location = cur_location >> 1;

首先获取当前桩标识 cur_location；
然后与上一个桩标识(prev_location) 进行异或操作，并使共享内存中对应的槽的值加1；
最后将 prev_location 设置为 cur_location >> 1。

总结起来就是 AFL 会为每个代码块生成一个随机数，将其作为代码块"位置"的标识；然后，对分支处的“源位置”和“目标位置”进行异或操作，并将结果作为该分支的key，保存每个分支的执行次数。用于保存执行次数的本质上是一个hash table。大小为 MAP_SIZE=64K。

为什么要右移一位？

AFL主要考虑如下情况：如果此分支是A->A和B->B这样的情况那么异或之后就会都变成0，进而使得无法区分。亦或者考虑：A->B与B->A的情况，异或后的key也是一样的，难以区分。将 prev_location 右移一位之后，上述几种情况计算出的 key 通常不再相同，从而可以区分开。

9. __afl_maybe_log的反编译结果

如果感觉纯汇编理解有难度，可以通过 __afl_maybe_log 的反编译结果来理解整个过程。

// Decompiler output for __afl_maybe_log. a4 is the location ID that the
// trampoline places in rcx; v4/v5 model the saved overflow flag.
char __fastcall _afl_maybe_log(__int64 a1, __int64 a2, __int64 a3, __int64 a4)
{
  char v4; // of
  char v5; // al
  __int64 v6__afl_area_ptr; // rdx
  __int64 tmp; // rcx
  char *v9; // rax
  int v10; // eax
  void *shared_memory; // rax
  int FORKSRV_FD; // edi
  __int64 v13; // rax
  __int64 v14; // rax
  __int64 v15; // [rsp-10h] [rbp-180h]
  char v16; // [rsp+10h] [rbp-160h]
  __int64 v17; // [rsp+18h] [rbp-158h]

  v5 = v4;
  v6__afl_area_ptr = _afl_area_ptr;
  if ( !_afl_area_ptr )
  {
    if ( _afl_setup_failure )
      return v5 + 127;
    v6__afl_area_ptr = _afl_global_area_ptr;    // try the process-wide __afl_global_area_ptr first
    if ( _afl_global_area_ptr )
    {
      _afl_area_ptr = _afl_global_area_ptr;
    }
    else                                        // no __afl_global_area_ptr yet: first-time setup
    {
      v16 = v4;
      v17 = a4;
      v9 = getenv("__AFL_SHM_ID");
      if ( !v9 || (v10 = atoi(v9), shared_memory = shmat(v10, 0LL, 0), shared_memory == (void *)-1LL) )
      {
        ++_afl_setup_failure;
        v5 = v16;
        return v5 + 127;
      }
      _afl_area_ptr = (__int64)shared_memory;
      _afl_global_area_ptr = shared_memory;
      v15 = (__int64)shared_memory;
      if ( write(199, &_afl_temp, 4uLL) == 4 )  // write 4 bytes to FORKSRV_FD+1 (the status pipe)
      {
        while ( 1 )                             // fork server loop
        {
          FORKSRV_FD = 198;
          if ( read(198, &_afl_temp, 4uLL) != 4 )// read 4 bytes from the control pipe
            break;
          LODWORD(v13) = fork();                // spawn a child process
          if ( v13 < 0 )
            break;
          if ( !v13 )
            goto __afl_fork_resume;
          _afl_fork_pid = v13;
          write(199, &_afl_fork_pid, 4uLL);     // report the child's PID on the status pipe
          FORKSRV_FD = _afl_fork_pid;
          LODWORD(v14) = waitpid(_afl_fork_pid, &_afl_temp, 0);// wait status is stored in _afl_temp
          if ( v14 <= 0 )
            break;
          write(199, &_afl_temp, 4uLL);         // relay the wait status on the status pipe
        }
        _exit(FORKSRV_FD);
      }
__afl_fork_resume:
      close(198);
      close(199);
      v6__afl_area_ptr = v15;
      v5 = v16;
      a4 = v17;
    }
  }
  tmp = _afl_prev_loc ^ a4;                     // tmp = prev_location ^ cur_location (map index)
  _afl_prev_loc ^= tmp;                         // prev_location now equals cur_location
  _afl_prev_loc = (unsigned __int64)_afl_prev_loc >> 1;// prev_location = cur_location >> 1;
  ++*(_BYTE *)(v6__afl_area_ptr + tmp);         // shared_mem[cur_location ^ prev_location]++; 
  return v5 + 127;
}

AFL源码分析系列（一）-- afl-as

afl-as.c

0. 效果

1. 文件描述

2. 文件架构

3. 源码分析

1. 部分关键变量

2. main函数

3. edit_params函数

4. add_instrumentation函数

afl-as.h

1. 文件描述

2. 文件架构

3. 源码分析

1. trampoline_fmt_64

2. 整体流程

3. __afl_maybe_log

3. __afl_setup

4. __afl_setup_first

5. __afl_forkserver

6. __afl_fork_wait_loop

7. __afl_fork_resume

8. __afl_store

9. __afl_maybe_log的反编译结果

参与评论

全部评论 4