dyld
通过更新Mach-O二进制文件中特定__DATA
段的指针来绑定惰性和非惰性符号。fishhook通过传递给rebind_symbols
的符号名来确定需要更新的位置,然后用相应的替换项重新绑定这些符号。
对于给定的镜像,__DATA
段可以包含与动态符号绑定相关的两个部分:__nl_symbol_ptr
和__la_symbol_ptr
。
__nl_symbol_ptr
是指向非延迟绑定数据的指针数组(这些指针在加载库时绑定)。
__la_symbol_ptr
是指向导入函数的指针数组,通常在第一次调用该符号时由名为dyld_stub_binder
的例程填充(也可以在启动时告诉dyld
绑定这些指针)。
为了找到对应于这些部分中某个特定位置的符号的名称,我们需要通过几个间接层来进行查看。
对于两个相关部分,section header
(<mach-o/loader.h>
中声明的struct section
)提供一个偏移量(在reserved1
字段中)到所谓的间接符号表中。
间接符号表位于二进制文件的__LINKEDIT
段中,它只是符号表(也在__LINKEDIT
中)中的索引数组,其顺序与非惰性和惰性符号部分中的指针顺序相同。因此,给定struct section nl_symbol_ptr,该部分中第一个地址在符号表中的对应索引就是indirect_symbol_table[nl_symbol_ptr->reserved1]。
符号表本身是一个struct nlist
数组(请参见<mach-o/nlist.h>
),每个nlist
都包含一个指向__LINKEDIT
中字符串表的索引,其中存储了实际的符号名。因此,对于每个指针__nl_symbol_ptr
和__la_symbol_ptr
,我们都可以找到相应的符号,然后找到相应的字符串与请求的符号名进行比较,如果有匹配项,我们用替换项替换节中的指针。
// --------------------------------- Hooking NSLog -----------

// Receives the address of the original NSLog; rebind_symbols stores it here
// through rebinding.replaced.
static void (*sys_nslog)(NSString *format, ...);

// Replacement installed in place of NSLog: formats the message, appends a
// suffix, and forwards the result to the original implementation.
void my_nslog(NSString *format, ...) {
    NSString *message = nil;
    if (format != nil) {
        // Expand the caller's arguments here. Forwarding only `format` would
        // drop the variadic arguments, so any specifier in it would read
        // garbage from the stack.
        va_list args;
        va_start(args, format);
        message = [[NSString alloc] initWithFormat:format arguments:args];
        va_end(args);
    }
    message = [message stringByAppendingString:@"你咋又来了 \n"];

    // Pass the finished text as an argument, never as the format string.
    sys_nslog(@"%@", message);
}

@implementation ViewController

- (void)viewDidLoad {
    [super viewDidLoad];

    // Logged before the hook is installed.
    NSLog(@"log来了,老弟");

    // Describe the rebinding: symbol name, replacement, and where to store
    // the original function pointer.
    struct rebinding nslog;
    nslog.name = "NSLog";
    nslog.replacement = my_nslog;
    nslog.replaced = (void *)&sys_nslog;

    struct rebinding rebs[1] = {nslog};
    rebind_symbols(rebs, 1);

    // Logged after the hook is installed; this call reaches my_nslog.
    NSLog(@"log来了,老弟");
}

@end
运行结果:
2020-03-16 09:47:38.526862+0800 Demo[28657:5210895] log来了,老弟 2020-03-16 09:47:38.536892+0800 Demo[28657:5210895] log来了,老弟你咋又来了 复制代码
MachOView会弹出输入框让你输入PID。
这个PID在Xcode的Show the Debug navigator菜单下,可以用⌘ + 7快速切过来。这里我们可以看到进程的PID,输入到上面的框中。
struct rebindings_entry { struct rebinding *rebindings; size_t rebindings_nel; struct rebindings_entry *next; }; static struct rebindings_entry *_rebindings_head; // 给需要rebinding的方法结构体开辟出对应的空间 // 生成对应的链表结构(rebindings_entry) static int prepend_rebindings(struct rebindings_entry **rebindings_head, struct rebinding rebindings[], size_t nel) { // 开辟一个rebindings_entry大小的空间 struct rebindings_entry *new_entry = (struct rebindings_entry *) malloc(sizeof(struct rebindings_entry)); if (!new_entry) { return -1; } // 一共有nel个rebinding new_entry->rebindings = (struct rebinding *) malloc(sizeof(struct rebinding) * nel); if (!new_entry->rebindings) { free(new_entry); return -1; } // 将rebinding赋值给new_entry->rebindings memcpy(new_entry->rebindings, rebindings, sizeof(struct rebinding) * nel); // 继续赋值nel new_entry->rebindings_nel = nel; // 每次都将new_entry插入头部 new_entry->next = *rebindings_head; // rebindings_head重新指向头部 *rebindings_head = new_entry; return 0; } 复制代码
这里定义了rebindings_entry链表。每次进行绑定的时候,会传入struct rebinding rebindings[]数组,创建一个新的rebindings_entry结构,然后把这个结构插入链表头部。
static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) { // 找到对应的符号,进行重绑定 rebind_symbols_for_image(_rebindings_head, header, slide); } // 在知道确定的MachO,可以使用该方法 int rebind_symbols_image(void *header, intptr_t slide, struct rebinding rebindings[], size_t rebindings_nel) { struct rebindings_entry *rebindings_head = NULL; int retval = prepend_rebindings(&rebindings_head, rebindings, rebindings_nel); rebind_symbols_for_image(rebindings_head, (const struct mach_header *) header, slide); if (rebindings_head) { free(rebindings_head->rebindings); } free(rebindings_head); return retval; } int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) { int retval = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel); if (retval < 0) { return retval; } // 如果这是第一次调用,请为image添加注册回调(这也会为现有image调用,否则,只在现有image上运行 if (!_rebindings_head->next) { // 向每个image注册_rebind_symbols_for_image函数,并且立即触发一次 _dyld_register_func_for_add_image(_rebind_symbols_for_image); } else { // _dyld_image_count() 获取image数量 uint32_t c = _dyld_image_count(); for (uint32_t i = 0; i < c; i++) { // _dyld_get_image_header(i) 获取第i个image的header指针 // _dyld_get_image_vmaddr_slide(i) 获取第i个image的基址 _rebind_symbols_for_image(_dyld_get_image_header(i), _dyld_get_image_vmaddr_slide(i)); } } return retval; } 复制代码
rebind_symbols_image和rebind_symbols是两个公开的方法,用于重新绑定符号。rebind_symbols_image用于指定镜像的符号绑定,rebind_symbols对所有镜像进行处理。
不管是哪个方法,最后都是调用rebind_symbols_for_image去获取相关部分的地址。
static void rebind_symbols_for_image(struct rebindings_entry *rebindings, const struct mach_header *header, intptr_t slide) { Dl_info info; // 判断当前macho是否在进程里,如果不在则直接返回 if (dladdr(header, &info) == 0) { return; } // 定义好几个变量,后面去遍历查找 segment_command_t *cur_seg_cmd; // MachO中Load Commons中的linkedit segment_command_t *linkedit_segment = NULL; // MachO中LC_SYMTAB struct symtab_command* symtab_cmd = NULL; // MachO中LC_DYSYMTAB struct dysymtab_command* dysymtab_cmd = NULL; // header的首地址+mach_header的内存大小 // 得到跳过mach_header的地址,也就是直接到Load Commons的地址 uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t); // 遍历Load Commons 找到上面三个遍历 for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) { cur_seg_cmd = (segment_command_t *)cur; // 如果是LC_SEGMENT_64 if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) { // 找到linkedit if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) { linkedit_segment = cur_seg_cmd; } } // 如果是LC_SYMTAB,就找到了symtab_cmd else if (cur_seg_cmd->cmd == LC_SYMTAB) { symtab_cmd = (struct symtab_command*)cur_seg_cmd; } // 如果是LC_DYSYMTAB,就找到了dysymtab_cmd else if (cur_seg_cmd->cmd == LC_DYSYMTAB) { dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd; } } // 下面其中任何一个值没有都直接return // 因为image不是需要找的image if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment || !dysymtab_cmd->nindirectsyms) { return; } // Find base symbol/string table addresses // 找到linkedit的头地址 uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff; // 获取symbol_table的真实地址 nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff); // 获取string_table的真实地址 char *strtab = (char *)(linkedit_base + symtab_cmd->stroff); // Get indirect symbol table (array of uint32_t indices into symbol table) // 获取indirect_symtab的真实地址 uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff); // 同样的,得到跳过mach_header的地址,得到Load Commons的地址 cur = (uintptr_t)header + sizeof(mach_header_t); // 遍历Load Commons,找到对应符号进行重新绑定 for (uint i = 0; i < 
header->ncmds; i++, cur += cur_seg_cmd->cmdsize) { cur_seg_cmd = (segment_command_t *)cur; if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) { // 如果不是__DATA段,也不是__DATA_CONST段,直接跳过 if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 && strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) { continue; } // 遍历所有的section for (uint j = 0; j < cur_seg_cmd->nsects; j++) { section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j; // 找懒加载表S_LAZY_SYMBOL_POINTERS if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) { // 重绑定的真正函数 perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab); } // 找非懒加载表S_NON_LAZY_SYMBOL_POINTERS if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) { // 重绑定的真正函数 perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab); } } } } } 复制代码
最上面,通过header
指针和header
大小获取到加载指令的基址。然后遍历获取3个数据结构:
// MachO中Load Commons中的linkedit segment_command_t *linkedit_segment = NULL; // MachO中LC_SYMTAB struct symtab_command* symtab_cmd = NULL; // MachO中LC_DYSYMTAB struct dysymtab_command* dysymtab_cmd = NULL; 复制代码
下面是比较核心的代码:
// 找到linkedit的头地址 uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff; 复制代码
我们来看看linkedit_segment->vmaddr对应4294995968
,linkedit_segment->fileoff对应28672
。这样可能看不太出来这是基地址,我们格式化一下:
(lldb) p/x 4294995968 (long) $0 = 0x0000000100007000 (lldb) p/x 28672 (int) $1 = 0x00007000 (lldb) p/x 4294995968 - 28672 (long) $2 = 0x0000000100000000 复制代码
我们可以看出这个部分就是拿到了image对应的内存基址。
// 获取symbol_table的真实地址 nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff); // 获取string_table的真实地址 char *strtab = (char *)(linkedit_base + symtab_cmd->stroff); 复制代码
从struct symtab_command结构中获取到符号表和字符串表的偏移量,然后加上基址就是内存中两个表的地址了。
(lldb) p/x 0x0000000100000000 + 30200 (long) $3 = 0x00000001000075f8 (lldb) p/x 0x0000000100000000 + 33408 (long) $4 = 0x0000000100008280 复制代码
通过MachOView我们也验证了这两个地址是正确的。
// 获取indirect_symtab的真实地址 uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff); 复制代码
通过struct dysymtab_command获取间接符号表。
(lldb) p/x 0x0000000100000000 + 33224 (long) $5 = 0x00000001000081c8 复制代码
间接符号表的地址我们也获得了。
// 同样的,得到跳过mach_header的地址,得到Load Commons的地址 cur = (uintptr_t)header + sizeof(mach_header_t); // 遍历Load Commons,找到对应符号进行重新绑定 for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) { cur_seg_cmd = (segment_command_t *)cur; if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) { // 如果不是__DATA段,也不是__DATA_CONST段,直接跳过 if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 && strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) { continue; } // 遍历所有的section for (uint j = 0; j < cur_seg_cmd->nsects; j++) { section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j; // 找懒加载表S_LAZY_SYMBOL_POINTERS if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) { // 重绑定的真正函数 perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab); } // 找非懒加载表S_NON_LAZY_SYMBOL_POINTERS if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) { // 重绑定的真正函数 perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab); } } } } 复制代码
对于给定的image
,__DATA
段包含与动态符号绑定相关的两个部分:__nl_symbol_ptr
和__la_symbol_ptr
。遍历找到这两个部分,然后进行符号重新绑定。
static void perform_rebinding_with_section(struct rebindings_entry *rebindings, section_t *section, intptr_t slide, nlist_t *symtab, char *strtab, uint32_t *indirect_symtab) { // reserved1对应的的是indirect_symbol中的offset,也就是indirect_symbol的真实地址 // indirect_symtab+offset就是indirect_symbol_indices(indirect_symbol的数组) uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1; // 函数地址,addr就是section的偏移地址 void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr); // 遍历section中的每个符号 for (uint i = 0; i < section->size / sizeof(void *); i++) { // 访问indirect_symbol,symtab_index就是indirect_symbol中data的值 uint32_t symtab_index = indirect_symbol_indices[i]; if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL || symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) { continue; } // 访问symbol_table,根据symtab_index获取到symbol_table中的偏移offset uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx; // 访问string_table,根据strtab_offset获取symbol_name char *symbol_name = strtab + strtab_offset; // string_table中的所有函数名都是以"."开始的,所以一个函数一定有两个字符 bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1]; struct rebindings_entry *cur = rebindings; // 已经存入的rebindings_entry while (cur) { // 循环每个entry中需要重绑定的函数 for (uint j = 0; j < cur->rebindings_nel; j++) { // 判断symbol_name是否是一个正确的函数名 // 需要被重绑定的函数名是否与当前symbol_name相等 if (symbol_name_longer_than_1 && strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) { // 判断replaced是否存在 // 判断replaced和老的函数是否是一样的 if (cur->rebindings[j].replaced != NULL && indirect_symbol_bindings[i] != cur->rebindings[j].replacement) { // 将原函数的地址给新函数replaced *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i]; } // 将replacement赋值给刚刚找到的 indirect_symbol_bindings[i] = cur->rebindings[j].replacement; goto symbol_loop; } } // 继续下一个需要绑定的函数 cur = cur->next; } symbol_loop:; } } 复制代码
这个部分就像fishhook
原理里面提到的:
indirect_symbol_indices[nl_symbol_ptr->reserved1]
拿到间接符号表的函数起始地址。indirect_symbol_bindings
是nl_symbol_ptr
中对应的函数指针数组。符号表中的符号名都是以_开头的,所以至少有2个字符,&symbol_name[1]是去掉开头_之后的字符串。如果它与要重绑定的符号名匹配,就先把原来的函数指针保存到replaced中,再将原来函数的地址替换为我们要绑定的replacement函数地址。如果觉得本文对你有所帮助,给我点个赞吧~