背景
crash 监控发现有大量的新增崩溃,堆栈如下
libsystem_platform.dylib __os_unfair_lock_corruption_abort() libsystem_platform.dylib __os_unfair_lock_lock_slow() foundation __nssetboolvalueandnotify()
分析堆栈
__os_unfair_lock_corruption_abort
log 翻译:lock 已损坏
_os_unfair_lock_corruption_abort(os_ulock_value_t current) { __libplatform_client_crash__(current, "os_unfair_lock is corrupt"); }
__os_unfair_lock_lock_slow
在这个方法里面,当 __ulock_wait 返回 eownerdead 时,会调用 corruption abort 方法(_os_unfair_lock_corruption_abort)。
int ret = __ulock_wait(ul_unfair_lock | ulf_no_errno | options, l, current, 0); if (unlikely(ret < 0)) { switch (-ret) { case eintr: case efault: continue; case eownerdead: _os_unfair_lock_corruption_abort(current); break; default: __libplatform_internal_crash__(-ret, "ulock_wait failure"); } }
eownerdead 的定义
#define eownerdead 105 /* previous owner died */
到这里猜测是 lock 的 owner 已经野指针了,继续向下看。
__nssetboolvalueandnotify
google 下这个方法是在 kvo 里面修改属性的时候调用,伪代码:
int __nssetboolvalueandnotify(int arg0, int arg1, int arg2) { r31 = r31 - 0x90; var_30 = r24; stack[-56] = r23; var_20 = r22; stack[-40] = r21; var_10 = r20; stack[-24] = r19; saved_fp = r29; stack[-8] = r30; r20 = arg2; r21 = arg1; r19 = arg0; r0 = object_getclass(arg0); r0 = object_getindexedivars(r0); // 理清这个崩溃的关键方法,这里和汇编代码不一致,汇编代码的入参是 r0 + 0x20 r23 = r0; os_unfair_recursive_lock_lock_with_options(); cfdictionarygetvalue(*(r23 + 0x18), r21); r22 = _objc_msgsend$copywithzone:(); os_unfair_recursive_lock_unlock(); if (*(int8_t *)(r23 + 0x28) != 0x0) { _objc_msgsend$willchangevalueforkey:(); (class_getmethodimplementation(*r23, r21))(r19, r21, r20); _objc_msgsend$didchangevalueforkey:(); } else { _objc_msgsend$_changevalueforkey:key:key:usingblock:(); } var_38 = **qword_9590e8; r0 = objc_release_x22(); if (**qword_9590e8 != var_38) { r0 = __stack_chk_fail(); } return r0; }
os_unfair_recursive_lock_lock_with_options
崩溃调用栈中间还有这一层的内联调用 os_unfair_recursive_lock_lock_with_options。这里对 lock owner 有一个比较并赋值(cmpxchg)的操作:如果 oul_value 等于 os_lock_no_owner,则赋值为 self 然后 return。崩溃时这里继续向下执行了,说明 cmpxchg 失败,并且读到的 cur(即 lock->oul_value 的当前值)也不等于 self,于是进入了 _os_unfair_lock_lock_slow。到这里猜测崩溃的原因是 lock->oul_value 野指针了。
void os_unfair_recursive_lock_lock_with_options(os_unfair_recursive_lock_t lock, os_unfair_lock_options_t options) { os_lock_owner_t cur, self = _os_lock_owner_get_self(); _os_unfair_lock_t l = (_os_unfair_lock_t)&lock->ourl_lock; if (likely(os_atomic_cmpxchgv2o(l, oul_value, os_lock_no_owner, self, &cur, acquire))) { return; } if (os_ulock_owner(cur) == self) { lock->ourl_count++; return; } return _os_unfair_lock_lock_slow(l, self, options); } os_always_inline os_const static inline os_lock_owner_t _os_lock_owner_get_self(void) { os_lock_owner_t self; self = (os_lock_owner_t)_os_tsd_get_direct(__tsd_mach_thread_self); return self; }
object_getindexedivars
__nssetboolvalueandnotify 里面的获取 lock 的方法,这个函数非常关键。
/** * returns a pointer to any extra bytes allocated with an instance given object. * * @param obj an objective-c object. * * @return a pointer to any extra bytes allocated with \e obj. if \e obj was * not allocated with any extra bytes, then dereferencing the returned pointer is undefined. * * @note this function returns a pointer to any extra bytes allocated with the instance * (as specified by \c class_createinstance with extrabytes>0). this memory follows the * object's ordinary ivars, but may not be adjacent to the last ivar. * @note the returned pointer is guaranteed to be pointer-size aligned, even if the area following * the object's last ivar is less aligned than that. alignment greater than pointer-size is never * guaranteed, even if the area following the object's last ivar is more aligned than that. * @note in a garbage-collected environment, the memory is scanned conservatively. /** * returns a pointer immediately after the instance variables declared in an * object. this is a pointer to the storage specified with the extrabytes * parameter given when allocating an object. */ void *object_getindexedivars(id obj) { uint8_t *base = (uint8_t *)obj; if (_objc_istaggedpointerornil(obj)) return nil; if (!obj->isclass()) return base + obj->isa()->alignedinstancesize(); class cls = (class)obj; if (!cls->isanyswift()) return base + sizeof(objc_class); swift_class_t *swcls = (swift_class_t *)cls; return base - swcls->classaddressoffset + word_align(swcls->classsize); }
上层调用 __nssetboolvalueandnotify 里面:
r0 = object_getclass(arg0):arg0 是实例对象,r0 是类对象。因为这里是 kvo 的调用,所以正常情况下 r0 是 nskvonotifying_xxx。
对于 kvo 类,object_getindexedivars 返回的地址是 (uint8_t *)obj + sizeof(objc_class)。根据函数的注释,这个地址指向创建类时附在类空间后 extrabytes 大小的一块内存。
debug 调试
object_getindexedivars
__nssetboolvalueandnotify 下的调用
object_getindexedivars 入参是 nskvonotifying_kvobject,object_getclass 获取的是 kvo class。
objc_allocateclasspair
动态创建 kvo 类的方法。
thread #8, queue = 'com.apple.root.default-qos', stop reason = breakpoint 1.1 * frame #0: 0x000000018143a088 libobjc.a.dylib`objc_allocateclasspair frame #1: 0x000000018259cd94 foundation`_nskvonotifyingcreateinfowithoriginalclass + 152 frame #2: 0x00000001825b8fd0 foundation`_nskeyvaluecontainerclassgetnotifyinginfo + 56 frame #3: 0x000000018254b7dc foundation`-[nskeyvalueunnestedproperty _isaforautonotifying] + 44 frame #4: 0x000000018254b504 foundation`-[nskeyvalueunnestedproperty isaforautonotifying] + 88 frame #5: 0x000000018254b32c foundation`-[nsobject(nskeyvalueobserverregistration) _addobserver:forproperty:options:context:] + 404 frame #6: 0x000000018254b054 foundation`-[nsobject(nskeyvalueobserverregistration) addobserver:forkeypath:options:context:] + 136 frame #7: 0x00000001040d1860 test`__29-[viewcontroller viewdidload]_block_invoke(.block_descriptor=0x0000000282a55170) at viewcontroller.m:28:13 frame #8: 0x00000001043d05a8 libdispatch.dylib`_dispatch_call_block_and_release + 32 frame #9: 0x00000001043d205c libdispatch.dylib`_dispatch_client_callout + 20 frame #10: 0x00000001043d4b94 libdispatch.dylib`_dispatch_queue_override_invoke + 1052 frame #11: 0x00000001043e6478 libdispatch.dylib`_dispatch_root_queue_drain + 408 frame #12: 0x00000001043e6e74 libdispatch.dylib`_dispatch_worker_thread2 + 196 frame #13: 0x00000001d515fdbc libsystem_pthread.dylib`_pthread_wqthread + 228
_nskvonotifyingcreateinfowithoriginalclass
objc_allocateclasspair 的上层调用。 allocate 之前的 context w2 是个固定值 0x30,即创建 kvo class 入参 extrabytes 的大小是 0x30
0x18259cd78 <+124>: mov x1, x21 0x18259cd7c <+128>: mov x2, x22 0x18259cd80 <+132>: bl 0x188097080 0x18259cd84 <+136>: mov x0, x20 0x18259cd88 <+140>: mov x1, x19 0x18259cd8c <+144>: mov w2, #0x30 0x18259cd90 <+148>: bl 0x1880961f0 // objc_allocateclasspair 0x18259cd94 <+152>: cbz x0, 0x18259ce24 ; <+296> 0x18259cd98 <+156>: mov x21, x0 0x18259cd9c <+160>: bl 0x188096410 // objc_registerclasspair 0x18259cda0 <+164>: mov x0, x19 0x18259cda4 <+168>: bl 0x182b45f44 ; symbol stub for: free 0x18259cda8 <+172>: mov x0, x21 0x18259cdac <+176>: bl 0x1880967e0 // object_getindexedivars 0x18259cdb0 <+180>: mov x19, x0 0x18259cdb4 <+184>: stp x20, x21, [x0]
_nskvonotifyingcreateinfowithoriginalclass+184 处将 x20 和 x21 写入 [x0],此时 x0 指向的是大小为 extrabytes 的内存,打印 x20 和 x21 的值
x20 = 0x00000001117caa10 (void *)0x00000001117caa38: kvobject(向上回溯这个值取自 _nskvonotifyingcreateinfowithoriginalclass 的入参 x0)
x21 nskvonotifying_kvobject
根据这里可以看出 object_getindexedivars 返回的地址,依次存储了 kvobject(origin class) 和 nskvonotifying_kvobject(kvo class)。
查看 _nskvonotifyingcreateinfowithoriginalclass 的伪代码,对 [x0] 有 5 次写入的操作,并且最终这个方法返回的是 x0 的地址。
function __nskvonotifyingcreateinfowithoriginalclass { r31 = r31 - 0x50; stack[32] = r22; stack[40] = r21; stack[48] = r20; stack[56] = r19; stack[64] = r29; stack[72] = r30; r20 = r0; if (*(int8_t *)0x993e78 != 0x0) { os_unfair_lock_assert_owner(0x993e7c); } r0 = class_getname(r20); r22 = strlen(r0) + 0x10; r0 = malloc(r22); r19 = r0; strlcpy(r0, "nskvonotifying_", r22); strlcat(r19, r21, r22); r0 = objc_allocateclasspair(r20, r19, 0x30); if (r0 != 0x0) { objc_registerclasspair(r0); free(r19); r0 = object_getindexedivars(r21); r19 = r0; *(int128_t *)r0 = r20; // 第一次写入 class *(int128_t *)(r0 + 0x8) = r21; // 第二次写入 class *(r19 + 0x10) = cfsetcreatemutable(0x0, 0x0, *qword_9592d8); // 第三次写入 cfset *(int128_t *)(r19 + 0x18) = cfdictionarycreatemutable(0x0, 0x0, 0x0, *qword_959598); // 第四次写入 cfdictionary *(int128_t *)(r19 + 0x20) = 0x0; // 第五次写入空值 if (*qword_9fc560 != -0x1) { dispatch_once(0x9fc560, 0x8eaf98); } if (class_getmethodimplementation(*r19, @selector(willchangevalueforkey:)) != *qword_9fc568) { r8 = 0x1; } else { r0 = *r19; r0 = class_getmethodimplementation(r0, @selector(didchangevalueforkey:)); r8 = *qword_9fc570; if (r0 != r8) { r8 = *qword_9fc570; if (cpu_flags & ne) { r8 = 0x1; } } } *(int8_t *)(r19 + 0x28) = r8; _nskvonotifyingsetmethodimplementation(r19, @selector(_iskvoa), 0x44fab4, 0x0); _nskvonotifyingsetmethodimplementation(r19, @selector(dealloc), 0x44fabc, 0x0); _nskvonotifyingsetmethodimplementation(r19, @selector(class), 0x44fd2c, 0x0); } else { if (*qword_9fc558 != -0x1) { dispatch_once(0x9fc558, 0x8eaf78); } if (os_log_type_enabled(*0x9fc550, 0x10) != 0x0) { _os_log_error_impl(0x0, *0x9fc550, 0x10, "kvo failed to allocate class pair for name %s, automatic key-value observing will not work for this class", &stack[0], 0xc); } free(r19); r19 = 0x0; } if (**qword_9590e8 == **qword_9590e8) { r0 = r19; } else { r0 = __stack_chk_fail(); } return r0; }
_nskvonotifyingcreateinfowithoriginalclass 的上层调用:入参取自 [x19, #0x8],返回值写入 [x19, #0x28]
0x1825b8fc0 <+40>: ldr x0, [x19, #0x28] 0x1825b8fc4 <+44>: b 0x1825b8fd4 ; <+60> 0x1825b8fc8 <+48>: ldr x0, [x19, #0x8] -> 0x1825b8fcc <+52>: bl 0x18259ccfc ; _nskvonotifyingcreateinfowithoriginalclass 0x1825b8fd0 <+56>: str x0, [x19, #0x28] 0x1825b8fd4 <+60>: ldp x29, x30, [sp, #0x10] 0x1825b8fd8 <+64>: ldp x20, x19, [sp], #0x20
打印 x19 是一个 nskeyvaluecontainerclass 类型的实例对象,这个对象类的 ivars layout
ivars 0x99f3c0 __objc_$_instance_variables_nskeyvaluecontainerclass entsize 32 count 5 offset 0x9e6048 _objc_ivar_$_nskeyvaluecontainerclass._originalclass 8 name 0x90bd27 _originalclass type 0x929ae6 # alignment 3 size 8 offset 0x9e6050 _objc_ivar_$_nskeyvaluecontainerclass._cachedobservationinfoimplementation 16 name 0x90bd36 _cachedobservationinfoimplementation type 0x92bb88 ^? alignment 3 size 8 offset 0x9e6058 _objc_ivar_$_nskeyvaluecontainerclass._cachedsetobservationinfoimplementation 24 name 0x90bd5b _cachedsetobservationinfoimplementation type 0x92bb88 ^? alignment 3 size 8 offset 0x9e6060 _objc_ivar_$_nskeyvaluecontainerclass._cachedsetobservationinfotakesanobject 32 name 0x90bd83 _cachedsetobservationinfotakesanobject type 0x92a01a b alignment 0 size 1 offset 0x9e6068 _objc_ivar_$_nskeyvaluecontainerclass._notifyinginfo 40 name 0x90bdaa _notifyinginfo type 0x92bdd7 ^{?=##^{__cfset}^{__cfdictionary}{os_unfair_recursive_lock_s={os_unfair_lock_s=i}i}b} alignment 3 size 8
offset 0x8 name:_originalclass type:class
offset 0x28 name:_notifyinginfo type:struct
_notifyinginfo 结构体
{ class, class, __cfset, __cfdictionary, os_unfair_recursive_lock_s, bool }
type encoding:
https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/ObjCRuntimeGuide/Articles/ocrtTypeEncodings.html
从 context 可以看出 _nskvonotifyingcreateinfowithoriginalclass 这个方法的入参是 _objc_ivar_$_nskeyvaluecontainerclass._originalclass。
返回值 x0 是 _objc_ivar_$_nskeyvaluecontainerclass._notifyinginfo。5 次对 [x0] 的写入是在初始化 _notifyinginfo。
崩溃时的 context:
0x1825231f0 <+56>: bl 0x1880967c0 // object_getclass 0x1825231f4 <+60>: bl 0x1880967e0 // object_getindexedivars 0x1825231f8 <+64>: mov x23, x0 // x0 == _notifyinginfo 0x1825231fc <+68>: add x24, x0, #0x20 // x24 == os_unfair_recursive_lock_s 0x182523200 <+72>: mov x0, x24 0x182523204 <+76>: mov w1, #0x0 0x182523208 <+80>: bl 0x188096910 // os_unfair_recursive_lock_lock_with_options crash 调用栈
先调用 object_getclass 获取 class,再调用 object_getindexedivars 获取 _notifyinginfo,然后用 _notifyinginfo + 偏移量 0x20 得到 os_unfair_recursive_lock_s。崩溃的原因是这把锁的 owner 损坏了;lock 本身也是一个结构体,owner 同样是根据 offset 获取的。
结论
从崩溃的上下文来看,最可能出问题的环节是获取 _notifyinginfo,因为只有 kvo class 才带有 _notifyinginfo 这个结构体。如果在调用 __nssetboolvalueandnotify 的过程中,监听在其它线程被移除,此时 object_getclass 取到的就不再是 kvo class;后续再根据 offset 去取 lock,读到的就是无效内存,这个时候就有可能发生上述崩溃。
线下暴力复现验证了上述猜测。
- (void)start { __block kvobject *obj = [kvobject new]; dispatch_async(dispatch_get_global_queue(0, 0x0), ^{ for (int i = 0; i < 100000; i++) { [obj addobserver:self forkeypath:@"value" options:0x7 context:nil]; [obj removeobserver:self forkeypath:@"value"]; } }); dispatch_async(dispatch_get_global_queue(0, 0x0), ^{ for (int i = 0; i < 100000; i++) { obj.value = yes; obj.value = no; } }); } - (void)observevalueforkeypath:(nsstring *)keypath ofobject:(id)object change:(nsdictionary<nskeyvaluechangekey,id> *)change context:(void *)context {}
解决这个问题的思路就是保证线程安全,我们在线上断点找到了 removeobserver 的代码,将 removeobserver 和触发监听的代码放在了同一个串行队列。当然如果 removeobserver 在 dealloc 里面,理论上也不会出现这类问题。
__nssetxxxvalueandnotify 系列方法都有可能会触发这个崩溃,类似的问题可以按照相同的思路解决。
00000000004e05cd t __nssetboolvalueandnotify 00000000004e0707 t __nssetcharvalueandnotify 00000000004e097b t __nssetdoublevalueandnotify 00000000004e0abc t __nssetfloatvalueandnotify 00000000004e0bfd t __nssetintvalueandnotify 00000000004e10e7 t __nssetlonglongvalueandnotify 00000000004e0e6f t __nssetlongvalueandnotify 00000000004e0491 t __nssetobjectvalueandnotify 00000000004e15d5 t __nssetpointvalueandnotify 00000000004e1734 t __nssetrangevalueandnotify 00000000004e188a t __nssetrectvalueandnotify 00000000004e135f t __nssetshortvalueandnotify 00000000004e19e8 t __nssetsizevalueandnotify 00000000004e0841 t __nssetunsignedcharvalueandnotify 00000000004e0d36 t __nssetunsignedintvalueandnotify 00000000004e1223 t __nssetunsignedlonglongvalueandnotify 00000000004e0fab t __nssetunsignedlongvalueandnotify 00000000004e149a t __nssetunsignedshortvalueandnotify 00000000004de834 t __nssetvalueandnotifyforkeyinivar
以上就是99% ios开发都不知道的kvo崩溃分析详解的详细内容,更多关于ios开发kvo崩溃的资料请关注代码网其它相关文章!
发表评论