1. 什么是 descriptor(描述符)
在 python 中,描述符是实现了描述符协议的对象。描述符协议由三个方法组成:
- `__get__(self, obj, objtype=None)` → 获取属性值
- `__set__(self, obj, value)` → 设置属性值
- `__delete__(self, obj)` → 删除属性
只要一个对象定义了以上任意一个方法,它就是一个描述符。描述符是 Python 属性访问机制的底层基础,property、classmethod、staticmethod、`__slots__` 等都依赖描述符实现。
2. member descriptor 的本质
member descriptor(member_descriptor)是 CPython 内部的一种描述符类型,当类使用 __slots__ 时,Python 为每个 slot 自动生成一个 member_descriptor 对象。它直接操作实例的内存布局,无需 __dict__,因此访问速度极快。
class point:
    """Two-field class whose attributes live in slots instead of a ``__dict__``."""

    __slots__ = ('x', 'y')


# Looking a slot up on the class returns the descriptor object itself.
print(type(point.x))
# <class 'member_descriptor'>
print(type(point.y))
# <class 'member_descriptor'>
member_descriptor 在 C 层面对应 PyMemberDescrObject,相关实现位于 Objects/descrobject.c。它通过固定偏移量(offset)直接访问实例内存中的字段,绕过了字典查找。
3. member descriptor vs 其他描述符类型
python 内置了多种描述符类型,它们的区别如下:
| 类型 | 来源 | 实现 |
|---|---|---|
| member_descriptor | __slots__ | c 层面,按偏移量存取 |
| property | @property 装饰器 | python 层面,调用 getter/setter |
| getset_descriptor | c 扩展类型的 tp_getset | c 层面,调用 getter/setter 函数指针 |
| wrapper_descriptor | C 类型的特殊方法(如 list.__len__;普通方法 list.append 属于 method_descriptor) | C 层面 |
class withslots:
    """Class whose ``value`` attribute is stored in a slot."""

    __slots__ = ('value',)


class withproperty:
    """Class whose ``value`` attribute is exposed through a read-only property."""

    @property
    def value(self):
        """Return the instance's ``_value`` attribute."""
        return self._value


import types

print(type(withslots.value))  # <class 'member_descriptor'>
print(type(withproperty.value))  # <class 'property'>
# Example of a getset_descriptor (taken from a built-in type)
print(type(type.__dict__['__dict__']))  # <class 'getset_descriptor'>
4. 描述符协议的调用机制
当我们访问 obj.attr 时,python 的属性查找遵循以下优先级:
- data descriptor(定义了 __set__ 或 __delete__)优先于实例 __dict__
- 实例 __dict__ 优先于 non-data descriptor(只定义 __get__)
- 如果以上都没找到,调用 __getattr__
member_descriptor 是一个 data descriptor,因为它同时实现了 __get__、__set__ 和 __delete__:
class demo:
    """Single-slot class used to call the descriptor protocol methods by hand."""

    __slots__ = ('name',)


d = demo()
# __set__
demo.name.__set__(d, "hello")
print(d.name)  # hello
# __get__
print(demo.name.__get__(d, demo))  # hello
# __delete__
demo.name.__delete__(d)
# print(d.name)  # AttributeError: name
5. 内存布局与性能优势
member_descriptor 直接通过内存偏移量访问数据,这带来了显著的性能优势:
import sys
class withdict:
    """Ordinary class: attributes are stored in the per-instance ``__dict__``."""

    def __init__(self, x, y):
        self.x = x
        self.y = y


class withslots:
    """Slotted class: attributes are stored in slots, so there is no ``__dict__``."""

    __slots__ = ('x', 'y')

    def __init__(self, x, y):
        self.x = x
        self.y = y


d = withdict(1, 2)
s = withslots(1, 2)
# sys.getsizeof is shallow, so the instance __dict__ has to be added separately.
print(sys.getsizeof(d) + sys.getsizeof(d.__dict__))  # ~152 bytes (version-dependent)
print(sys.getsizeof(s))  # about 48 bytes on a typical 64-bit CPython build
# 性能基准测试
import timeit
# Passing globals=globals() lets the setup statement see the classes defined in
# this module even when the file is imported instead of being run as __main__
# (the previous "from __main__ import ..." setup only worked in the latter case).
setup_dict = "obj = withdict(1, 2)"
setup_slots = "obj = withslots(1, 2)"
t_dict = timeit.timeit(
    "obj.x", setup=setup_dict, globals=globals(), number=10_000_000
)
t_slots = timeit.timeit(
    "obj.x", setup=setup_slots, globals=globals(), number=10_000_000
)
print(f"dict access: {t_dict:.3f}s")
print(f"slots access: {t_slots:.3f}s")
# Slot access is usually somewhat faster; the exact gap depends on the CPython version.
CPython 在创建类时会为每个 slot 分配一个 Py_ssize_t 类型的 offset,member_descriptor 使用这个偏移量直接计算指针位置:
// cpython 内部伪代码
static pyobject *
member_get(pymemberdescrobject *descr, pyobject *obj) {
char *addr = (char *)obj + descr->d_member->offset;
return *(pyobject **)addr;
}
6. 自定义实现一个类似 member descriptor 的描述符
理解了底层机制后,我们可以用纯 python 模拟 member_descriptor 的行为:
class memberdescriptor:
    """Pure-Python imitation of CPython's ``member_descriptor``.

    Values are kept in the instance ``__dict__`` under the key
    ``_slot_<name>``, so instances of the owning class must have a
    ``__dict__``. Because ``__get__``, ``__set__`` and ``__delete__`` are all
    defined, this is a data descriptor.
    """

    # Sentinel that tells "never assigned" apart from "assigned None".
    _missing = object()

    def __init__(self, name):
        """Store the public attribute *name* and derive the storage key."""
        self.name = name
        self.internal_name = f"_slot_{name}"

    def __set_name__(self, owner, name):
        """Update both names when the descriptor is bound in a class body.

        Python 3.6+ calls this automatically, so the name the descriptor is
        assigned to replaces the one given to ``__init__``.
        """
        self.name = name
        self.internal_name = f"_slot_{name}"

    def __get__(self, obj, objtype=None):
        """Return the stored value, or the descriptor itself for class access.

        Raises:
            AttributeError: if no value has been stored on *obj*.
        """
        if obj is None:
            # Accessed through the class: hand back the descriptor.
            return self
        value = obj.__dict__.get(self.internal_name, self._missing)
        if value is self._missing:
            raise AttributeError(
                f"'{type(obj).__name__}' object has no attribute '{self.name}'"
            )
        return value

    def __set__(self, obj, value):
        """Store *value* in the instance ``__dict__`` under the internal key."""
        obj.__dict__[self.internal_name] = value

    def __delete__(self, obj):
        """Remove the stored value.

        Raises:
            AttributeError: if no value has been stored on *obj*.
        """
        try:
            del obj.__dict__[self.internal_name]
        except KeyError:
            raise AttributeError(
                f"'{type(obj).__name__}' object has no attribute '{self.name}'"
            ) from None

    def __repr__(self):
        return f"<member '{self.name}'>"
class vector:
    """Example owner class whose coordinates are managed by ``memberdescriptor``."""

    x = memberdescriptor('x')
    y = memberdescriptor('y')

    def __init__(self, x, y):
        self.x = x
        self.y = y


v = vector(3, 4)
print(v.x)  # 3
print(vector.x)  # <member 'x'>
del v.x
try:
    print(v.x)
except AttributeError as e:
    print(e)  # 'vector' object has no attribute 'x'
7. member descriptor 与继承
__slots__ 和 member_descriptor 在继承场景下有特殊行为:
class base:
    __slots__ = ('x',)


class child(base):
    __slots__ = ('y',)


c = child()
c.x = 1
c.y = 2
# Each class owns only the member_descriptor objects for the slots it declares itself
print('x' in base.__dict__)  # True
print('x' in child.__dict__)  # False - inherited from base
print('y' in child.__dict__)  # True


# Re-declaring a slot creates a second, independent member_descriptor (wasted memory!)
class badchild(base):
    __slots__ = ('x', 'z')  # x is declared again


print(base.__dict__['x'])  # <member 'x' of 'base' objects>
print(badchild.__dict__['x'])  # <member 'x' of 'badchild' objects>
# Two distinct descriptors: badchild.x shadows base.x
8. member descriptor 的元信息
每个 member_descriptor 携带了描述性元信息:
class config:
__slots__ = ('host', 'port')
desc = config.__dict__['host']
print(desc.__objclass__) # <class 'config'> — 所属类
print(desc.__name__) # 'host' — 属性名
print(desc.__doc__) # none(可通过 __slots__ = {'host': 'the hostname'} 设置)
# 使用 dict 形式的 __slots__ 添加文档
class configdoc:
__slots__ = {
'host': 'the server hostname',
'port': 'the server port number',
}
print(configdoc.host.__doc__) # 'the server hostname'
print(configdoc.port.__doc__) # 'the server port number'
9. 与inspect模块的交互
import inspect
class entity:
    __slots__ = ('id', 'name')


def is_data_descriptor(obj):
    """Return True when *obj* has ``__get__`` plus ``__set__`` or ``__delete__``.

    The methods are looked up on ``type(obj)`` rather than on *obj* itself.
    Checking *obj* directly would report a class such as ``property`` as a
    descriptor merely because its instances are descriptors.
    """
    kind = type(obj)
    return hasattr(kind, '__get__') and (
        hasattr(kind, '__set__') or hasattr(kind, '__delete__')
    )


print(is_data_descriptor(entity.id))  # True
# inspect.getmembers_static collects members without triggering any descriptor's __get__
for name, value in inspect.getmembers_static(entity):
    if isinstance(value, type(entity.id)):  # member_descriptor
        print(f" slot: {name}")
# Output:
# slot: id
# slot: name
10. 实际应用:结合__slots__与描述符的高性能数据类
from typing import Any


class typedslot:
    """Data descriptor that type-checks every assignment.

    The value is stored on the instance under the attribute ``_<name>``; the
    owning class has to provide that storage, for example by listing
    ``_<name>`` in its ``__slots__``.
    """

    def __init__(self, expected_type: "type | tuple[type, ...]", default: Any = None):
        """Remember the accepted type(s) and the value returned while unset.

        Args:
            expected_type: a class, or a tuple of classes, as accepted by
                ``isinstance``.
            default: value returned by ``__get__`` when nothing is stored.
                It is not type-checked.
        """
        self.expected_type = expected_type
        self.default = default
        self.name = None

    def __set_name__(self, owner, name):
        """Record the attribute name the descriptor was assigned to."""
        self.name = name

    def __get__(self, obj, objtype=None):
        """Return the stored value, the default if unset, or self on class access."""
        if obj is None:
            return self
        return getattr(obj, f"_{self.name}", self.default)

    def __set__(self, obj, value):
        """Store *value* after checking its type.

        Raises:
            TypeError: if *value* is not an instance of the expected type(s).
        """
        if not isinstance(value, self.expected_type):
            raise TypeError(
                f"'{self.name}' expects {self._expected_label()}, "
                f"got {type(value).__name__}"
            )
        # object.__setattr__ writes the backing attribute directly.
        object.__setattr__(obj, f"_{self.name}", value)

    def __delete__(self, obj):
        """Remove the stored value.

        Raises:
            AttributeError: if no value is currently stored.
        """
        try:
            object.__delattr__(obj, f"_{self.name}")
        except AttributeError:
            raise AttributeError(f"'{self.name}' is not set") from None

    def _expected_label(self):
        """Return a readable name for the expected type(s), for error messages."""
        expected = self.expected_type
        if isinstance(expected, tuple):
            # A tuple has no __name__, so join the names of its members.
            return " or ".join(t.__name__ for t in expected)
        return expected.__name__
class connection:
    """Slotted class whose public attributes are validated by ``typedslot``.

    The slots hold the underscore-prefixed backing values; the public names
    ``host``, ``port`` and ``timeout`` are class-level descriptors.
    """

    __slots__ = ('_host', '_port', '_timeout')

    host = typedslot(str, default="localhost")
    port = typedslot(int, default=8080)
    timeout = typedslot((int, float), default=30.0)

    def __init__(self, host: str, port: int, timeout: float = 30.0):
        # Each assignment goes through the matching descriptor's __set__.
        self.host = host
        self.port = port
        self.timeout = timeout


conn = connection("192.168.1.1", 443, 60.0)
print(conn.host)  # 192.168.1.1
print(conn.port)  # 443
print(conn.timeout)  # 60.0
try:
    conn.port = "not_a_number"
except TypeError as e:
    print(e)  # 'port' expects int, got str
11. cpython 源码层面的实现
在 CPython 源码中(Objects/descrobject.c),member_descriptor 的核心结构如下:
// include/cpython/descrobject.h
typedef struct {
pydescrobject d_common;
struct pymemberdef *d_member; // 包含 name, type, offset
} pymemberdescrobject;
// include/structmember.h
typedef struct pymemberdef {
const char *name;
int type; // t_object, t_int, t_string 等
py_ssize_t offset; // 在实例结构体中的偏移量
int flags; // readonly 等标志
const char *doc;
} pymemberdef;
关键执行路径:
// 简化的 __get__ 实现
static pyobject *
member_get(pymemberdescrobject *descr, pyobject *obj, pyobject *type)
{
if (obj == null || obj == py_none) {
py_incref(descr);
return (pyobject *)descr;
}
return pymember_getone((char *)obj, descr->d_member);
}
// pymember_getone 根据 offset 和 type 读取值
pyobject *
pymember_getone(const char *obj_char, pymemberdef *l)
{
pyobject *v;
switch (l->type) {
case t_object:
v = *(pyobject **)(obj_char + l->offset);
if (v == null)
// 尚未赋值 → attributeerror
...
break;
case t_int:
v = pylong_fromlong(*(int *)(obj_char + l->offset));
break;
// ... 其他类型
}
return v;
}
12. 总结
| 特性 | 说明 |
|---|---|
| 本质 | c 层描述符,通过偏移量直接访问实例内存 |
| 触发条件 | 类定义 __slots__ |
| 描述符类型 | data descriptor(实现 __get__ + __set__ + __delete__) |
| 优先级 | 高于实例 __dict__(但 slots 类通常无 __dict__) |
| 性能 | 比 __dict__ 快 10-30%,内存占用显著降低 |
| 元信息 | __name__、__objclass__、__doc__ |
| 文档化 | 使用 __slots__ = {'name': 'docstring'} 字典形式 |
member_descriptor 是 python 对象模型中最底层、最高效的属性访问机制之一。理解它不仅有助于写出更高效的代码,也是深入理解 python 描述符协议、属性查找链和 cpython 内部实现的重要一环。
到此这篇关于深入解析python member descriptor 的文章就介绍到这了,更多相关python member descriptor 内容请搜索代码网以前的文章或继续浏览下面的相关文章希望大家以后多多支持代码网!
发表评论