我的原始sql
select s.department_name as departmentname,
cps.purchase_type as purchasetype
from settlement_records s
left join common_products_specification cps
on cps.org_id = s.purchase_org_id and cps.specification_system_sn = s.specification_system_sn and
cps.delete_flag = 0
where s.delete_flag = 0
and s.purchase_org_id = 1540
and s.purchase_org_type = 1;
两张表在关键字段上都有索引
create index idx_settlement_join on settlement_records (purchase_org_id, purchase_org_type, delete_flag, product_id, vendor_id); create index idx_settlement_org_del on settlement_records (purchase_org_id, purchase_org_type, delete_flag);
create index idx_cps_org_spec_del on common_products_specification (org_id, specification_system_sn, delete_flag);
explain结果分析
[
{
"id": 1,
"select_type": "simple",
"table": "s",
"partitions": null,
"type": "ref",
"possible_keys": "idx_settlement_org_del,idx_settlement_join",
"key": "idx_settlement_org_del",
"key_len": "13",
"ref": "const,const,const",
"rows": 31780,
"filtered": 100,
"extra": null
},
{
"id": 1,
"select_type": "simple",
"table": "cps",
"partitions": null,
"type": "ref",
"possible_keys": "idx_cps_org_spec_del",
"key": "idx_cps_org_spec_del",
"key_len": "8",
"ref": "const",
"rows": 6469,
"filtered": 100,
"extra": "using where"
}
]
可以看到走了 idx_cps_org_spec_del 索引,
但 key_len=8,这个很关键,说明索引只用到了org_id列,这一列的数据类型是bigint,长度刚好是8。
extra: using where 表示索引没覆盖 join 的所有条件,还需要额外过滤 specification_system_sn 和 delete_flag。
慢的原因:
mysql 拿着 31780 行 s 的结果,去 cps 里扫 6469 行,做 n × m 的匹配,代价就非常大了。
idx_cps_org_spec_del (org_id, specification_system_sn, delete_flag) 索引没用完整,explain 里只用到了 org_id,说明 specification_system_sn 和 delete_flag 没被成功利用。
为什么复合索引只匹配到了org_id
那这就很奇怪了,我的索引明明是复合索引,为什么只会用到前面的org_id?
最终通过如下语句发现原来是specification_system_sn字段的字符集不一致的原因
show full columns from settlement_records like 'specification_system_sn'; -- 结果:utf8mb4_0900_ai_ci show full columns from common_products_specification like 'specification_system_sn'; -- 结果:utf8mb3_general_ci
mysql 的 字符集和排序规则不一致 是导致索引只用到 org_id 的直接原因。
mysql 在 join 时也认为两边的列类型不完全匹配,因此 无法在索引上做完整匹配,只能先用 org_id 扫一遍,再在内存里过滤 specification_system_sn。
修改字符集
alter table common_products_specification modify specification_system_sn varchar(50) character set utf8mb4 collate utf8mb4_0900_ai_ci;
修改了之后就很快了,一秒不要就出结果。
为什么两个字符集不一致呢?
这其实是 mysql 的历史产物和版本差异在作怪。
mysql 字符集演进:
utf8mb3:原来的“utf8”,每个字符最多 3 个字节。mysql 5.5 以前是主流,很多老表默认就是 utf8mb3_general_ci。utf8mb4:从 mysql 5.5 开始推荐,用来完整支持 unicode(比如 emoji、少数民族字符),每个字符最多 4 个字节。utf8mb4_0900_ai_ci:mysql 8.0 默认的新 collation,基于 unicode 9.0,比 utf8mb4_general_ci 排序更标准,支持更多 unicode 特性。
所以原因就只有2个:
- 有人建表时指定了字符集和排序规则
- 进行过数据库迁移,原来用的5,后面迁移到8了,mysql迁移时会默认保留原字符集和排序规则
如何把整个库的所有表及字段的字符集都统一为utf8mb4_0900_ai_ci
因为我用的是mysql8,所以可以统一字符集规则为utf8mb4_0900_ai_ci,怎么做呢?
- 查询并修改现在数据库默认的字符集
-- 查询数据库默认字符集
select
schema_name as database_name,
default_character_set_name as character_set,
default_collation_name as collation
from information_schema.schemata
where schema_name = 'datebase_name';
-- 如不是则修改
alter database your_database_name
character set = utf8mb4
collate = utf8mb4_0900_ai_ci;
- 查询现有的不是这个字符集的表并生成修改语句
-- 生成修改字符集的语句
select concat(
'alter table `', table_name, '` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci;'
) as alter_sql
from information_schema.tables
where table_schema = 'datebase_name'
and table_type = 'base table'
and (table_collation != 'utf8mb4_0900_ai_ci' or table_collation is null);
-- 改完之后可以查询一下现在表的字符集
-- 查询所有表字符集
select
table_name,
table_collation
from information_schema.tables
where table_schema = 'datebase_name'
and table_type = 'base table';
执行上面的语句
关于qrtz框架的特殊处理
因为这个框架的表有外键约束,无法直接改,需要先删除约束,改完了再创建约束,完整sql如下:
alter table `qrtz_triggers` drop foreign key `qrtz_triggers_ibfk_1`; alter table `qrtz_simple_triggers` drop foreign key `qrtz_simple_triggers_ibfk_1`; alter table `qrtz_cron_triggers` drop foreign key `qrtz_cron_triggers_ibfk_1`; alter table `qrtz_simprop_triggers` drop foreign key `qrtz_simprop_triggers_ibfk_1`; alter table `qrtz_blob_triggers` drop foreign key `qrtz_blob_triggers_ibfk_1`; alter table `qrtz_blob_triggers` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_calendars` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_cron_triggers` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_fired_triggers` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_job_details` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_locks` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_paused_trigger_grps` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_scheduler_state` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_simple_triggers` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_simprop_triggers` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_triggers` convert to character set utf8mb4 collate utf8mb4_0900_ai_ci; alter table `qrtz_triggers` add constraint `qrtz_triggers_ibfk_1` foreign key (`sched_name`) references `qrtz_job_details`(`sched_name`); alter table `qrtz_simple_triggers` add constraint `qrtz_simple_triggers_ibfk_1` foreign key (`sched_name`, `trigger_name`, `trigger_group`) references `qrtz_triggers`(`sched_name`, `trigger_name`, `trigger_group`); alter table `qrtz_cron_triggers` add constraint `qrtz_cron_triggers_ibfk_1` foreign key (`sched_name`, `trigger_name`, `trigger_group`) references `qrtz_triggers`(`sched_name`, `trigger_name`, `trigger_group`); alter table `qrtz_simprop_triggers` add constraint `qrtz_simprop_triggers_ibfk_1` foreign key (`sched_name`, `trigger_name`, `trigger_group`) references `qrtz_triggers`(`sched_name`, `trigger_name`, `trigger_group`); alter table `qrtz_blob_triggers` add constraint `qrtz_blob_triggers_ibfk_1` foreign key (`sched_name`, `trigger_name`, `trigger_group`) references `qrtz_triggers`(`sched_name`, `trigger_name`, `trigger_group`);
总结
改字符集有风险,如数据量大表会锁表,建议低峰期修改且先备份,数据无价,谨慎操作
以上为个人经验,希望能给大家一个参考,也希望大家多多支持代码网。
发表评论