基于Python实现字符串规范检查与修复程序_Python

在Python开发中，代码风格的统一性对于项目的可维护性至关重要。虽然PEP 8没有强制规定字符串使用单引号还是双引号，但许多团队会选择其中一种作为编码规范。本文介绍一个Python字符串引号规范自动修复程序，它能够自动检测代码中的单引号字符串，并将其统一替换为双引号。

完整实现代码

#!/usr/bin/env python3
"""
python字符串引号规范检查与修复工具
自动检查单引号字符串并建议替换为双引号
"""

import argparse
import ast
import fnmatch
import io
import json
import os
import sys
import tokenize
from pathlib import Path
from typing import Any, Dict, List, Set, Tuple

class stringquotechecker:
    """Find single-quoted string literals and rewrite them with double quotes.

    Each file is tokenized; every single-quoted literal that can be converted
    without changing its value is reported as an "issue" dict and can then be
    rewritten in place. Counters are accumulated in ``self.stats`` and
    ``self.issues`` collects the issues found by :meth:`process_directory`.
    """

    def __init__(self):
        """Start with zeroed counters and an empty issue list."""
        self.stats = {
            'files_processed': 0,
            'total_strings': 0,
            'single_quote_strings': 0,
            'replaced_strings': 0,
            'skipped_strings': 0,
            'error_files': 0,
            'issues_found': 0
        }
        self.issues = []

    def is_excluded_directory(self, filepath: str, exclude_dirs: List[str]) -> bool:
        """Return True when *filepath* matches one of the exclusion rules.

        A rule matches either as an ``fnmatch`` pattern against the whole path
        or as the exact name of one of the path's components.
        """
        path = Path(filepath)
        path_text = str(path)
        return any(
            fnmatch.fnmatch(path_text, rule) or rule in path.parts
            for rule in exclude_dirs
        )

    def is_whitelisted(self, filepath: str, whitelist: List[str]) -> bool:
        """Return True when *filepath* matches any ``fnmatch`` pattern in *whitelist*.

        An empty whitelist matches nothing.
        """
        if not whitelist:
            return False
        path_text = str(Path(filepath))
        return any(fnmatch.fnmatch(path_text, pattern) for pattern in whitelist)

    @staticmethod
    def _docstring_lines(content: str) -> Set[int]:
        """Return the starting line numbers of all docstrings in *content*.

        Source that cannot be parsed yields an empty set, so analysis of a
        broken file stays best-effort instead of failing.
        """
        try:
            tree = ast.parse(content)
        except (SyntaxError, ValueError):
            return set()

        owners = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
        lines = set()
        for node in ast.walk(tree):
            if not isinstance(node, owners) or not node.body:
                continue
            first = node.body[0]
            # A docstring is a bare string expression in first position.
            if (isinstance(first, ast.Expr)
                    and isinstance(first.value, ast.Constant)
                    and isinstance(first.value.value, str)):
                lines.add(first.lineno)
        return lines

    @staticmethod
    def _split_literal(literal: str) -> Tuple[str, str, str]:
        """Split a string token into ``(prefix, delimiter, body)``.

        ``prefix`` is everything before the first quote character (``r``,
        ``b``, ``Rb`` ...), ``delimiter`` is the one- or three-character quote
        and ``body`` is the source text between the delimiters.
        """
        quote_start = min(
            pos for pos in (literal.find("'"), literal.find('"')) if pos != -1
        )
        prefix = literal[:quote_start]
        quoted = literal[quote_start:]
        delimiter = quoted[:3] if quoted[:3] in ("'''", '"""') else quoted[0]
        return prefix, delimiter, quoted[len(delimiter):-len(delimiter)]

    @staticmethod
    def _escape_double_quotes(body: str) -> str:
        """Backslash-escape every double quote in *body* that is not already escaped."""
        pieces = []
        index = 0
        while index < len(body):
            char = body[index]
            if char == '\\' and index + 1 < len(body):
                # Copy an existing escape sequence untouched.
                pieces.append(body[index:index + 2])
                index += 2
                continue
            pieces.append('\\"' if char == '"' else char)
            index += 1
        return ''.join(pieces)

    def is_docstring(self, filepath: str, line_no: int) -> bool:
        """Return True when a module/class/function docstring starts on *line_no*.

        Returns False when the file cannot be read or parsed.
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError):
            return False
        return line_no in self._docstring_lines(content)

    def analyze_file(self, filepath: str) -> List[Dict[str, Any]]:
        """Return one issue dict per convertible single-quoted literal in *filepath*.

        Each issue has the keys ``file``, ``line`` (1-based), ``column``
        (0-based), ``original_string``, ``suggested_string``, ``prefix`` and
        ``content``. Empty literals and double-quoted literals are ignored.
        Literals that start with a single quote but cannot be converted safely
        are counted in ``stats['skipped_strings']``: docstrings, triple-quoted
        literals, literals spanning several lines, and raw or f-strings whose
        body contains a double quote (escaping would change their meaning).

        Only ``STRING`` tokens are examined. On Python 3.12+ ``tokenize``
        emits f-strings as ``FSTRING_*`` tokens, so they are not reported.

        A file that cannot be read or tokenized is reported on stdout, counted
        in ``stats['error_files']`` and yields an empty list.
        """
        issues: List[Dict[str, Any]] = []

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
            # Tokenize the text already in memory; the file handle is closed.
            tokens = list(tokenize.generate_tokens(io.StringIO(content).readline))
        except (OSError, ValueError, SyntaxError, tokenize.TokenError) as e:
            self.stats['error_files'] += 1
            print(f"错误分析文件 {filepath}: {e}")
            return issues

        # Parse once per file instead of once per string token.
        docstring_lines = self._docstring_lines(content)

        for token in tokens:
            if token.type != tokenize.STRING:
                continue
            self.stats['total_strings'] += 1

            literal = token.string
            prefix, delimiter, body = self._split_literal(literal)
            if not delimiter.startswith("'") or not body:
                continue

            line_no, column = token.start
            has_double = '"' in body
            convertible = (
                line_no not in docstring_lines
                and delimiter == "'"
                and token.end[0] == line_no
                and not (has_double and set(prefix.lower()) & {'r', 'f'})
            )
            if not convertible:
                self.stats['skipped_strings'] += 1
                continue

            new_body = self._escape_double_quotes(body) if has_double else body
            self.stats['single_quote_strings'] += 1
            issues.append({
                'file': filepath,
                'line': line_no,
                'column': column,
                'original_string': literal,
                'suggested_string': prefix + '"' + new_body + '"',
                'prefix': prefix,
                'content': body
            })

        return issues

    def replace_string_in_file(self, filepath: str, replacements: List[Dict[str, Any]]) -> int:
        """Apply *replacements* to *filepath* and return how many were applied.

        Each replacement needs ``line``, ``original_string`` and
        ``suggested_string``. When ``column`` is present the literal must sit
        exactly at that column, so identical text elsewhere on the line is
        never touched; without ``column`` the first occurrence on the line is
        replaced. The caller's list is not modified, line endings are
        preserved, and the file is only rewritten when something changed.
        """
        replaced_count = 0

        try:
            # newline='' keeps the original line endings (e.g. CRLF) intact.
            with open(filepath, 'r', encoding='utf-8', newline='') as f:
                lines = f.readlines()

            # Bottom-up and right-to-left, so earlier positions stay valid.
            ordered = sorted(
                replacements,
                key=lambda item: (item['line'], item.get('column') or 0),
                reverse=True,
            )

            for replacement in ordered:
                original = replacement['original_string']
                suggested = replacement['suggested_string']
                index = replacement['line'] - 1  # 1-based line -> list index
                column = replacement.get('column')

                new_line = None
                if 0 <= index < len(lines):
                    line_content = lines[index]
                    if column is None:
                        candidate = line_content.replace(original, suggested, 1)
                        if candidate != line_content:
                            new_line = candidate
                    elif line_content.startswith(original, column):
                        new_line = (line_content[:column] + suggested
                                    + line_content[column + len(original):])

                if new_line is None:
                    print(f"警告: 无法替换 {original}")
                    continue

                lines[index] = new_line
                replaced_count += 1
                print(f"替换: {original} -> {suggested}")

            if replaced_count > 0:
                with open(filepath, 'w', encoding='utf-8', newline='') as f:
                    f.writelines(lines)

        except (OSError, UnicodeError, KeyError) as e:
            print(f"错误替换文件 {filepath}: {e}")

        return replaced_count

    def process_directory(self, root_dir: str, exclude_dirs: List[str],
                          whitelist: List[str], auto_fix: bool = False) -> None:
        """Analyze (and optionally fix) every ``*.py`` file below *root_dir*.

        Files matching *exclude_dirs* are skipped, as are files outside a
        non-empty *whitelist*. With ``auto_fix`` every issue is applied;
        otherwise each one is confirmed on stdin with ``y`` (apply), ``a``
        (apply this and all remaining issues) or anything else (skip).
        """
        root_path = Path(root_dir)

        # Sorted so that reports and interactive prompts are reproducible.
        for py_file in sorted(root_path.rglob("*.py")):
            file_name = str(py_file)
            if not py_file.is_file():
                continue
            if self.is_excluded_directory(file_name, exclude_dirs):
                continue
            if whitelist and not self.is_whitelisted(file_name, whitelist):
                continue

            self.stats['files_processed'] += 1
            print(f"\n分析文件: {py_file}")

            issues = self.analyze_file(file_name)
            if not issues:
                continue

            self.issues.extend(issues)
            self.stats['issues_found'] += len(issues)
            print(f"发现 {len(issues)} 个单引号字符串:")

            replacements = []
            for issue in issues:
                print(f"  行 {issue['line']}: {issue['original_string']}")

                if not auto_fix:
                    response = input(f"替换为 {issue['suggested_string']}? (y/n/a): ").strip().lower()
                    if response == 'a':
                        # Stop asking for the rest of this run.
                        auto_fix = True
                    elif response != 'y':
                        continue
                replacements.append(issue)

            if replacements:
                replaced = self.replace_string_in_file(file_name, replacements)
                self.stats['replaced_strings'] += replaced
                print(f"成功替换 {replaced} 个字符串")

    def save_stats(self, output_file: str) -> None:
        """Write ``self.stats`` and ``self.issues`` to *output_file* as UTF-8 JSON."""
        stats_data = {
            'summary': self.stats,
            'issues': self.issues
        }

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(stats_data, f, indent=2, ensure_ascii=False)

        print(f"\n统计信息已保存到: {output_file}")

def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser: a positional path plus filter/fix options."""
    parser = argparse.ArgumentParser(description='python字符串引号规范检查与修复工具')
    parser.add_argument('path', help='要检查的文件或目录路径')
    parser.add_argument('--exclude', nargs='*', default=[], help='排除的目录模式')
    parser.add_argument('--whitelist', nargs='*', default=[], help='白名单文件模式')
    parser.add_argument('--auto-fix', action='store_true', help='自动修复模式')
    parser.add_argument('--output', default='string_quote_stats.json', help='统计输出文件')
    return parser


def main():
    """Command-line entry point: check one file or a whole directory tree.

    A single file is analyzed directly (each fix is confirmed on stdin unless
    ``--auto-fix`` is given); a directory is delegated to
    ``process_directory``. Afterwards the counters are printed and the full
    report is written to the ``--output`` file. Nothing is written when the
    path does not exist or the file is rejected by the whitelist.
    """
    args = build_parser().parse_args()

    checker = stringquotechecker()

    if os.path.isfile(args.path):
        if args.whitelist and not checker.is_whitelisted(args.path, args.whitelist):
            print("文件不在白名单中")
            return

        # Keep the report consistent with directory mode: record the file and
        # its issues, otherwise the saved statistics would stay empty.
        checker.stats['files_processed'] += 1
        issues = checker.analyze_file(args.path)
        checker.issues.extend(issues)
        checker.stats['issues_found'] += len(issues)

        replacements = []
        for issue in issues:
            print(f"行 {issue['line']}: {issue['original_string']}")

            if args.auto_fix:
                replacements.append(issue)
                continue

            response = input(f"替换为 {issue['suggested_string']}? (y/n): ").lower()
            if response == 'y':
                replacements.append(issue)

        if replacements:
            replaced = checker.replace_string_in_file(args.path, replacements)
            checker.stats['replaced_strings'] += replaced
            print(f"成功替换 {replaced} 个字符串")

    elif os.path.isdir(args.path):
        checker.process_directory(args.path, args.exclude, args.whitelist, args.auto_fix)

    else:
        print(f"路径不存在: {args.path}")
        return

    # Summary on stdout, full details in the JSON report.
    print("\n" + "="*50)
    print("统计摘要:")
    for key, value in checker.stats.items():
        print(f"  {key}: {value}")

    checker.save_stats(args.output)

if __name__ == "__main__":
    main()

这个Python编码规范自动修复程序展示了如何结合多种Python标准库来构建一个实用的代码质量工具。通过智能的字符串识别、安全的替换机制和详细的统计报告，它能够在保证代码安全的前提下，有效地统一代码风格。该工具的设计思路和技术实现也可以为其他代码质量工具的开发提供参考。

程序的模块化设计使得它易于扩展，未来可以添加更多的代码规范检查功能，如行长度检查、导入顺序整理等，成为一个全面的Python代码质量工具套件。

程序架构设计

1. 核心组件

stringquotechecker类是整个程序的核心，负责：

文件遍历和过滤
字符串语法分析
问题检测和修复
统计信息收集

2. 技术栈选择

ast库：用于解析Python抽象语法树，准确识别docstring位置
tokenize库：进行词法分析，精确提取字符串token
argparse库：提供友好的命令行接口
pathlib库：跨平台路径处理

关键技术实现

1. 智能字符串识别

def analyze_file(self, filepath: str) -> List[Dict[str, Any]]:
    # Abridged excerpt: `f` is the already opened source file.
    # tokenize yields the exact source text of every string literal.
    tokens = list(tokenize.generate_tokens(f.readline))

    for token in tokens:
        if token.type == tokenize.STRING:
            string_value = token.string
            # Everything before the first quote character is the literal's
            # prefix (r, u, f, b, rb, ...); it is empty for plain strings.
            quote_start = min(
                pos for pos in (string_value.find("'"), string_value.find('"'))
                if pos != -1
            )
            prefix = string_value[:quote_start]

2. docstring智能排除

def is_docstring(self, filepath: str, line_no: int) -> bool:
    # Abridged excerpt: only the module-level check is shown here.
    with open(filepath, 'r', encoding='utf-8') as source:
        content = source.read()

    # The AST tells us which statement opens the module.
    tree = ast.parse(content)

    # A module docstring is a bare string expression in first position;
    # `tree.body` is empty for an empty file, so guard before indexing.
    if (tree.body and
        isinstance(tree.body[0], ast.Expr) and
        isinstance(tree.body[0].value, ast.Constant) and
        isinstance(tree.body[0].value.value, str) and
        tree.body[0].lineno == line_no):
        return True
    return False

3. 安全的文件替换机制

def replace_string_in_file(self, filepath: str, replacements: List[Dict[str, Any]]) -> int:
    # Abridged excerpt: `line_content`, `original` and `suggested` come from
    # the current replacement and the file's lines.
    # Work on a sorted copy, highest line number first, so the caller's list
    # is left untouched.
    ordered = sorted(replacements, key=lambda x: x['line'], reverse=True)

    for replacement in ordered:
        # Replace a single occurrence only, never every match on the line.
        new_line = line_content.replace(original, suggested, 1)

功能特性

1. 智能过滤

自动忽略docstring
支持排除目录模式匹配
提供白名单机制
正确处理字符串前缀

2. 安全修复

交互式确认或自动修复模式
仅在确实发生替换时才写回文件（程序本身不创建备份，建议先将代码提交到版本控制）
详细的替换日志

3. 全面统计

JSON格式的详细报告
处理进度跟踪
错误处理记录

测试用例详细说明

测试文件示例 (test_example.py)

'''模块docstring - 应该被忽略'''
# 单行单引号字符串 - 应该被替换
single_quote = 'hello world'

# 双引号字符串 - 应该保持不变
double_quote = "hello world"

# 包含单引号的双引号字符串 - 应该保持不变
contains_single = "it's a test"

# 包含双引号的单引号字符串 - 应该被替换
contains_double = 'he said "hello"'

# 带前缀的字符串
raw_string = r'raw string'
unicode_string = u'unicode string'
bytes_string = b'bytes string'
formatted_string = f'formatted {single_quote}'

class myclass:
    '''类docstring - 应该被忽略'''
    
    def __init__(self):
        '''方法docstring - 应该被忽略'''
        self.message = 'instance attribute'

def my_function():
    '''函数docstring - 应该被忽略'''
    local_var = 'local variable'
    return 'return value'

测试命令

# 交互式模式
python string_quote_checker.py test_example.py

# 自动修复模式
python string_quote_checker.py test_example.py --auto-fix

# 目录处理模式
python string_quote_checker.py ./src --exclude "*/migrations/*" --whitelist "*.py" --output stats.json

预期输出结果

修复后的test_example.py：

'''模块docstring - 应该被忽略'''
# 单行单引号字符串 - 应该被替换
single_quote = "hello world"

# 双引号字符串 - 应该保持不变
double_quote = "hello world"

# 包含单引号的双引号字符串 - 应该保持不变
contains_single = "it's a test"

# 包含双引号的单引号字符串 - 应该被替换
contains_double = "he said \"hello\""

# 带前缀的字符串
raw_string = r"raw string"
unicode_string = u"unicode string"
bytes_string = b"bytes string"
formatted_string = f"formatted {single_quote}"

class myclass:
    '''类docstring - 应该被忽略'''
    
    def __init__(self):
        '''方法docstring - 应该被忽略'''
        self.message = "instance attribute"

def my_function():
    '''函数docstring - 应该被忽略'''
    local_var = "local variable"
    return "return value"

统计输出示例 (stats.json)

{
  "summary": {
    "files_processed": 1,
    "total_strings": 15,
    "single_quote_strings": 8,
    "replaced_strings": 6,
    "skipped_strings": 3,
    "error_files": 0,
    "issues_found": 6
  },
  "issues": [
    {
      "file": "test_example.py",
      "line": 3,
      "column": 16,
      "original_string": "'hello world'",
      "suggested_string": "\"hello world\"",
      "prefix": "",
      "content": "hello world"
    }
  ]
}

技术挑战与解决方案

1. 字符串前缀处理

挑战：Python支持多种字符串前缀（r、u、f、b等），需要正确识别和保留。

解决方案：通过分析字符串开始部分，提取前缀并确保在替换时正确保留。

2. docstring准确识别

挑战：需要区分普通字符串和docstring。

解决方案：结合ast分析和行号定位，精确识别模块、类、函数级别的docstring。

3. 安全替换机制

挑战：避免在替换过程中破坏代码结构。

解决方案：使用精确的字符串替换，按行号降序处理，避免行号变化影响。

应用场景

代码规范统一：在大型项目中统一字符串引号风格

代码审查：在CI/CD流程中自动检查代码规范

遗留代码迁移：帮助迁移旧代码到新的编码标准

教学工具：帮助学生理解python编码规范

到此，这篇关于基于Python实现字符串规范检查与修复程序的文章就介绍到这了。更多相关Python字符串规范检查与修复的内容，请搜索代码网以前的文章或继续浏览下面的相关文章，希望大家以后多多支持代码网！

基于Python实现字符串规范检查与修复程序

2025年10月31日 • Python •我要评论