Python处理.nfo文件格式的完整教程_Python

掌握媒体元数据文件的操作技巧

一、什么是 .nfo 文件？

.nfo 文件是媒体文件的元数据容器，通常用于存储电影、电视剧、音乐等多媒体信息的结构化数据。它们本质上是 XML 格式的文本文件，包含如标题、演员、剧情简介等关键信息。

典型 .nfo 文件结构

<?xml version="1.0" encoding="utf-8"?>
<movie>
  <title>黑客帝国</title>
  <originaltitle>The Matrix</originaltitle>
  <year>1999</year>
  <plot>一名年轻的网络黑客发现看似正常的现实世界实际上是由名为"矩阵"的计算机人工智能系统控制的...</plot>
  <director>莉莉·沃卓斯基</director>
  <rating>8.7</rating>
  <genre>科幻</genre>
  <genre>动作</genre>
  <actor>
    <name>基努·里维斯</name>
    <role>尼奥</role>
    <thumb>https://example.com/keanu.jpg</thumb>
  </actor>
</movie>

二、核心库介绍

1. 标准库解决方案

import xml.etree.ElementTree as et

2. 第三方库推荐

pip install lxml beautifulsoup4 pynfo

三、完整处理流程

1. 读取 .nfo 文件

def read_nfo(file_path):
    """Parse a .nfo file and return its root XML element.

    Args:
        file_path: Path to the .nfo file.

    Returns:
        The root element of the parsed document, or ``None`` when the file
        does not exist or is not well-formed XML. A message is printed in
        both failure cases.
    """
    try:
        return et.parse(file_path).getroot()
    except et.ParseError as e:
        print(f"解析错误: {e}")
    except FileNotFoundError:
        print(f"文件不存在: {file_path}")
    # Both failure paths fall through to a single explicit "no result".
    return None

# Usage example
movie_nfo = read_nfo("the.matrix.nfo")

2. 提取基本信息

def extract_movie_info(root):
    """Collect the basic movie fields from a parsed ``<movie>`` element.

    Args:
        root: Root element of a parsed .nfo document, or ``None``.

    Returns:
        A dict with the keys ``title``, ``year``, ``plot``, ``director``,
        ``rating`` (element text or ``None`` when absent), ``genres`` (list
        of ``<genre>`` texts) and ``actors`` (list of dicts with ``name``,
        ``role`` and ``thumb``). Returns ``None`` when ``root`` is ``None``
        or its tag is not ``movie``.
    """
    # A failed parse yields no root at all; treat it like a non-movie file.
    if root is None or root.tag != 'movie':
        return None

    return {
        'title': root.findtext('title'),
        'year': root.findtext('year'),
        'plot': root.findtext('plot'),
        'director': root.findtext('director'),
        'rating': root.findtext('rating'),
        'genres': [genre.text for genre in root.findall('genre')],
        'actors': [
            {
                'name': actor.findtext('name'),
                'role': actor.findtext('role'),
                'thumb': actor.findtext('thumb'),
            }
            for actor in root.findall('actor')
        ],
    }

# Usage example. The result is subscripted directly, so this assumes
# extract_movie_info returned a dict for movie_nfo.
movie_info = extract_movie_info(movie_nfo)
print(f"电影标题: {movie_info['title']}")

3. 修改 .nfo 文件

def update_nfo_rating(file_path, new_rating):
    """Set the ``<rating>`` of a .nfo file and write the file back in place.

    Args:
        file_path: Path to an existing .nfo file; it is overwritten.
        new_rating: New rating value, stored as ``str(new_rating)``.

    Parse and I/O errors raised by ``et.parse`` are not caught here.
    """
    tree = et.parse(file_path)
    root = tree.getroot()

    # Reuse the existing <rating> element, creating one only if it is absent.
    rating_elem = root.find('rating')
    if rating_elem is None:
        rating_elem = et.SubElement(root, 'rating')
    rating_elem.text = str(new_rating)

    tree.write(file_path, encoding='utf-8', xml_declaration=True)

# Usage example
update_nfo_rating("the.matrix.nfo", 9.2)

4. 创建新的 .nfo 文件

def create_nfo_file(file_path, movie_data):
    """Build a ``<movie>`` document from a dict and write it as a .nfo file.

    Args:
        file_path: Destination path; an existing file is overwritten.
        movie_data: Mapping with the required keys ``'title'``, ``'year'``
            and ``'plot'``, plus optional ``'genres'`` (iterable of strings)
            and ``'actors'`` (iterable of dicts with ``'name'`` and
            ``'role'``).

    Raises:
        KeyError: If a required key is missing from ``movie_data`` or from
            an actor entry.
    """
    movie = et.Element('movie')

    et.SubElement(movie, 'title').text = movie_data['title']
    et.SubElement(movie, 'year').text = str(movie_data['year'])
    et.SubElement(movie, 'plot').text = movie_data['plot']

    # Genres and actors are optional so a minimal record can still be written.
    for genre in movie_data.get('genres', ()):
        et.SubElement(movie, 'genre').text = genre

    for actor in movie_data.get('actors', ()):
        actor_elem = et.SubElement(movie, 'actor')
        et.SubElement(actor_elem, 'name').text = actor['name']
        et.SubElement(actor_elem, 'role').text = actor['role']

    et.ElementTree(movie).write(file_path, encoding='utf-8', xml_declaration=True)
    print(f"已创建 .nfo 文件: {file_path}")

# Usage example
new_movie = {
    'title': '盗梦空间',
    'year': 2010,
    'plot': '一群能够潜入他人梦境窃取思想的盗贼...',
    'genres': ['科幻', '惊悚'],
    'actors': [
        {'name': '莱昂纳多·迪卡普里奥', 'role': '科布'},
        {'name': '约瑟夫·高登-莱维特', 'role': '亚瑟'}
    ]
}
create_nfo_file("inception.nfo", new_movie)

四、高级处理技巧

1. 使用 lxml 处理复杂文件

from lxml import etree

def parse_with_lxml(file_path):
    """Parse a .nfo file with lxml and print the roles of one actor.

    Args:
        file_path: Path to the .nfo file.

    Returns:
        The parsed lxml element tree.
    """
    parser = etree.XMLParser(remove_blank_text=True)
    tree = etree.parse(file_path, parser)
    root = tree.getroot()

    # XPath query: every <actor> whose <name> equals the given string.
    actors = root.xpath('//actor[name="莱昂纳多·迪卡普里奥"]')
    for actor in actors:
        roles = actor.xpath('role/text()')
        # An actor without role text yields an empty list; skip it instead
        # of failing on the index.
        if roles:
            print(f"角色: {roles[0]}")

    return tree

# Namespace-aware lookup
def parse_with_namespace(file_path):
    """Print the text of the first ``title`` element in the example namespace.

    The prefix ``ns`` is bound to ``http://www.example.com/nfo`` for the
    XPath query. An ``IndexError`` is raised when no such element exists.
    """
    namespaces = {'ns': 'http://www.example.com/nfo'}
    document = etree.parse(file_path)
    matches = document.xpath('//ns:title', namespaces=namespaces)
    title = matches[0].text
    print(f"带命名空间的标题: {title}")

2. 处理非标准 .nfo 文件

def handle_non_standard_nfo(file_path):
    """Leniently parse a .nfo file that is not strictly valid XML.

    The file is read as UTF-8 with undecodable bytes dropped, bare ``&``
    characters are escaped, and the result is parsed with BeautifulSoup's
    ``lxml-xml`` parser.

    Args:
        file_path: Path to the .nfo file.

    Returns:
        A dict with ``'title'`` (text of the first ``<title>`` element, or
        ``None`` when there is none) and ``'soup'`` (the BeautifulSoup
        object, for further processing).
    """
    import re

    from bs4 import BeautifulSoup

    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()

    # Escape only bare ampersands. An "&" that already starts an entity or a
    # character reference (&amp;, &#38;, &#x26;) is left alone, otherwise
    # valid input would be double-escaped into "&amp;amp;".
    content = re.sub(
        r'&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#x[0-9A-Fa-f]+);)',
        '&amp;',
        content,
    )

    soup = BeautifulSoup(content, 'lxml-xml')

    # Look the element up once and reuse the result.
    title_tag = soup.find('title')
    title = title_tag.text if title_tag is not None else None

    return {
        'title': title,
        'soup': soup,
    }

3. 批量处理 .nfo 文件

import os
from pathlib import path

def batch_process_nfo(directory):
    """Extract movie info from every ``*.nfo`` file directly in a directory.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        A list of ``(file_name, info)`` tuples, where ``info`` is whatever
        ``extract_movie_info`` returns for that file's root element. Files
        that fail to process are reported with a printed message and
        omitted from the result.
    """
    from pathlib import Path

    results = []

    # Sorted so the result order does not depend on the file system.
    for nfo_file in sorted(Path(directory).glob('*.nfo')):
        try:
            root = et.parse(nfo_file).getroot()
            info = extract_movie_info(root)
            results.append((nfo_file.name, info))
        except Exception as e:
            # Deliberately best-effort: one bad file must not stop the batch.
            print(f"处理 {nfo_file} 失败: {e}")

    return results

# 使用 pandas 导出结果
import pandas as pd

def export_to_csv(nfo_dir, output_file):
    """Export the file name, title, year and rating of each .nfo to CSV.

    Args:
        nfo_dir: Directory passed to ``batch_process_nfo``.
        output_file: Path of the CSV file to write (no index column).

    Entries whose info is ``None`` are skipped.
    """
    data = batch_process_nfo(nfo_dir)

    rows = [
        {
            'file': name,
            'title': info['title'],
            'year': info['year'],
            'rating': info['rating'],
        }
        for name, info in data
        if info is not None
    ]

    # Explicit columns keep the header row even when there are no rows.
    df = pd.DataFrame(rows, columns=['file', 'title', 'year', 'rating'])
    df.to_csv(output_file, index=False)

五、实际应用案例

1. 媒体库元数据同步

def sync_with_media_library(nfo_dir, media_dir):
    """Copy title and director from each .nfo file into a media file's tags.

    For every ``*.nfo`` file in ``nfo_dir`` the title is read, a matching
    media file is looked up in ``media_dir``, and that file's ``title`` and
    ``artist`` tags are set from the .nfo title and director.

    Args:
        nfo_dir: Directory containing the .nfo files.
        media_dir: Directory searched for the corresponding media files.
    """
    from pathlib import Path

    from mutagen import File

    for nfo_file in Path(nfo_dir).glob('*.nfo'):
        root = et.parse(nfo_file).getroot()
        title = root.findtext('title')
        if not title:
            # Nothing to match on and nothing to write.
            continue

        # NOTE(review): find_media_file is not defined in this section; it is
        # assumed to return the matching media path or a falsy value - confirm.
        media_file = find_media_file(media_dir, title)
        if not media_file:
            continue

        # easy=True requests mutagen's "easy" wrappers where available, so
        # the plain 'title' / 'artist' keys can be assigned.
        audio = File(media_file, easy=True)
        if audio is None:
            # mutagen could not identify the file type.
            continue

        audio['title'] = title
        director = root.findtext('director')
        if director:
            audio['artist'] = director
        audio.save()

2. 生成 html 报告

def generate_html_report(nfo_files, output_file):
    """Write an HTML table summarising a list of .nfo files.

    Each file is read with ``read_nfo``; files it cannot parse are skipped.
    One table row is emitted per file with title, year, director and rating.
    Values are HTML-escaped and missing fields are rendered as empty cells.

    Args:
        nfo_files: Iterable of .nfo file paths.
        output_file: Path of the HTML file to write (UTF-8).
    """
    from html import escape

    parts = ["""
    <html>
    <head>
        <title>媒体库报告</title>
        <style>
            table { border-collapse: collapse; width: 100%; }
            th, td { border: 1px solid #ddd; padding: 8px; }
            tr:nth-child(even) { background-color: #f2f2f2; }
        </style>
    </head>
    <body>
        <h1>媒体库报告</h1>
        <table>
            <tr>
                <th>标题</th>
                <th>年份</th>
                <th>导演</th>
                <th>评分</th>
            </tr>
    """]

    for nfo_file in nfo_files:
        root = read_nfo(nfo_file)
        # Compare against None explicitly: an element without children is
        # falsy, so a plain truthiness test would skip it.
        if root is None:
            continue

        title = escape(root.findtext('title') or '')
        year = escape(root.findtext('year') or '')
        director = escape(root.findtext('director') or '')
        rating = escape(root.findtext('rating') or '')
        parts.append(f"""
            <tr>
                <td>{title}</td>
                <td>{year}</td>
                <td>{director}</td>
                <td>{rating}</td>
            </tr>
            """)

    parts.append("""
        </table>
    </body>
    </html>
    """)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

3. 自动下载缺失信息

import requests
from bs4 import beautifulsoup

def enrich_nfo_info(file_path):
    """Fill in a missing plot and rating in a .nfo file from an online search.

    The movie title is used as the search query. Only fields that are absent
    or empty in the file are filled, and the file is rewritten only when
    something changed.

    Args:
        file_path: Path to the .nfo file; it may be overwritten.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            HTTP status.
    """
    tree = et.parse(file_path)
    root = tree.getroot()

    title = root.findtext('title')
    if not title:
        return

    # NOTE(review): the endpoint and the response fields used below
    # ('movies', 'summary', 'rating' -> 'average') are taken as-is from the
    # original code and have not been checked against the live API.
    response = requests.get(
        "https://api.douban.com/v2/movie/search",
        params={'q': title},  # let requests URL-encode the title
        timeout=10,
    )
    response.raise_for_status()

    movies = response.json().get('movies')
    if not movies:
        return
    movie_data = movies[0]

    def fill_missing(tag, value):
        """Set <tag> to value if it has no text yet; return True if changed."""
        if value is None or root.findtext(tag):
            return False
        # Reuse an existing empty element instead of appending a duplicate.
        elem = root.find(tag)
        if elem is None:
            elem = et.SubElement(root, tag)
        elem.text = str(value)
        return True

    plot_changed = fill_missing('plot', movie_data.get('summary'))
    rating_changed = fill_missing(
        'rating', (movie_data.get('rating') or {}).get('average')
    )

    if plot_changed or rating_changed:
        tree.write(file_path, encoding='utf-8', xml_declaration=True)

六、常见问题解决方案

1. 编码问题处理

def read_nfo_with_encoding(file_path):
    """Parse a .nfo file, trying several text encodings in turn.

    The file is read once as bytes and decoded as UTF-8, GBK and ISO-8859-1
    in that order; the first decoding that also parses as XML wins. If none
    does, the raw bytes are handed to the XML parser as a last attempt.

    Args:
        file_path: Path to the .nfo file.

    Returns:
        The root element of the parsed document.

    Raises:
        xml.etree.ElementTree.ParseError: If the content cannot be parsed
            under any of the attempted encodings.
    """
    with open(file_path, 'rb') as f:
        raw = f.read()

    for enc in ('utf-8', 'gbk', 'iso-8859-1'):
        try:
            return et.fromstring(raw.decode(enc))
        except (UnicodeDecodeError, et.ParseError):
            # A decoding that succeeds can still produce text that does not
            # parse, so a parse error also moves on to the next candidate.
            continue

    # Last resort: hand the parser the undecoded bytes.
    return et.fromstring(raw)

2. 处理特殊字符

def sanitize_nfo_content(content):
    """Replace the five XML special characters in ``content`` with entities.

    ``&``, ``<``, ``>``, ``"`` and ``'`` become ``&amp;``, ``&lt;``,
    ``&gt;``, ``&quot;`` and ``&apos;``. Every ``&`` is escaped, including
    one that already starts an entity.
    """
    # A translation table maps each character in a single pass.
    entity_table = str.maketrans({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;',
    })
    return content.translate(entity_table)

3. 验证 .nfo 文件结构

def validate_nfo(file_path, schema_path='nfo_schema.xsd'):
    """Validate a .nfo file against an XML Schema (XSD) using lxml.

    Args:
        file_path: Path to the .nfo file.
        schema_path: Path to the XSD schema file.

    Returns:
        ``True`` when the document is valid, ``False`` otherwise. On failure
        each schema error is printed with its line number.
    """
    from lxml import etree

    xml_doc = etree.parse(file_path)
    schema = etree.XMLSchema(etree.parse(schema_path))

    if schema.validate(xml_doc):
        print("文件有效")
        return True

    print("文件无效:")
    for error in schema.error_log:
        print(f"行 {error.line}: {error.message}")
    return False

七、完整工具类实现

class nfoprocessor:
    """Helper for loading, querying, editing and saving one .nfo (XML) file."""

    def __init__(self, file_path=None):
        """Create a processor, loading ``file_path`` immediately if given."""
        # Path used by save() when no explicit path is passed.
        self.file_path = file_path
        # Parsed element tree and its root; both stay None until load().
        self.tree = None
        self.root = None

        if file_path:
            self.load(file_path)

    def load(self, file_path):
        """Parse ``file_path`` and make it the current document.

        The standard-library parser is tried first. If the file is not
        well-formed, lxml's recovering parser is tried as a fallback.

        Raises:
            ValueError: If the file cannot be parsed by either parser.
        """
        self.file_path = file_path
        try:
            self.tree = et.parse(file_path)
        except et.ParseError as parse_error:
            self.tree = self._load_with_recovery(file_path, parse_error)
        self.root = self.tree.getroot()

    @staticmethod
    def _load_with_recovery(file_path, parse_error):
        """Re-parse a malformed file with lxml and return a stdlib tree."""
        try:
            from lxml import etree
        except ImportError:
            raise ValueError(f"无法解析文件: {parse_error}") from parse_error

        try:
            parser = etree.XMLParser(recover=True)
            recovered_root = etree.parse(file_path, parser).getroot()
            if recovered_root is not None:
                # Convert back to a standard-library tree: the editing
                # methods below build children with et.SubElement, which
                # does not accept lxml elements.
                return et.ElementTree(
                    et.fromstring(etree.tostring(recovered_root))
                )
        except Exception as e:
            raise ValueError(f"无法解析文件: {e}") from e

        # Recovery produced no root element at all.
        raise ValueError(f"无法解析文件: {parse_error}") from parse_error

    def save(self, file_path=None):
        """Write the document as UTF-8 XML with an XML declaration.

        Args:
            file_path: Destination path; defaults to the loaded file's path.

        Raises:
            ValueError: If no path is available or no document is loaded.
        """
        save_path = file_path or self.file_path
        if not save_path:
            raise ValueError("未指定保存路径")

        if self.tree is None:
            raise ValueError("没有可保存的数据")

        self.tree.write(save_path, encoding='utf-8', xml_declaration=True)

    def get_value(self, path):
        """Return the text of the first element at ``path``, or ``None``."""
        elem = self.root.find(path)
        return elem.text if elem is not None else None

    def set_value(self, path, value):
        """Set the text of the element at ``path``, creating missing parts.

        ``path`` is a ``/``-separated chain of tag names relative to the
        root. The value is stored as ``str(value)``.
        """
        current = self.root

        # Walk down the path, creating each missing element on the way.
        for part in path.split('/'):
            elem = current.find(part)
            if elem is None:
                elem = et.SubElement(current, part)
            current = elem

        current.text = str(value)

    def get_actors(self):
        """Return a list of ``name`` / ``role`` / ``thumb`` dicts per actor."""
        return [
            {
                'name': actor.findtext('name'),
                'role': actor.findtext('role'),
                'thumb': actor.findtext('thumb'),
            }
            for actor in self.root.findall('actor')
        ]

    def add_actor(self, name, role, thumb=None):
        """Append an ``<actor>`` element; ``<thumb>`` only when given."""
        actor = et.SubElement(self.root, 'actor')
        et.SubElement(actor, 'name').text = name
        et.SubElement(actor, 'role').text = role
        if thumb:
            et.SubElement(actor, 'thumb').text = thumb

    def to_dict(self):
        """Return the main movie fields, genres and actors as a dict."""
        return {
            'title': self.get_value('title'),
            'year': self.get_value('year'),
            'plot': self.get_value('plot'),
            'director': self.get_value('director'),
            'rating': self.get_value('rating'),
            'genres': [g.text for g in self.root.findall('genre')],
            'actors': self.get_actors(),
        }

# Usage example
processor = nfoprocessor("the.matrix.nfo")
print(processor.get_value('title'))  # prints: 黑客帝国
processor.set_value('rating', 9.0)
processor.add_actor('凯瑞-安·莫斯', '崔妮蒂')
processor.save()

八、总结与最佳实践

核心处理流程

读取：使用 xml.etree.ElementTree 或 lxml 解析文件
提取：使用 find() 和 findall() 获取数据
修改：直接操作 XML 元素树
创建：使用 Element 和 SubElement 构建结构
保存：使用 write() 方法写入文件

最佳实践建议

编码处理：始终指定 utf-8 编码
错误处理：添加异常捕获处理格式错误
备份文件：修改前创建备份
使用 lxml：处理复杂文件时选择 lxml
验证结构：使用 XML Schema 验证文件

Python处理.nfo文件格式的完整教程

2025年07月24日 • Python •我要评论

一、什么是 .nfo 文件？

典型 .nfo 文件结构

二、核心库介绍

1. 标准库解决方案

2. 第三方库推荐

三、完整处理流程

1. 读取 .nfo 文件

2. 提取基本信息

3. 修改 .nfo 文件

4. 创建新的 .nfo 文件

四、高级处理技巧

1. 使用 lxml 处理复杂文件

2. 处理非标准 .nfo 文件

3. 批量处理 .nfo 文件

五、实际应用案例

1. 媒体库元数据同步

2. 生成 html 报告

3. 自动下载缺失信息

六、常见问题解决方案

1. 编码问题处理

2. 处理特殊字符

3. 验证 .nfo 文件结构

七、完整工具类实现

八、总结与最佳实践

核心处理流程

最佳实践建议

推荐工具

相关文章:

使用Python实现在PDF中查找和高亮文字的方法

Python批量给excel文件加密的操作教程

发表评论


验证码：