当前位置: 代码网 > it编程>前端脚本>Python > 基于Python编写一个PDF转换工具箱

基于Python编写一个PDF转换工具箱

2024年12月30日 Python 我要评论
1.简介使用python自写的pdf工具箱,包括pdf转word,图片,合并,页面拆分,页面删除,页面提取、转换word,图片功能,支持文件拖入。2.功能介绍合并:添加顺序就是合并顺序,可多次添加。拆

1.简介

使用 Python 编写的 PDF 工具箱,功能包括 PDF 合并、页面拆分、页面删除、页面提取,以及将 PDF 转换为 Word 文档和图片,并支持通过拖拽添加文件。

2.功能介绍

合并:添加顺序就是合并顺序,可多次添加。

拆分:将输入页码范围内的每一页分别保存为一个独立的 PDF 文件,单次可输入多个范围。

删除:将输入页码的范围删除,单次可输入多个范围,保存删除后的文件。

提取:将每个输入的页码范围提取为一个独立的 PDF 文件,单次可输入多个范围。

3.运行效果

4.相关源码

import os
import re
import sys

import fitz  # PyMuPDF, used to render PDF pages to JPG images
from pdf2docx import Converter
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from PyQt5.QtCore import QThread, pyqtSignal
from PyQt5.QtWidgets import (
    QApplication,
    QFileDialog,
    QHBoxLayout,
    QLineEdit,
    QListWidget,
    QMainWindow,
    QMessageBox,
    QPushButton,
    QVBoxLayout,
    QWidget,
)

class customlistwidget(QListWidget):
    """List widget that accepts PDF files dragged onto it.

    Every dropped local file whose name ends in ``.pdf`` is handed to the
    ``addpdffile`` method of the window passed as *parent*; anything else
    is ignored.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setAcceptDrops(True)
        # Window that receives the dropped files through addpdffile().
        self.parentwindow = parent

    @staticmethod
    def _pdf_paths(event):
        """Return the local paths of all ``.pdf`` files carried by *event*."""
        paths = (url.toLocalFile() for url in event.mimeData().urls())
        return [path for path in paths if path.lower().endswith('.pdf')]

    def _accept_if_pdf(self, event):
        """Accept the drag when it carries at least one PDF, else ignore it."""
        if self._pdf_paths(event):
            event.acceptProposedAction()
        else:
            event.ignore()

    # The three handlers below must keep Qt's exact camelCase names,
    # otherwise Qt never calls them and dropping files does nothing.
    def dragEnterEvent(self, event):
        self._accept_if_pdf(event)

    def dragMoveEvent(self, event):
        self._accept_if_pdf(event)

    def dropEvent(self, event):
        pdf_files = self._pdf_paths(event)
        if not pdf_files or self.parentwindow is None:
            event.ignore()
            return
        for f in pdf_files:
            self.parentwindow.addpdffile(f)
        event.acceptProposedAction()

class worker(QThread):
    """Background thread that performs one PDF operation.

    The operation is selected by name (``'merge'``, ``'split'``, ``'delete'``,
    ``'extract'``, ``'jpg'`` or ``'word'``) and executed by :meth:`run`.

    Signals:
        finished (str): user-facing success message. This class attribute
            replaces the argument-less ``finished`` signal of ``QThread``.
        error (str): text of the exception that aborted the operation.
    """

    finished = pyqtSignal(str)
    error = pyqtSignal(str)

    def __init__(self, pdf_files, range_str=None, save_path=None, operation=None):
        """Store the job description.

        Args:
            pdf_files: paths of the input PDF files.
            range_str: page ranges such as ``'1,3-4,8-15'`` (1-based,
                inclusive); used by split, delete and extract.
            save_path: output file (merge, delete) or output directory
                (split, extract, jpg, word).
            operation: name of the operation to run.
        """
        super().__init__()
        self.pdf_files = pdf_files
        self.range_str = range_str
        self.save_path = save_path
        self.operation = operation

    def run(self):
        """Run the selected operation and report any failure via ``error``."""
        handlers = {
            'merge': self.merge_pdfs,
            'split': self.split_pdfs,
            'delete': self.delete_pages,
            'extract': self.extract_pages,
            'jpg': self.pdf_to_jpg,
            'word': self.pdf_to_word,
        }
        try:
            handler = handlers.get(self.operation)
            if handler is None:
                # Previously an unknown name finished silently with no signal.
                raise ValueError(f'不支持的操作: {self.operation}')
            handler()
        except Exception as e:
            # Thread boundary: nothing above us can catch, so forward the
            # message to the GUI. Some exceptions have an empty str().
            self.error.emit(str(e) or repr(e))

    def _require_files(self):
        """Raise ``ValueError`` when no input file was supplied."""
        if not self.pdf_files:
            raise ValueError('请先添加pdf文件。')

    def _open_with_ranges(self):
        """Open the first PDF and parse ``range_str`` against its page count.

        Returns:
            ``(reader, ranges)`` where *ranges* holds 0-based inclusive
            ``(start, end)`` tuples that all lie inside the document.

        Raises:
            ValueError: no input file, malformed ranges, or a range that
                extends past the last page.
        """
        self._require_files()
        reader = PdfReader(self.pdf_files[0])
        ranges = self.parse_ranges(self.range_str)
        page_count = len(reader.pages)
        for start, end in ranges:
            if end >= page_count:
                raise ValueError(
                    f'页码范围 {start + 1}-{end + 1} 超出范围,该pdf共 {page_count} 页。')
        return reader, ranges

    def merge_pdfs(self):
        """Concatenate all input files, in list order, into ``save_path``."""
        self._require_files()
        merger = PdfMerger()
        try:
            for pdf in self.pdf_files:
                merger.append(pdf)
            merger.write(self.save_path)
        finally:
            # Release the input file handles even if a file is unreadable.
            merger.close()
        self.finished.emit('pdf文件已成功合并。')

    def split_pdfs(self):
        """Write every page of the requested ranges as its own PDF file."""
        reader, ranges = self._open_with_ranges()
        os.makedirs(self.save_path, exist_ok=True)
        pages = (page for start, end in ranges for page in range(start, end + 1))
        # A running index across all ranges keeps the file names unique.
        for file_index, page_num in enumerate(pages, start=1):
            writer = PdfWriter()
            writer.add_page(reader.pages[page_num])
            split_save_path = os.path.join(self.save_path, f'split_page_{file_index}.pdf')
            with open(split_save_path, 'wb') as f:
                writer.write(f)
        self.finished.emit('pdf文件已成功拆分并保存。')

    def delete_pages(self):
        """Write a copy of the first PDF without the requested pages."""
        reader, ranges = self._open_with_ranges()
        pages_to_delete = {page for start, end in ranges for page in range(start, end + 1)}
        writer = PdfWriter()
        for i, page in enumerate(reader.pages):
            if i not in pages_to_delete:
                writer.add_page(page)
        with open(self.save_path, 'wb') as f:
            writer.write(f)
        self.finished.emit('指定页面已从pdf中删除。')

    def extract_pages(self):
        """Write each requested range as one PDF inside ``save_path``."""
        reader, ranges = self._open_with_ranges()
        os.makedirs(self.save_path, exist_ok=True)
        for i, (start_page, end_page) in enumerate(ranges, start=1):
            writer = PdfWriter()
            for page_num in range(start_page, end_page + 1):
                writer.add_page(reader.pages[page_num])
            extract_save_path = os.path.join(self.save_path, f'extract_{i}.pdf')
            with open(extract_save_path, 'wb') as f:
                writer.write(f)
        self.finished.emit('指定页面已从pdf中提取。')

    def pdf_to_jpg(self):
        """Render every page of every input file to a JPG image.

        Images go to ``<save_path>/<pdf base name>/page_<n>.jpg``.
        """
        self._require_files()
        trans = fitz.Matrix(2, 2)  # render at 2x zoom in both directions
        for file in self.pdf_files:
            img_folder = os.path.join(self.save_path, os.path.splitext(os.path.basename(file))[0])
            os.makedirs(img_folder, exist_ok=True)
            # The context manager closes the document even on failure.
            with fitz.open(file) as pdf:
                for pg in range(pdf.page_count):
                    pm = pdf[pg].get_pixmap(matrix=trans, alpha=False)
                    pm.save(os.path.join(img_folder, f'page_{pg + 1}.jpg'))
        self.finished.emit('pdf文件已成功转换为图片。')

    def pdf_to_word(self):
        """Convert every input file to a ``.docx`` document.

        The document is written into ``save_path`` when one was given;
        otherwise it is placed next to the source PDF.
        """
        self._require_files()
        for file in self.pdf_files:
            base_name = os.path.splitext(os.path.basename(file))[0]
            target_dir = self.save_path or os.path.dirname(file)
            docx_name = os.path.join(target_dir, base_name + '.docx')
            cv = Converter(file)
            try:
                cv.convert(docx_name, start=0, end=None)
            finally:
                cv.close()
        self.finished.emit('pdf文件已成功转换为word文档。')

    def parse_ranges(self, ranges_str):
        """Parse ``'1,3-4,8-15'`` into 0-based inclusive ``(start, end)`` tuples.

        Parts may be separated by an ASCII or a full-width comma; blank
        parts (for example a trailing comma) are skipped.

        Raises:
            ValueError: a part is not a number or ``a-b`` pair, a page is
                smaller than 1, a range is reversed, or nothing was given.
        """
        ranges = []
        for part in re.split(r'[,,]', ranges_str or ''):
            part = part.strip()
            if not part:
                continue
            first, dash, last = part.partition('-')
            try:
                start_page = int(first)
                end_page = int(last) if dash else start_page
            except ValueError:
                raise ValueError(f'无效的页码范围: {part}') from None
            # Page 0 or a reversed range would otherwise turn into a
            # negative index / empty range and silently pick wrong pages.
            if start_page < 1 or end_page < start_page:
                raise ValueError(f'无效的页码范围: {part}')
            ranges.append((start_page - 1, end_page - 1))
        if not ranges:
            raise ValueError('请输入页码范围。')
        return ranges

class pdfmergerapp(QMainWindow):
    """Main window of the PDF toolbox.

    Holds the list of selected PDF files and starts one background
    ``worker`` per requested operation (merge, split, delete pages,
    extract pages, convert to images, convert to Word).
    """

    def __init__(self):
        super().__init__()
        self.pdf_files = []
        # Currently running (or last) worker. Not named ``thread`` because
        # that would shadow QObject.thread().
        self._worker = None
        self.initui()

    def _make_button(self, text, slot):
        """Create a push button labelled *text* whose click calls *slot*."""
        button = QPushButton(text, self)
        button.clicked.connect(slot)
        return button

    def _make_range_row(self, placeholder, button_text, slot):
        """Build a "page range input + action button" row.

        Returns:
            ``(layout, line_edit, button)``.
        """
        line_edit = QLineEdit(self)
        line_edit.setPlaceholderText(placeholder)
        button = self._make_button(button_text, slot)
        row = QHBoxLayout()
        row.addWidget(line_edit)
        row.addWidget(button)
        return row, line_edit, button

    def initui(self):
        """Create all widgets and lay them out top to bottom."""
        self.setWindowTitle('pdf 工具箱')
        self.setGeometry(100, 100, 800, 600)

        mainlayout = QVBoxLayout()

        self.addbutton = self._make_button('添加 pdf', self.addpdf)
        mainlayout.addWidget(self.addbutton)

        self.listwidget = customlistwidget(self)
        mainlayout.addWidget(self.listwidget)

        # Row: remove selected / remove all entries from the list.
        deletelayout = QHBoxLayout()
        self.removebutton = self._make_button('删除选定', self.removeselected)
        deletelayout.addWidget(self.removebutton)
        self.removeallbutton = self._make_button('删除全部', self.removeall)
        deletelayout.addWidget(self.removeallbutton)
        mainlayout.addLayout(deletelayout)

        # Row: conversions to images / Word.
        convertlayout = QHBoxLayout()
        self.convertjpgbutton = self._make_button('转换为图片', self.converttojpg)
        convertlayout.addWidget(self.convertjpgbutton)
        self.convertwordbutton = self._make_button('转换为word', self.converttoword)
        convertlayout.addWidget(self.convertwordbutton)
        mainlayout.addLayout(convertlayout)

        self.mergebutton = self._make_button('合并 pdfs', self.mergepdfs)
        mainlayout.addWidget(self.mergebutton)

        # Rows: page-range based operations.
        splitlayout, self.splitinput, self.splitbutton = self._make_range_row(
            '输入拆分页码范围可输入多个范围,如1,3-4,8-15', '拆分页面', self.splitpdf)
        mainlayout.addLayout(splitlayout)

        deletepagelayout, self.deleteinput, self.deletebutton = self._make_range_row(
            '输入删除页码范围可输入多个范围,如1,3-4,8-15', '删除页面', self.deletepages)
        mainlayout.addLayout(deletepagelayout)

        extractlayout, self.extractinput, self.extractbutton = self._make_range_row(
            '输入提取页码范围可输入多个范围,如1,3-4,8-15', '提取页面', self.extractpages)
        mainlayout.addLayout(extractlayout)

        container = QWidget()
        container.setLayout(mainlayout)
        self.setCentralWidget(container)

    def addpdf(self):
        """Let the user pick PDF files and append them to the list."""
        files, _ = QFileDialog.getOpenFileNames(self, '打开文件', '', 'pdf files (*.pdf)')
        for file_path in files:
            self.addpdffile(file_path)

    def addpdffile(self, file_path):
        """Append *file_path* to the list unless it is empty or already present."""
        if file_path and file_path not in self.pdf_files:
            self.pdf_files.append(file_path)
            self.listwidget.addItem(file_path)

    def removeselected(self):
        """Remove the selected entries from both the list widget and ``pdf_files``."""
        for item in self.listwidget.selectedItems():
            self.pdf_files.remove(item.text())
            self.listwidget.takeItem(self.listwidget.row(item))

    def removeall(self):
        """Remove every entry from the list."""
        self.pdf_files.clear()
        self.listwidget.clear()

    def _has_files(self):
        """Return True when files are listed; otherwise warn and return False."""
        if self.pdf_files:
            return True
        QMessageBox.warning(self, "错误", "请先添加pdf文件。")
        return False

    def _single_pdf_range(self, line_edit, single_file_message):
        """Validate the preconditions of a page-range operation.

        Returns:
            The stripped range text, or ``None`` after warning the user
            when not exactly one file is listed or the range is empty.
        """
        if len(self.pdf_files) != 1:
            QMessageBox.warning(self, "错误", single_file_message)
            return None
        range_str = line_edit.text().strip()
        if not range_str:
            # Checked before any dialog opens so the user is not asked for
            # a destination that would then be silently discarded.
            QMessageBox.warning(self, "错误", "请输入页码范围。")
            return None
        return range_str

    def _start_worker(self, operation, save_path, range_str=None):
        """Start a background ``worker`` for *operation*.

        Refuses to start while a previous worker is still running:
        replacing the reference would let a running QThread be destroyed.
        """
        if self._worker is not None and self._worker.isRunning():
            QMessageBox.warning(self, "错误", "已有任务正在进行,请稍候。")
            return
        # Pass a copy so list edits in the UI cannot affect the running job.
        self._worker = worker(list(self.pdf_files), range_str=range_str,
                              save_path=save_path, operation=operation)
        self._worker.finished.connect(self.onfinished)
        self._worker.error.connect(self.onerror)
        self._worker.start()

    def mergepdfs(self):
        """Merge all listed files, in list order, into a user-chosen file."""
        if not self._has_files():
            return
        save_path, _ = QFileDialog.getSaveFileName(self, '保存文件', '', 'pdf files (*.pdf)')
        if save_path:
            self._start_worker('merge', save_path)

    def splitpdf(self):
        """Split the requested pages of the single listed file into one PDF per page."""
        range_str = self._single_pdf_range(self.splitinput, "请只选择一个pdf文件进行拆分。")
        if range_str is None:
            return
        folder_path = self.getfoldername()
        if folder_path:
            self._start_worker('split', folder_path, range_str)

    def deletepages(self):
        """Save a copy of the single listed file without the requested pages."""
        range_str = self._single_pdf_range(self.deleteinput, "请只选择一个pdf文件进行删除操作。")
        if range_str is None:
            return
        save_path = QFileDialog.getSaveFileName(self, '保存文件', '', 'pdf files (*.pdf)')[0]
        if save_path:
            self._start_worker('delete', save_path, range_str)

    def extractpages(self):
        """Extract each requested range of the single listed file into its own PDF."""
        range_str = self._single_pdf_range(self.extractinput, "请只选择一个pdf文件进行提取操作。")
        if range_str is None:
            return
        # The worker writes extract_<n>.pdf files *into* save_path, so a
        # directory is required here, not a file name.
        folder_path = self.getfoldername("选择保存提取文件的位置")
        if folder_path:
            self._start_worker('extract', folder_path, range_str)

    def converttojpg(self):
        """Render all listed files to JPG images in a user-chosen directory."""
        if not self._has_files():
            return
        save_path = QFileDialog.getExistingDirectory(self, "选择保存图片的位置")
        if save_path:
            self._start_worker('jpg', save_path)

    def converttoword(self):
        """Convert all listed files to Word documents."""
        if not self._has_files():
            return
        save_path = QFileDialog.getExistingDirectory(self, "选择保存word的位置")
        if save_path:
            self._start_worker('word', save_path)

    def getfoldername(self, caption="选择保存拆分文件的位置"):
        """Ask the user for an output directory; returns '' when cancelled."""
        folder_path = QFileDialog.getExistingDirectory(self, caption)
        return folder_path

    def onfinished(self, message):
        """Show the success *message*, then reset the file list and inputs."""
        QMessageBox.information(self, "操作完成", message)
        self.clear_pdf_list()
        self.clear_text_inputs()

    def onerror(self, error_message):
        """Show *error_message*; the file list and inputs are left untouched."""
        QMessageBox.warning(self, "操作失败", error_message)

    def clear_pdf_list(self):
        """Empty both ``pdf_files`` and the list widget."""
        self.pdf_files.clear()
        self.listwidget.clear()

    def clear_text_inputs(self):
        """Clear the three page-range line edits."""
        self.splitinput.clear()
        self.deleteinput.clear()
        self.extractinput.clear()
def main():
    """Create the Qt application, show the toolbox window and run the event loop.

    The process exits with the status returned by the event loop.
    """
    app = QApplication(sys.argv)
    ex = pdfmergerapp()
    ex.show()
    sys.exit(app.exec_())

# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

到此,这篇关于基于 Python 编写一个 PDF 转换工具箱的文章就介绍到这了。更多相关 Python PDF 转换内容,请搜索代码网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持代码网!

(0)

相关文章:

版权声明:本文内容由互联网用户贡献,该文观点仅代表作者本人。本站仅提供信息存储服务,不拥有所有权,不承担相关法律责任。 如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 2386932994@qq.com 举报,一经查实将立刻删除。

发表评论

验证码:
Copyright © 2017-2025  代码网 保留所有权利. 粤ICP备2024248653号
站长QQ:2386932994 | 联系邮箱:2386932994@qq.com