岚法律办公自动化程序「香港」
原创2025/8/28大约 4 分钟...约 1208 字
Step 1:将扫描的 PDF 文件转换成图片
用 PyMuPDF(fitz)把 PDF 按页导出为图片:
- 每个 PDF 导出到同名文件夹下(自动去掉 .pdf 后缀)
- 图片以页码命名(1.png, 2.png, ...,从 1 开始)
- 支持批量处理目录、页码范围、输出格式(png/jpg)、清晰度(缩放倍数)
1. 安装依赖
pip install pymupdf
2. 代码实现
# -*- coding: utf-8 -*-
# @Time : 2025/8/27 18:13
# @Author : AI悦创
# @FileName: pdf_to_img.py
# @Software: PyCharm
# @Blog :https://bornforthis.cn/
# code is far away from bugs with the god animal protecting
# I love animals. They taste delicious.
from pathlib import Path
from datetime import datetime
import traceback
# ====== User-adjustable settings (paths are written directly in the code) ======
INPUT_DIR = Path("./PDF-DATA")  # folder that holds the source PDFs
OUTPUT_ROOT = Path("./RESULT")  # root folder for all output: RESULT
RECURSIVE = False  # whether sub-directories are traversed as well
DPI = 600  # 600 is the suggested "highest quality"; 300/600/1200 are typical
SAVE_PNG = True  # whether PNG files are exported
SAVE_JPG = False  # whether JPG files are exported
JPG_QUALITY = 95  # JPG quality (1-100); 95 is near-lossless at a manageable size
LOG_FILE = OUTPUT_ROOT.joinpath("log.txt")  # failure log
# ================================================================================
# 依赖 PyMuPDF
# PyMuPDF is required; when it is missing, exit with an install hint instead of
# a raw ImportError traceback.
try:
    import fitz  # PyMuPDF
except ImportError as exc:
    raise SystemExit("未安装 PyMuPDF,请先运行:pip install pymupdf") from exc
def strip_pdf_suffixes(name):
    """Remove every trailing ".pdf" from *name*, ignoring letter case.

    Repeated suffixes such as "xxx.pdf.pdf" are stripped completely. When
    nothing is left after stripping, the placeholder "untitled" is returned.
    """
    suffix = ".pdf"
    stem = name
    while stem.lower().endswith(suffix):
        stem = stem[: -len(suffix)]
    if not stem:
        return "untitled"
    return stem
def log_error(msg):
    """Append *msg* to LOG_FILE as one entry prefixed with a local timestamp.

    The directory that holds LOG_FILE is created first if it does not exist.
    """
    LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {msg}\n"
    with LOG_FILE.open("a", encoding="utf-8") as log:
        log.write(entry)
def convert_pdf_to_images(pdf_path: Path):
    """Render every page of one PDF to image files below OUTPUT_ROOT.

    Images go to ``OUTPUT_ROOT/<file name with trailing .pdf removed>/`` and
    are named after the 1-based page number (``1.png``, ``1.jpg``, ...).
    SAVE_PNG and SAVE_JPG select which formats are written, DPI sets the
    render resolution and JPG_QUALITY the JPG compression quality.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        None. A file that cannot be opened, or a page that cannot be rendered
        or saved, is recorded through log_error and processing continues with
        the next page. Errors raised outside the per-page loop (for example
        while creating the output folder) propagate to the caller.
    """
    # With no output format enabled, rendering would be pure wasted work.
    if not (SAVE_PNG or SAVE_JPG):
        log_error(f"已跳过: {pdf_path} | SAVE_PNG 与 SAVE_JPG 均为 False,无输出格式")
        return

    try:
        doc = fitz.open(pdf_path)
    except Exception as e:
        log_error(f"打开失败: {pdf_path} | {e}")
        return

    try:
        base_name = strip_pdf_suffixes(pdf_path.name)
        out_dir = OUTPUT_ROOT / base_name
        out_dir.mkdir(parents=True, exist_ok=True)

        # PDF user space is 72 units per inch, so DPI / 72 is the scale factor.
        zoom = DPI / 72.0
        mat = fitz.Matrix(zoom, zoom)
        total = len(doc)

        for page_no in range(1, total + 1):  # 1-based page numbers
            try:
                page = doc.load_page(page_no - 1)
                pix = page.get_pixmap(matrix=mat, alpha=False)
                # Both formats are written through Pixmap.save so the encoded
                # image is streamed to disk rather than built in memory first.
                if SAVE_PNG:
                    pix.save(str(out_dir / f"{page_no}.png"))
                if SAVE_JPG:
                    pix.save(str(out_dir / f"{page_no}.jpg"), jpg_quality=JPG_QUALITY)
            except Exception as e_page:
                # A single bad page must not abort the rest of the document.
                log_error(f"页面失败: {pdf_path} | 页 {page_no}/{total} | {e_page}\n{traceback.format_exc()}")
    finally:
        try:
            doc.close()
        except Exception as e_close:
            # Closing stays best-effort, but the failure is now recorded.
            log_error(f"关闭失败: {pdf_path} | {e_close}")
def find_all_pdfs(root, recursive=False):
    """Return the PDF files designated by *root* as a sorted list of paths.

    Args:
        root: A pathlib.Path pointing at a single PDF file or at a directory.
        recursive: When *root* is a directory, also search its
            sub-directories.

    Returns:
        ``[root]`` when *root* is a file with a ".pdf" suffix; the sorted list
        of PDF files found when *root* is a directory; otherwise an empty list.

    The extension is compared case-insensitively on every platform, so
    "SCAN.PDF" is found on case-sensitive file systems too. Directories whose
    name happens to end in ".pdf" are not returned.
    """
    if root.is_file() and root.suffix.lower() == ".pdf":
        return [root]
    if root.is_dir():
        # A "*.pdf" glob pattern is matched case-sensitively on POSIX, so the
        # entries are enumerated and filtered by hand instead.
        entries = root.rglob("*") if recursive else root.iterdir()
        return sorted(
            entry
            for entry in entries
            if entry.name.lower().endswith(".pdf") and entry.is_file()
        )
    return []
def main():
    """Convert every PDF found under INPUT_DIR and report the outcome.

    Exits with a message when INPUT_DIR does not exist. Otherwise the output
    root is created, start and end markers are written to the log, and each
    PDF is passed to convert_pdf_to_images; a failure on one file is logged
    and does not stop the remaining files.
    """
    if not INPUT_DIR.exists():
        raise SystemExit(f"输入目录不存在:{INPUT_DIR}")
    OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)

    # Marker that separates this run from earlier ones in the log file.
    log_error("=== 本次转换开始 ===")

    pdfs = find_all_pdfs(INPUT_DIR, recursive=RECURSIVE)
    if not pdfs:
        log_error(f"未找到PDF:{INPUT_DIR}(递归={RECURSIVE})")
        print("未找到任何PDF。")
        return

    total = len(pdfs)
    print(f"共找到 {total} 个PDF,开始转换(DPI={DPI},PNG={SAVE_PNG},JPG={SAVE_JPG}, Q={JPG_QUALITY})...")

    for position, pdf_file in enumerate(pdfs, start=1):
        print(f"[{position}/{total}] {pdf_file}")
        try:
            convert_pdf_to_images(pdf_file)
        except Exception as e_file:
            # convert_pdf_to_images handles most errors itself; this is the
            # last-resort net so that one file cannot abort the whole batch.
            log_error(f"文件失败: {pdf_file} | {e_file}\n{traceback.format_exc()}")

    log_error("=== 本次转换结束 ===")
    print("全部完成。失败详情见:", LOG_FILE)
if __name__ == '__main__':
    main()

Step 2:把图片转换成文本并存储起来
1. 基础调用测试
import os

from openai import OpenAI

# SECURITY: an API key must never be written into source code or a published
# article. The key is read from the OPENAI_API_KEY environment variable; any
# key that was ever pasted into this snippet should be treated as leaked and
# revoked in the OpenAI dashboard.
key = os.environ.get("OPENAI_API_KEY")
if not key:
    raise SystemExit("未设置环境变量 OPENAI_API_KEY,请先配置 API Key 后再运行。")
client = OpenAI(api_key=key)

# Minimal text-only request to confirm that the key and the client work.
response = client.responses.create(
    model="gpt-5",
    input="写一个关于独角兽的一句话睡前故事。",
)
print(response.output_text)
# ---output---
夜色如绸,独角兽在月光下轻踏露草,将温柔的星尘悄悄撒进你的梦里,陪你一路安睡到第一缕晨光。
2. Passing a Base64 encoded image
import base64
import mimetypes

from openai import OpenAI

client = OpenAI()


def encode_image(image_path):
    """Return the content of the file at *image_path* as Base64 text."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def guess_image_mime(image_path):
    """Return the image MIME type implied by the file extension.

    Falls back to "image/jpeg" when the extension is unknown or does not map
    to an image type, so a ".jpg" path yields "image/jpeg" as before while a
    ".png" path now yields "image/png".
    """
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        return "image/jpeg"
    return mime_type


# Path to your image
image_path = "path_to_your_image.jpg"

# Getting the Base64 string
base64_image = encode_image(image_path)

# The data URL must declare the real image type; the extension decides it
# instead of a fixed "image/jpeg".
image_mime = guess_image_mime(image_path)

response = client.responses.create(
    model="gpt-4.1",  # alternative noted by the author: gpt-4o
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "what's in this image?"},
                {
                    "type": "input_image",
                    "image_url": f"data:{image_mime};base64,{base64_image}",
                },
            ],
        }
    ],
)
print(response.output_text)
3. Passing a file ID
from openai import OpenAI

client = OpenAI()


def create_file(file_path):
    """Upload *file_path* through the Files API with purpose "vision".

    Returns the ID of the created file.
    """
    with open(file_path, "rb") as file_content:
        uploaded = client.files.create(file=file_content, purpose="vision")
    return uploaded.id


# Upload the image once and keep its file ID.
file_id = create_file("path_to_your_image.jpg")

# One user message made of a text question and a reference to the uploaded file.
question_part = {"type": "input_text", "text": "what's in this image?"}
image_part = {"type": "input_image", "file_id": file_id}
user_message = {"role": "user", "content": [question_part, image_part]}

response = client.responses.create(
    model="gpt-4.1-mini",  # alternative noted by the author: gpt-4o
    input=[user_message],
)
print(response.output_text)
4.
公众号:AI悦创【二维码】

AI悦创·编程一对一
AI悦创·推出辅导班啦,包括「Python 语言辅导班、C++ 辅导班、java 辅导班、算法/数据结构辅导班、少儿编程、pygame 游戏开发、Web、Linux」,招收学员面向国内外,国外占 80%。全部都是一对一教学:一对一辅导 + 一对一答疑 + 布置作业 + 项目实践等。当然,还有线下线上摄影课程、Photoshop、Premiere 一对一教学、QQ、微信在线,随时响应!微信:Jiabcdefh
C++ 信息奥赛题解,长期更新!长期招收一对一中小学信息奥赛集训,莆田、厦门地区有机会线下上门,其他地区线上。微信:Jiabcdefh
方法一:QQ
方法二:微信:Jiabcdefh

更新日志
2025/9/3 07:06
查看所有更新日志
4a001-于06f1e-于44fef-于d755c-于79091-于75481-于87164-于b2b61-于24617-于70353-于ae8a4-于9bfdc-于4d098-于1c35a-于cbb3a-于76989-于86c50-于027da-于
贡献者
AndersonHJBAI悦创