Spaces:
Sleeping
Sleeping
# Upload a local PDF to a hosted OCR endpoint and save the returned PDF.
import os
import time

import requests

# API endpoint
api_url = "https://blueskyxn-ocrmypdf-hfs.hf.space/ocr/"
pdf_path = r"F:\Download\20250401-113339.pdf"
output_path = r"F:\Download\ocr_result_python.pdf"

# Form fields sent alongside the uploaded file
data = {
    "language": "eng+chi_sim",
    "deskew": "true",
    "optimize": "1",
}

# The context manager guarantees the input file is closed even if the
# request raises; any network error still propagates to the caller.
with open(pdf_path, "rb") as pdf_file:
    files = {"pdf_file": pdf_file}

    print(f"开始处理文件: {pdf_path}")
    print(f"文件大小: {os.path.getsize(pdf_path)/1024/1024:.2f} MB")

    start_time = time.time()

    # Send the request
    print("正在发送请求到OCR API...")
    response = requests.post(api_url, files=files, data=data)

# Handle the response
if response.status_code == 200:
    # Save the processed PDF
    with open(output_path, "wb") as f:
        f.write(response.content)

    print(f"PDF处理成功!耗时: {time.time() - start_time:.2f}秒")
    print(f"结果已保存到: {output_path}")
else:
    print(f"处理失败! 状态码: {response.status_code}")
    try:
        error_details = response.json()
    except ValueError:
        # The error body is not valid JSON (JSON decode errors subclass
        # ValueError); fall back to a truncated view of the raw text.
        print(f"响应内容: {response.text[:500]}...")
    else:
        print(f"错误详情: {error_details}")