# minerU / app.py
# uatjonas's picture
# Update app.py
# 1711781 verified
import json
import os
import shutil
import subprocess
import sys

from huggingface_hub import snapshot_download
# 1. Resolve the paths used by the rest of the setup.
# HOME is read from the environment, falling back to "/home/user" when unset.
HOME = os.getenv("HOME", "/home/user")
# Both the model directory and the generated config file sit directly in HOME.
MODEL_DIR = os.path.join(HOME, "models")
CONFIG_FILE = os.path.join(HOME, "magic-pdf.json")
print("--- Starting MinerU Setup ---")
# 2. Download Models (if not present)
# The existence of MODEL_DIR is the only signal used to decide that the models
# are already available, so a failed download must not leave the directory
# behind.
if not os.path.exists(MODEL_DIR):
    print(f"Downloading models to {MODEL_DIR}...")
    try:
        snapshot_download(
            "opendatalab/PDF-Extract-Kit-1.0",
            local_dir=MODEL_DIR,
            max_workers=4,
        )
    except Exception as e:
        print(f"Error downloading models: {e}")
        # MODEL_DIR did not exist before this attempt, so whatever is in it now
        # is partial output of the failed download. Remove it; otherwise the
        # next start would find the directory and skip the download entirely.
        shutil.rmtree(MODEL_DIR, ignore_errors=True)
        sys.exit(1)
    else:
        print("Model download complete.")
else:
    print("Models found. Skipping download.")
# 3. Generate magic-pdf.json Config
# The config points at the "models" subfolder of MODEL_DIR.
# NOTE(review): presumably this matches the layout of the downloaded
# snapshot - confirm against the repository contents.
REAL_MODEL_DIR = os.path.join(MODEL_DIR, "models")

# Table settings are kept separate so the top-level config stays easy to scan.
table_settings = {
    "model": "TableMaster",
    "is_table_recog_enable": False,
    "max_time": 400,
}
config_data = {
    "models-dir": REAL_MODEL_DIR,
    "device-mode": "cpu",
    "table-config": table_settings,
}

print(f"Writing configuration to {CONFIG_FILE}...")
# Serialize first, then overwrite CONFIG_FILE with the result.
# NOTE(review): whether the mineru-api process launched afterwards reads this
# file is not visible here - confirm against the installed MinerU version.
serialized = json.dumps(config_data, indent=4)
with open(CONFIG_FILE, "w") as config_fh:
    config_fh.write(serialized)
# 4. Launch the MinerU REST API
# subprocess.run blocks until the launched process ends, so this is the last
# step of the script.
print("Launching MinerU REST API...")
command = [
    "mineru-api",
    "--host", "0.0.0.0",
    "--port", "7860",
]
completed = subprocess.run(command)
# Propagate the child's exit status: without this the script would exit with
# status 0 even when mineru-api terminated with an error.
sys.exit(completed.returncode)