File size: 1,435 Bytes
307fda1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1711781
 
 
307fda1
1711781
 
307fda1
 
1711781
307fda1
 
 
 
 
 
 
 
 
 
 
 
 
 
1711781
307fda1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import json
import sys
from huggingface_hub import snapshot_download
import subprocess

# 1. Setup Configuration Paths
# Both the model directory and the generated config file live directly under
# the user's home directory; "/home/user" is the fallback when HOME is unset.
HOME = os.getenv("HOME", "/home/user")
MODEL_DIR = os.path.join(HOME, "models")
CONFIG_FILE = os.path.join(HOME, "magic-pdf.json")

print("--- Starting MinerU Setup ---")

# 2. Download Models (if not complete)
# A marker file, written only after snapshot_download() has returned, records
# that the download finished. Testing for the directory alone is not enough:
# an interrupted download (or an empty, pre-created directory) leaves
# MODEL_DIR in place and would make every later start skip the download and
# run against missing model files.
# NOTE(review): a directory populated before this marker existed triggers one
# more snapshot_download() call; that call is expected to fetch only what is
# missing -- confirm against the installed huggingface_hub version.
DOWNLOAD_MARKER = os.path.join(MODEL_DIR, ".download_complete")

if not os.path.isfile(DOWNLOAD_MARKER):
    print(f"Downloading models to {MODEL_DIR}...")
    try:
        snapshot_download(
            "opendatalab/PDF-Extract-Kit-1.0",
            local_dir=MODEL_DIR,
            max_workers=4,
        )
    except Exception as e:
        # Top-level boundary of the script: report the failure and abort with
        # a non-zero status instead of continuing without models.
        print(f"Error downloading models: {e}")
        sys.exit(1)
    else:
        # Record success so that later starts can skip the download.
        os.makedirs(MODEL_DIR, exist_ok=True)
        with open(DOWNLOAD_MARKER, "w") as marker:
            marker.write("ok\n")
        print("Model download complete.")
else:
    print("Models found. Skipping download.")

# 3. Generate magic-pdf.json Config
# The config references the "models" subdirectory of MODEL_DIR rather than
# MODEL_DIR itself.
# NOTE(review): presumably the downloaded repository keeps its weights under a
# "models/" subfolder -- confirm against the repository layout.
REAL_MODEL_DIR = os.path.join(MODEL_DIR, "models")

# Table recognition is switched off; the remaining table settings are still
# written so the config file is complete.
config_data = {
    "models-dir": REAL_MODEL_DIR,
    "device-mode": "cpu",
    "table-config": dict(
        model="TableMaster",
        is_table_recog_enable=False,
        max_time=400,
    ),
}

print(f"Writing configuration to {CONFIG_FILE}...")
with open(CONFIG_FILE, "w") as out:
    # Serialise with a 4-space indent and overwrite any previous config.
    out.write(json.dumps(config_data, indent=4))

# 4. Launch the MinerU REST API
print("Launching MinerU REST API...")

# Bind to all interfaces on port 7860.
command = [
    "mineru-api",
    "--host", "0.0.0.0",
    "--port", "7860",
]

# subprocess.run() blocks until the server process exits. Its exit status is
# forwarded as this script's own exit status so that a supervisor (container
# runtime, shell, CI) can tell a crashed server from a clean shutdown.
try:
    completed = subprocess.run(command)
except OSError as e:
    # Raised when the executable is missing or cannot be executed; report it
    # plainly instead of dying with a traceback.
    print(f"Error launching {command[0]}: {e}", file=sys.stderr)
    sys.exit(1)

sys.exit(completed.returncode)