Spaces:
Sleeping
Sleeping
Commit ·
4ef9c2c
1
Parent(s): 836ce92
Use RapidOCR/EasyOCR defaults; avoid Paddle runtime downloads
Browse files
- requirements.txt +3 -2
- run_manager.py +13 -6
requirements.txt
CHANGED
|
@@ -12,7 +12,8 @@ pydantic==2.12.5
|
|
| 12 |
opencv-python-headless==4.11.0.86
|
| 13 |
numpy==1.26.4
|
| 14 |
ultralytics==8.4.12
|
| 15 |
-
|
| 16 |
-
|
| 17 |
torch==2.5.1
|
|
|
|
| 18 |
clip @ git+https://github.com/openai/CLIP.git
|
|
|
|
| 12 |
opencv-python-headless==4.11.0.86
|
| 13 |
numpy==1.26.4
|
| 14 |
ultralytics==8.4.12
|
| 15 |
+
rapidocr-onnxruntime==1.4.4
|
| 16 |
+
easyocr==1.7.2
|
| 17 |
torch==2.5.1
|
| 18 |
+
torchvision==0.20.1
|
| 19 |
clip @ git+https://github.com/openai/CLIP.git
|
run_manager.py
CHANGED
|
@@ -479,20 +479,28 @@ def start_run(
|
|
| 479 |
|
| 480 |
started = time.time()
|
| 481 |
logs_path = _logs_path(run_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
log_fh = open(logs_path, "a", encoding="utf-8", buffering=1)
|
| 483 |
log_fh.write(
|
| 484 |
f"[runner] run_id={run_id} variant={variant} started_at_epoch={started}\n"
|
| 485 |
f"[runner] command={' '.join(cmd)}\n"
|
| 486 |
f"[runner] cwd={PIPELINES_DIR}\n\n"
|
| 487 |
f"[runner] heartbeat_interval_sec={log_heartbeat_sec}\n"
|
| 488 |
-
f"[runner] python_unbuffered=1\n\n"
|
|
|
|
|
|
|
|
|
|
| 489 |
)
|
| 490 |
log_fh.flush()
|
| 491 |
|
| 492 |
-
child_env = os.environ.copy()
|
| 493 |
-
child_env["PYTHONUNBUFFERED"] = "1"
|
| 494 |
-
child_env.setdefault("PYTHONIOENCODING", "utf-8")
|
| 495 |
-
|
| 496 |
proc = subprocess.Popen(
|
| 497 |
cmd,
|
| 498 |
cwd=str(PIPELINES_DIR),
|
|
@@ -578,4 +586,3 @@ def get_final_output(run_id: str, condensed: bool = False) -> Dict[str, Any]:
|
|
| 578 |
if not out_file.exists():
|
| 579 |
raise FileNotFoundError(f"Output not found: {out_file}")
|
| 580 |
return _read_json(out_file)
|
| 581 |
-
|
|
|
|
| 479 |
|
| 480 |
started = time.time()
|
| 481 |
logs_path = _logs_path(run_id)
|
| 482 |
+
child_env = os.environ.copy()
|
| 483 |
+
child_env["PYTHONUNBUFFERED"] = "1"
|
| 484 |
+
child_env.setdefault("PYTHONIOENCODING", "utf-8")
|
| 485 |
+
# Prefer faster OCR initialization by default in hosted runs.
|
| 486 |
+
# Space-level env vars can override these.
|
| 487 |
+
child_env.setdefault("OCR_MODE", "cpu")
|
| 488 |
+
child_env.setdefault("OCR_BACKEND_CPU", "rapidocr")
|
| 489 |
+
child_env.setdefault("OCR_BACKEND_GPU", "easyocr")
|
| 490 |
+
|
| 491 |
log_fh = open(logs_path, "a", encoding="utf-8", buffering=1)
|
| 492 |
log_fh.write(
|
| 493 |
f"[runner] run_id={run_id} variant={variant} started_at_epoch={started}\n"
|
| 494 |
f"[runner] command={' '.join(cmd)}\n"
|
| 495 |
f"[runner] cwd={PIPELINES_DIR}\n\n"
|
| 496 |
f"[runner] heartbeat_interval_sec={log_heartbeat_sec}\n"
|
| 497 |
+
f"[runner] python_unbuffered=1\n"
|
| 498 |
+
f"[runner] ocr_mode={child_env.get('OCR_MODE')} "
|
| 499 |
+
f"ocr_backend_cpu={child_env.get('OCR_BACKEND_CPU')} "
|
| 500 |
+
f"ocr_backend_gpu={child_env.get('OCR_BACKEND_GPU')}\n\n"
|
| 501 |
)
|
| 502 |
log_fh.flush()
|
| 503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
proc = subprocess.Popen(
|
| 505 |
cmd,
|
| 506 |
cwd=str(PIPELINES_DIR),
|
|
|
|
| 586 |
if not out_file.exists():
|
| 587 |
raise FileNotFoundError(f"Output not found: {out_file}")
|
| 588 |
return _read_json(out_file)
|
|
|