agrim12345 committed on
Commit
4ef9c2c
·
1 Parent(s): 836ce92

Use RapidOCR/EasyOCR defaults; avoid Paddle runtime downloads

Browse files
Files changed (2) hide show
  1. requirements.txt +3 -2
  2. run_manager.py +13 -6
requirements.txt CHANGED
@@ -12,7 +12,8 @@ pydantic==2.12.5
12
  opencv-python-headless==4.11.0.86
13
  numpy==1.26.4
14
  ultralytics==8.4.12
15
- paddleocr==2.7.3
16
- paddlepaddle==2.6.2
17
  torch==2.5.1
 
18
  clip @ git+https://github.com/openai/CLIP.git
 
12
  opencv-python-headless==4.11.0.86
13
  numpy==1.26.4
14
  ultralytics==8.4.12
15
+ rapidocr-onnxruntime==1.4.4
16
+ easyocr==1.7.2
17
  torch==2.5.1
18
+ torchvision==0.20.1
19
  clip @ git+https://github.com/openai/CLIP.git
run_manager.py CHANGED
@@ -479,20 +479,28 @@ def start_run(
479
 
480
  started = time.time()
481
  logs_path = _logs_path(run_id)
 
 
 
 
 
 
 
 
 
482
  log_fh = open(logs_path, "a", encoding="utf-8", buffering=1)
483
  log_fh.write(
484
  f"[runner] run_id={run_id} variant={variant} started_at_epoch={started}\n"
485
  f"[runner] command={' '.join(cmd)}\n"
486
  f"[runner] cwd={PIPELINES_DIR}\n\n"
487
  f"[runner] heartbeat_interval_sec={log_heartbeat_sec}\n"
488
- f"[runner] python_unbuffered=1\n\n"
 
 
 
489
  )
490
  log_fh.flush()
491
 
492
- child_env = os.environ.copy()
493
- child_env["PYTHONUNBUFFERED"] = "1"
494
- child_env.setdefault("PYTHONIOENCODING", "utf-8")
495
-
496
  proc = subprocess.Popen(
497
  cmd,
498
  cwd=str(PIPELINES_DIR),
@@ -578,4 +586,3 @@ def get_final_output(run_id: str, condensed: bool = False) -> Dict[str, Any]:
578
  if not out_file.exists():
579
  raise FileNotFoundError(f"Output not found: {out_file}")
580
  return _read_json(out_file)
581
-
 
479
 
480
  started = time.time()
481
  logs_path = _logs_path(run_id)
482
+ child_env = os.environ.copy()
483
+ child_env["PYTHONUNBUFFERED"] = "1"
484
+ child_env.setdefault("PYTHONIOENCODING", "utf-8")
485
+ # Prefer faster OCR initialization by default in hosted runs.
486
+ # Space-level env vars can override these.
487
+ child_env.setdefault("OCR_MODE", "cpu")
488
+ child_env.setdefault("OCR_BACKEND_CPU", "rapidocr")
489
+ child_env.setdefault("OCR_BACKEND_GPU", "easyocr")
490
+
491
  log_fh = open(logs_path, "a", encoding="utf-8", buffering=1)
492
  log_fh.write(
493
  f"[runner] run_id={run_id} variant={variant} started_at_epoch={started}\n"
494
  f"[runner] command={' '.join(cmd)}\n"
495
  f"[runner] cwd={PIPELINES_DIR}\n\n"
496
  f"[runner] heartbeat_interval_sec={log_heartbeat_sec}\n"
497
+ f"[runner] python_unbuffered=1\n"
498
+ f"[runner] ocr_mode={child_env.get('OCR_MODE')} "
499
+ f"ocr_backend_cpu={child_env.get('OCR_BACKEND_CPU')} "
500
+ f"ocr_backend_gpu={child_env.get('OCR_BACKEND_GPU')}\n\n"
501
  )
502
  log_fh.flush()
503
 
 
 
 
 
504
  proc = subprocess.Popen(
505
  cmd,
506
  cwd=str(PIPELINES_DIR),
 
586
  if not out_file.exists():
587
  raise FileNotFoundError(f"Output not found: {out_file}")
588
  return _read_json(out_file)