Spaces:
Sleeping
Sleeping
File size: 5,053 Bytes
eefd991 7eed4ab ff130e1 6435d03 ff130e1 4e0a1c0 101c278 6435d03 eefd991 4e0a1c0 eefd991 6435d03 4e0a1c0 101c278 adafdec 101c278 eefd991 4e0a1c0 101c278 4e0a1c0 adafdec 4e0a1c0 fceb48f 4e0a1c0 eefd991 adafdec 4e0a1c0 adafdec 4e0a1c0 adafdec 4e0a1c0 adafdec 7eed4ab 4e0a1c0 ff130e1 4e0a1c0 7eed4ab 4e0a1c0 7eed4ab 4e0a1c0 7eed4ab 101c278 4e0a1c0 adafdec 4e0a1c0 adafdec 7eed4ab 4e0a1c0 7eed4ab 4e0a1c0 adafdec 4e0a1c0 adafdec 101c278 adafdec fceb48f adafdec 101c278 fceb48f adafdec 101c278 fceb48f 101c278 4e0a1c0 adafdec 4e0a1c0 adafdec eefd991 adafdec eefd991 4e0a1c0 eefd991 adafdec 4e0a1c0 eefd991 4e0a1c0 eefd991 101c278 87c068a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | import json
import os
import shutil
import zipfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from uuid import uuid4
import gradio as gr
from pipeline import run_pipeline
# Baseline pipeline configuration. User-supplied JSON (see _parse_config) is
# merged on top of these keys, so every key here acts as a default.
DEFAULT_CONFIG: Dict[str, Any] = {
    "model": os.getenv("OPENAI_MODEL", "gpt-4o-mini"),  # overridable via env var
    "rewrite": False,
    "projects": [{"name": "STANDARD", "description": "Generic scoring"}],
    # OCR knobs
    "ocr_max_pages": 8,
    "ocr_dpi": 200,
    # Reporting knobs
    "top_n": 25,
    # Score cutoffs used to bucket candidates in the report output.
    "bucket_thresholds": {
        "top": 8.0,
        "strong": 6.5,
        "maybe": 5.0
    },
}

# Working tree under /tmp: staging for uploads, persistent output root, and a
# fixed-location results archive for stable Gradio downloads.
TMP_ROOT = Path("/tmp/resume_evaluator").resolve()
UPLOAD_DIR = TMP_ROOT / "input_uploads"
OUTPUT_ROOT = TMP_ROOT / "output_root"  # persistent across runs for dedupe manifest
RESULTS_ZIP = TMP_ROOT / "results.zip"
def _ensure_dirs() -> None:
    """Create the tmp root, upload staging, and output directories if absent."""
    for directory in (TMP_ROOT, UPLOAD_DIR, OUTPUT_ROOT):
        directory.mkdir(parents=True, exist_ok=True)
def _clean_upload_staging_only() -> None:
    """Reset the upload staging directory to an empty state.

    Only UPLOAD_DIR is wiped; OUTPUT_ROOT is left intact so state that
    persists across runs (e.g. the dedupe manifest) survives.
    """
    _ensure_dirs()
    # rmtree with ignore_errors tolerates a missing directory, so no
    # exists() guard is needed before wiping.
    shutil.rmtree(UPLOAD_DIR, ignore_errors=True)
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
def _parse_config(config_text: str) -> Dict[str, Any]:
    """Parse user-supplied config JSON and merge it over DEFAULT_CONFIG.

    Args:
        config_text: Raw JSON text from the UI; blank/None means "use defaults".

    Returns:
        A new dict: DEFAULT_CONFIG with the user's top-level keys applied.

    Raises:
        ValueError: if the text is not valid JSON or is not a JSON object.
    """
    from copy import deepcopy  # local import: keeps the module import block untouched

    text = (config_text or "").strip()
    if not text:
        # Deep-copy so callers mutating nested values ("projects",
        # "bucket_thresholds") cannot corrupt the shared module-level
        # defaults — dict(DEFAULT_CONFIG) was only a shallow copy.
        return deepcopy(DEFAULT_CONFIG)
    try:
        cfg = json.loads(text)
    except json.JSONDecodeError as e:
        raise ValueError(f"Config JSON is invalid: {e}") from e
    if not isinstance(cfg, dict):
        raise ValueError("Config JSON must be an object (dict).")
    merged = deepcopy(DEFAULT_CONFIG)
    # Top-level shallow merge, matching the original semantics: a user key
    # replaces the default key wholesale (nested dicts are not deep-merged).
    merged.update(cfg)
    return merged
def _stage_inputs(uploaded_files: Optional[List[str]]) -> List[str]:
    """Stage uploaded PDFs (and PDFs found inside uploaded ZIPs) into UPLOAD_DIR.

    Args:
        uploaded_files: Filesystem paths handed over by Gradio; may be None.

    Returns:
        Absolute paths of staged PDFs, order-preserving and de-duplicated.
    """
    _clean_upload_staging_only()
    staged: List[str] = []
    for raw_path in (uploaded_files or []):
        src = Path(raw_path)
        if not src.exists():
            continue
        suffix = src.suffix.lower()
        if suffix == ".zip":
            # NOTE(review): extractall on a user-supplied archive relies on
            # zipfile's member-name sanitization — consider vetting entries
            # explicitly if archives come from untrusted users.
            with zipfile.ZipFile(src, "r") as archive:
                archive.extractall(UPLOAD_DIR)
            # Pick up every PDF now present in staging (sorted for determinism).
            staged += [str(p.resolve()) for p in sorted(UPLOAD_DIR.rglob("*.pdf"))]
        elif suffix == ".pdf":
            dst = UPLOAD_DIR / src.name
            shutil.copy2(src, dst)
            staged.append(str(dst.resolve()))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(staged))
def _zip_dir(src_dir: Path, zip_path: Path) -> None:
if zip_path.exists():
zip_path.unlink()
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
for p in sorted(src_dir.rglob("*")):
if p.is_file():
z.write(p, arcname=str(p.relative_to(src_dir)))
def process(uploaded_files: Optional[List[str]], config_text: str) -> Tuple[str, Optional[str]]:
    """Gradio click handler: stage uploads, run the pipeline, report results.

    Args:
        uploaded_files: File paths from the gr.File component (PDFs and/or ZIPs).
        config_text: JSON config text from the UI (may be blank for defaults).

    Returns:
        (status message, path to the downloadable results ZIP or None on failure).
        Errors are returned as status text rather than raised, so the UI
        always gets a message instead of a stack trace.
    """
    _ensure_dirs()
    cfg = _parse_config(config_text)
    pdf_paths = _stage_inputs(uploaded_files)
    if not pdf_paths:
        return ("No PDFs found. Upload PDFs or a ZIP containing PDFs.", None)
    job_id = uuid4().hex[:10]  # short stable id
    try:
        result = run_pipeline(
            input_files=pdf_paths,
            config=cfg,
            output_root=str(OUTPUT_ROOT),
            job_id=job_id,
        )
    except Exception as e:  # UI boundary: surface the failure as status text
        return (f"Pipeline failed: {type(e).__name__}: {e}", None)
    # Fix: the original also read result["job_dir"] into an unused local,
    # which could raise an uncaught KeyError for a value that was never used.
    zip_path = Path(result["zip_path"])
    # Best-effort copy to a fixed path for Gradio download stability.
    try:
        if RESULTS_ZIP.exists():
            RESULTS_ZIP.unlink()
        shutil.copy2(zip_path, RESULTS_ZIP)
    except Exception:
        # not fatal — fall back to serving the job's own zip below
        pass
    counts = result.get("counts", {})
    status = (
        f"job_id={job_id} | "
        f"total={counts.get('total', 0)} "
        f"success={counts.get('success', 0)} "
        f"skipped={counts.get('skipped', 0)} "
        f"failed={counts.get('failed', 0)}"
    )
    return (status, str(RESULTS_ZIP if RESULTS_ZIP.exists() else zip_path))
def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire the Process button to process()."""
    with gr.Blocks(title="Resume Evaluator") as demo:
        gr.Markdown("# Resume Evaluator")
        file_input = gr.File(
            label="Upload PDF(s) or a ZIP",
            file_count="multiple",
            type="filepath",
        )
        # Pre-populate the editor with the defaults so users can tweak in place.
        config_editor = gr.Code(
            label="Config JSON (optional)",
            language="json",
            value=json.dumps(DEFAULT_CONFIG, indent=2),
        )
        run_button = gr.Button("Process", variant="primary")
        status_box = gr.Textbox(label="Status", interactive=False)
        zip_output = gr.File(label="Download Results ZIP", interactive=False)
        run_button.click(
            fn=process,
            inputs=[file_input, config_editor],
            outputs=[status_box, zip_output],
        )
    return demo
# Build the UI at import time so hosting runners (e.g. HF Spaces) that import
# this module can find a module-level `demo` without executing the guard below.
demo = build_ui()
if __name__ == "__main__":
    # Keep SSR off; it’s still noisy in HF
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)