Spaces:
Sleeping
Sleeping
File size: 5,053 Bytes
eefd991 7eed4ab ff130e1 6435d03 ff130e1 4e0a1c0 101c278 6435d03 eefd991 4e0a1c0 eefd991 6435d03 4e0a1c0 101c278 adafdec 101c278 eefd991 4e0a1c0 101c278 4e0a1c0 adafdec 4e0a1c0 fceb48f 4e0a1c0 eefd991 adafdec 4e0a1c0 adafdec 4e0a1c0 adafdec 4e0a1c0 adafdec 7eed4ab 4e0a1c0 ff130e1 4e0a1c0 7eed4ab 4e0a1c0 7eed4ab 4e0a1c0 7eed4ab 101c278 4e0a1c0 adafdec 4e0a1c0 adafdec 7eed4ab 4e0a1c0 7eed4ab 4e0a1c0 adafdec 4e0a1c0 adafdec 101c278 adafdec fceb48f adafdec 101c278 fceb48f adafdec 101c278 fceb48f 101c278 4e0a1c0 adafdec 4e0a1c0 adafdec eefd991 adafdec eefd991 4e0a1c0 eefd991 adafdec 4e0a1c0 eefd991 4e0a1c0 eefd991 101c278 87c068a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | import json
import os
import shutil
import zipfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from uuid import uuid4
import gradio as gr
from pipeline import run_pipeline
# Baseline pipeline configuration. User-supplied JSON (see _parse_config) is
# merged on top of these keys, so every key here acts as a default.
DEFAULT_CONFIG: Dict[str, Any] = {
    "model": os.getenv("OPENAI_MODEL", "gpt-4o-mini"),  # overridable via env var
    "rewrite": False,
    "projects": [{"name": "STANDARD", "description": "Generic scoring"}],
    # OCR knobs
    "ocr_max_pages": 8,
    "ocr_dpi": 200,
    # Reporting knobs
    "top_n": 25,
    # Score cutoffs used to bucket candidates in the report output.
    "bucket_thresholds": {
        "top": 8.0,
        "strong": 6.5,
        "maybe": 5.0
    },
}

# Working tree under /tmp: staging for uploads, persistent output root, and a
# fixed-location results archive for stable Gradio downloads.
TMP_ROOT = Path("/tmp/resume_evaluator").resolve()
UPLOAD_DIR = TMP_ROOT / "input_uploads"
OUTPUT_ROOT = TMP_ROOT / "output_root"  # persistent across runs for dedupe manifest
RESULTS_ZIP = TMP_ROOT / "results.zip"
def _ensure_dirs() -> None:
    """Create the tmp root, upload staging, and output directories if absent."""
    for directory in (TMP_ROOT, UPLOAD_DIR, OUTPUT_ROOT):
        directory.mkdir(parents=True, exist_ok=True)
def _clean_upload_staging_only() -> None:
    """Reset the upload staging directory to an empty state.

    Only UPLOAD_DIR is wiped; OUTPUT_ROOT is left intact so state that
    persists across runs (e.g. the dedupe manifest) survives.
    """
    _ensure_dirs()
    # rmtree with ignore_errors tolerates a missing directory, so no
    # exists() guard is needed before wiping.
    shutil.rmtree(UPLOAD_DIR, ignore_errors=True)
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
def _parse_config(config_text: str) -> Dict[str, Any]:
    """Parse user-supplied config JSON and merge it over DEFAULT_CONFIG.

    Args:
        config_text: Raw JSON text from the UI; blank/None means "use defaults".

    Returns:
        A new dict: DEFAULT_CONFIG with the user's top-level keys applied.

    Raises:
        ValueError: if the text is not valid JSON or is not a JSON object.
    """
    from copy import deepcopy  # local import: keeps the module import block untouched

    text = (config_text or "").strip()
    if not text:
        # Deep-copy so callers mutating nested values ("projects",
        # "bucket_thresholds") cannot corrupt the shared module-level
        # defaults — dict(DEFAULT_CONFIG) was only a shallow copy.
        return deepcopy(DEFAULT_CONFIG)
    try:
        cfg = json.loads(text)
    except json.JSONDecodeError as e:
        raise ValueError(f"Config JSON is invalid: {e}") from e
    if not isinstance(cfg, dict):
        raise ValueError("Config JSON must be an object (dict).")
    merged = deepcopy(DEFAULT_CONFIG)
    # Top-level shallow merge, matching the original semantics: a user key
    # replaces the default key wholesale (nested dicts are not deep-merged).
    merged.update(cfg)
    return merged
def _stage_inputs(uploaded_files: Optional[List[str]]) -> List[str]:
    """Stage uploaded PDFs (and PDFs found inside uploaded ZIPs) into UPLOAD_DIR.

    Args:
        uploaded_files: Filesystem paths handed over by Gradio; may be None.

    Returns:
        Absolute paths of staged PDFs, order-preserving and de-duplicated.
    """
    _clean_upload_staging_only()
    staged: List[str] = []
    for raw_path in (uploaded_files or []):
        src = Path(raw_path)
        if not src.exists():
            continue
        suffix = src.suffix.lower()
        if suffix == ".zip":
            # NOTE(review): extractall on a user-supplied archive relies on
            # zipfile's member-name sanitization — consider vetting entries
            # explicitly if archives come from untrusted users.
            with zipfile.ZipFile(src, "r") as archive:
                archive.extractall(UPLOAD_DIR)
            # Pick up every PDF now present in staging (sorted for determinism).
            staged += [str(p.resolve()) for p in sorted(UPLOAD_DIR.rglob("*.pdf"))]
        elif suffix == ".pdf":
            dst = UPLOAD_DIR / src.name
            shutil.copy2(src, dst)
            staged.append(str(dst.resolve()))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(staged))
def _zip_dir(src_dir: Path, zip_path: Path) -> None:
if zip_path.exists():
zip_path.unlink()
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
for p in sorted(src_dir.rglob("*")):
if p.is_file():
z.write(p, arcname=str(p.relative_to(src_dir)))
def process(uploaded_files: Optional[List[str]], config_text: str) -> Tuple[str, Optional[str]]:
    """Gradio click handler: stage uploads, run the pipeline, report results.

    Args:
        uploaded_files: File paths from the gr.File component (PDFs and/or ZIPs).
        config_text: JSON config text from the UI (may be blank for defaults).

    Returns:
        (status message, path to the downloadable results ZIP or None on failure).
        Errors are returned as status text rather than raised, so the UI
        always gets a message instead of a stack trace.
    """
    _ensure_dirs()
    cfg = _parse_config(config_text)
    pdf_paths = _stage_inputs(uploaded_files)
    if not pdf_paths:
        return ("No PDFs found. Upload PDFs or a ZIP containing PDFs.", None)
    job_id = uuid4().hex[:10]  # short stable id
    try:
        result = run_pipeline(
            input_files=pdf_paths,
            config=cfg,
            output_root=str(OUTPUT_ROOT),
            job_id=job_id,
        )
    except Exception as e:  # UI boundary: surface the failure as status text
        return (f"Pipeline failed: {type(e).__name__}: {e}", None)
    # Fix: the original also read result["job_dir"] into an unused local,
    # which could raise an uncaught KeyError for a value that was never used.
    zip_path = Path(result["zip_path"])
    # Best-effort copy to a fixed path for Gradio download stability.
    try:
        if RESULTS_ZIP.exists():
            RESULTS_ZIP.unlink()
        shutil.copy2(zip_path, RESULTS_ZIP)
    except Exception:
        # not fatal — fall back to serving the job's own zip below
        pass
    counts = result.get("counts", {})
    status = (
        f"job_id={job_id} | "
        f"total={counts.get('total', 0)} "
        f"success={counts.get('success', 0)} "
        f"skipped={counts.get('skipped', 0)} "
        f"failed={counts.get('failed', 0)}"
    )
    return (status, str(RESULTS_ZIP if RESULTS_ZIP.exists() else zip_path))
def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire the Process button to process()."""
    with gr.Blocks(title="Resume Evaluator") as demo:
        gr.Markdown("# Resume Evaluator")
        file_input = gr.File(
            label="Upload PDF(s) or a ZIP",
            file_count="multiple",
            type="filepath",
        )
        # Pre-populate the editor with the defaults so users can tweak in place.
        config_editor = gr.Code(
            label="Config JSON (optional)",
            language="json",
            value=json.dumps(DEFAULT_CONFIG, indent=2),
        )
        run_button = gr.Button("Process", variant="primary")
        status_box = gr.Textbox(label="Status", interactive=False)
        zip_output = gr.File(label="Download Results ZIP", interactive=False)
        run_button.click(
            fn=process,
            inputs=[file_input, config_editor],
            outputs=[status_box, zip_output],
        )
    return demo
# Build the UI at import time so hosting runners (e.g. HF Spaces) that import
# this module can find a module-level `demo` without executing the guard below.
demo = build_ui()
if __name__ == "__main__":
    # Keep SSR off; it’s still noisy in HF
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)