File size: 5,053 Bytes
eefd991
7eed4ab
ff130e1
6435d03
ff130e1
4e0a1c0
101c278
6435d03
 
 
eefd991
4e0a1c0
 
eefd991
6435d03
4e0a1c0
101c278
adafdec
 
101c278
 
 
 
 
 
 
eefd991
 
4e0a1c0
 
101c278
4e0a1c0
 
 
adafdec
 
4e0a1c0
fceb48f
4e0a1c0
eefd991
adafdec
 
 
 
 
 
 
4e0a1c0
 
 
 
 
 
 
 
 
 
adafdec
 
 
4e0a1c0
adafdec
4e0a1c0
 
 
 
adafdec
7eed4ab
4e0a1c0
ff130e1
4e0a1c0
7eed4ab
 
 
 
 
 
4e0a1c0
 
7eed4ab
 
 
4e0a1c0
7eed4ab
 
 
101c278
4e0a1c0
adafdec
4e0a1c0
 
 
adafdec
 
7eed4ab
 
4e0a1c0
 
 
 
 
 
 
7eed4ab
4e0a1c0
 
adafdec
4e0a1c0
 
 
 
 
adafdec
 
101c278
adafdec
fceb48f
adafdec
 
101c278
fceb48f
adafdec
 
 
 
101c278
 
fceb48f
101c278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e0a1c0
 
 
 
 
 
adafdec
 
 
 
 
4e0a1c0
adafdec
 
 
 
 
eefd991
adafdec
eefd991
4e0a1c0
 
eefd991
adafdec
4e0a1c0
eefd991
 
4e0a1c0
eefd991
 
101c278
87c068a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import copy
import json
import os
import shutil
import zipfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from uuid import uuid4

import gradio as gr

from pipeline import run_pipeline

# Baseline configuration. _parse_config() overlays user-supplied JSON on top of
# it, build_ui() shows it as the initial content of the config editor, and
# process() hands the merged result to run_pipeline() as `config`.
# NOTE(review): the meaning of the individual keys is defined by the pipeline
# module, which is not visible from this file -- confirm there before relying
# on the short hints below.
DEFAULT_CONFIG: Dict[str, Any] = {
    # Read once at import time; falls back to "gpt-4o-mini" when the
    # OPENAI_MODEL environment variable is not set.
    "model": os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
    "rewrite": False,
    "projects": [{"name": "STANDARD", "description": "Generic scoring"}],
    # OCR knobs
    "ocr_max_pages": 8,
    "ocr_dpi": 200,
    # Reporting knobs
    "top_n": 25,
    "bucket_thresholds": {
        "top": 8.0,
        "strong": 6.5,
        "maybe": 5.0
    },
}

# Scratch root for this app; every other path below is derived from it.
TMP_ROOT = Path("/tmp/resume_evaluator").resolve()
# Staging area for uploads; wiped and recreated by _clean_upload_staging_only().
UPLOAD_DIR = TMP_ROOT / "input_uploads"
OUTPUT_ROOT = TMP_ROOT / "output_root"     # persistent across runs for dedupe manifest
# Fixed location where process() places a copy of the most recent results archive.
RESULTS_ZIP = TMP_ROOT / "results.zip"


def _ensure_dirs() -> None:
    """Create the scratch root and its upload/output directories if missing."""
    # The root comes first, although parents=True would create it anyway.
    for directory in (TMP_ROOT, UPLOAD_DIR, OUTPUT_ROOT):
        directory.mkdir(parents=True, exist_ok=True)


def _clean_upload_staging_only() -> None:
    """Empty the upload staging directory while leaving OUTPUT_ROOT untouched.

    The directory is removed on a best-effort basis (deletion errors are
    ignored) and then recreated, so it exists when this function returns.
    """
    _ensure_dirs()
    staging_dir = UPLOAD_DIR
    if staging_dir.exists():
        shutil.rmtree(staging_dir, ignore_errors=True)
    staging_dir.mkdir(parents=True, exist_ok=True)


def _parse_config(config_text: str) -> Dict[str, Any]:
    """Build the effective configuration from the JSON text entered in the UI.

    Args:
        config_text: JSON document typed by the user. ``None``, an empty
            string, or whitespace-only text means "use the defaults".

    Returns:
        A new dict holding ``DEFAULT_CONFIG`` with the user's top-level keys
        laid over it. The merge is shallow: a user-supplied key replaces the
        default value wholesale, nested dicts are not merged key by key.

    Raises:
        ValueError: if the text is not valid JSON or its top-level value is
            not a JSON object.
    """
    # Deep copy so that nested defaults (the "projects" list, the
    # "bucket_thresholds" dict) are never shared with the returned config;
    # otherwise a mutation downstream would leak into every later request.
    merged = copy.deepcopy(DEFAULT_CONFIG)

    text = (config_text or "").strip()
    if not text:
        return merged

    try:
        overrides = json.loads(text)
    except json.JSONDecodeError as e:
        raise ValueError(f"Config JSON is invalid: {e}") from e

    if not isinstance(overrides, dict):
        raise ValueError("Config JSON must be an object (dict).")

    merged.update(overrides)
    return merged


def _stage_inputs(uploaded_files: Optional[List[str]]) -> List[str]:
    """Copy uploaded PDFs (and PDFs inside uploaded ZIPs) into the staging area.

    The staging directory is reset first. Paths that do not point at a regular
    file, and files whose extension is neither ``.pdf`` nor ``.zip``
    (compared case-insensitively), are skipped.

    Args:
        uploaded_files: Filesystem paths of the uploads, or ``None``.

    Returns:
        Absolute paths of the staged PDFs, without duplicates, in upload order
        (PDFs coming from one archive are sorted by path).

    Raises:
        zipfile.BadZipFile: if an uploaded ``.zip`` is not a valid archive.
        OSError: if copying or extracting fails.
    """
    _clean_upload_staging_only()

    # dict keys act as an insertion-ordered set: dedupe while keeping order.
    staged: Dict[str, None] = {}

    for index, f in enumerate(uploaded_files or []):
        src = Path(f)
        if not src.is_file():
            continue

        suffix = src.suffix.lower()

        if suffix == ".zip":
            # Each archive gets its own directory so its members cannot
            # overwrite files staged from another upload.
            extract_dir = UPLOAD_DIR / f"zip_{index:03d}"
            extract_dir.mkdir(parents=True, exist_ok=True)
            with zipfile.ZipFile(src, "r") as z:
                z.extractall(extract_dir)
            # Match the extension case-insensitively, like the direct-upload
            # branch does, so "CV.PDF" inside an archive is not ignored.
            for p in sorted(extract_dir.rglob("*")):
                if p.is_file() and p.suffix.lower() == ".pdf":
                    staged[str(p.resolve())] = None
            continue

        if suffix == ".pdf":
            dst = UPLOAD_DIR / src.name
            if dst.exists():
                # A different upload already used this file name; keep both by
                # placing this one in a per-upload subdirectory (the file name
                # itself is preserved).
                collision_dir = UPLOAD_DIR / f"dup_{index:03d}"
                collision_dir.mkdir(parents=True, exist_ok=True)
                dst = collision_dir / src.name
            shutil.copy2(src, dst)
            staged[str(dst.resolve())] = None

    return list(staged)


def _zip_dir(src_dir: Path, zip_path: Path) -> None:
    """Write every regular file under ``src_dir`` into a new archive.

    Args:
        src_dir: Directory whose files are archived recursively. Member names
            are the files' paths relative to this directory.
        zip_path: Destination archive; an existing file there is replaced.
    """
    if zip_path.exists():
        zip_path.unlink()

    # Snapshot the file list BEFORE the archive is created: when zip_path
    # lives inside src_dir, walking the tree afterwards would pick up the
    # half-written archive and store it inside itself.
    members = sorted(p for p in src_dir.rglob("*") if p.is_file())

    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
        for p in members:
            z.write(p, arcname=p.relative_to(src_dir).as_posix())


def process(uploaded_files: Optional[List[str]], config_text: str) -> Tuple[str, Optional[str]]:
    """Run the pipeline on the uploaded files; handler for the "Process" button.

    Args:
        uploaded_files: Paths of the uploaded files, or ``None``.
        config_text: JSON text from the config editor (may be empty).

    Returns:
        ``(status, download_path)``. ``status`` is a one-line summary or an
        error message. ``download_path`` is the results archive to offer for
        download, or ``None`` when nothing was produced.
    """
    _ensure_dirs()

    # Report bad user input in the status box instead of letting the
    # exception escape the handler.
    try:
        cfg = _parse_config(config_text)
    except ValueError as e:
        return (str(e), None)

    try:
        pdf_paths = _stage_inputs(uploaded_files)
    except (zipfile.BadZipFile, OSError) as e:
        return (f"Could not read uploads: {type(e).__name__}: {e}", None)

    if not pdf_paths:
        return ("No PDFs found. Upload PDFs or a ZIP containing PDFs.", None)

    job_id = uuid4().hex[:10]  # short random id for this run
    try:
        result = run_pipeline(
            input_files=pdf_paths,
            config=cfg,
            output_root=str(OUTPUT_ROOT),
            job_id=job_id,
        )
    except Exception as e:
        # Top-level boundary: any pipeline error becomes a status message.
        return (f"Pipeline failed: {type(e).__name__}: {e}", None)

    zip_path = Path(result["zip_path"])

    # Best effort: also place a copy at the fixed RESULTS_ZIP location. Only
    # switch to that copy when THIS run's copy succeeded, so a leftover file
    # from an earlier run can never be returned by mistake.
    download_path = zip_path
    try:
        if RESULTS_ZIP.exists():
            RESULTS_ZIP.unlink()
        shutil.copy2(zip_path, RESULTS_ZIP)
    except OSError:
        # Not fatal: fall back to the archive written by the pipeline.
        pass
    else:
        download_path = RESULTS_ZIP

    counts = result.get("counts", {})
    status = (
        f"job_id={job_id} | "
        f"total={counts.get('total', 0)} "
        f"success={counts.get('success', 0)} "
        f"skipped={counts.get('skipped', 0)} "
        f"failed={counts.get('failed', 0)}"
    )
    return (status, str(download_path))


def build_ui() -> gr.Blocks:
    """Assemble the Gradio app: upload widget, config editor, run button, outputs.

    Returns:
        The ``gr.Blocks`` instance with the button wired to ``process``.
    """
    # The editor starts out showing the default configuration as pretty JSON.
    default_config_json = json.dumps(DEFAULT_CONFIG, indent=2)

    with gr.Blocks(title="Resume Evaluator") as app:
        gr.Markdown("# Resume Evaluator")

        upload_box = gr.File(
            label="Upload PDF(s) or a ZIP",
            file_count="multiple",
            type="filepath",
        )
        config_editor = gr.Code(
            label="Config JSON (optional)",
            language="json",
            value=default_config_json,
        )
        run_button = gr.Button("Process", variant="primary")

        status_box = gr.Textbox(label="Status", interactive=False)
        download_box = gr.File(label="Download Results ZIP", interactive=False)

        run_button.click(
            fn=process,
            inputs=[upload_box, config_editor],
            outputs=[status_box, download_box],
        )

    return app


# Built at import time, so importing this module yields a ready `demo` object.
demo = build_ui()

if __name__ == "__main__":
    # Serve on all interfaces, port 7860.
    # Keep SSR off; it's still noisy in HF
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)