Avinashnalla7 committed on
Commit
adafdec
·
verified ·
1 Parent(s): e786373

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -61
app.py CHANGED
@@ -9,29 +9,38 @@ import gradio as gr
9
 
10
  from pipeline import run_pipeline
11
 
12
- # ---- Defaults / Constants ----
13
-
14
  DEFAULT_CONFIG: Dict[str, Any] = {
15
  "model": os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
16
  "rewrite": False,
17
  "projects": [{"name": "STANDARD", "description": "Generic scoring"}],
 
 
 
18
  }
19
 
20
  TMP_ROOT = Path("/tmp/resume_evaluator").resolve()
21
  UPLOAD_DIR = TMP_ROOT / "input_uploads"
22
- OUTPUT_DIR = TMP_ROOT / "output"
23
  RESULTS_ZIP = TMP_ROOT / "results.zip"
24
 
25
 
26
- # ---- Helpers ----
27
-
28
- def _clean_tmp() -> None:
29
- if TMP_ROOT.exists():
30
- shutil.rmtree(TMP_ROOT, ignore_errors=True)
31
  UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
32
  OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
35
  def _parse_config(config_text: str) -> Dict[str, Any]:
36
  text = (config_text or "").strip()
37
  if not text:
@@ -42,17 +51,22 @@ def _parse_config(config_text: str) -> Dict[str, Any]:
42
  except json.JSONDecodeError as e:
43
  raise ValueError(f"Config JSON is invalid: {e}") from e
44
 
45
- # Merge user config over defaults (shallow merge is enough for your current shape)
 
 
46
  merged = dict(DEFAULT_CONFIG)
47
- merged.update(cfg if isinstance(cfg, dict) else {})
48
  return merged
49
 
50
 
51
  def _stage_inputs(uploaded_files: Optional[List[str]]) -> List[str]:
52
  """
53
- Gradio gives file paths when using type='filepath'.
54
- We copy into /tmp for stable paths. If ZIP is uploaded, extract PDFs.
 
55
  """
 
 
56
  staged: List[str] = []
57
  uploaded_files = uploaded_files or []
58
 
@@ -61,28 +75,25 @@ def _stage_inputs(uploaded_files: Optional[List[str]]) -> List[str]:
61
  if not src.exists():
62
  continue
63
 
64
- # ZIP case
65
  if src.suffix.lower() == ".zip":
66
  with zipfile.ZipFile(src, "r") as z:
67
  z.extractall(UPLOAD_DIR)
68
  staged.extend([str(p.resolve()) for p in sorted(UPLOAD_DIR.rglob("*.pdf"))])
69
  continue
70
 
71
- # PDF case
72
  if src.suffix.lower() == ".pdf":
73
  dst = UPLOAD_DIR / src.name
74
  shutil.copy2(src, dst)
75
  staged.append(str(dst.resolve()))
76
 
77
- # Deduplicate while preserving order
78
  seen = set()
79
- unique: List[str] = []
80
  for p in staged:
81
  if p not in seen:
82
  seen.add(p)
83
- unique.append(p)
84
-
85
- return unique
86
 
87
 
88
  def _zip_dir(src_dir: Path, zip_path: Path) -> None:
@@ -95,32 +106,27 @@ def _zip_dir(src_dir: Path, zip_path: Path) -> None:
95
  z.write(p, arcname=str(p.relative_to(src_dir)))
96
 
97
 
98
- # ---- Gradio handler ----
99
-
100
  def process(uploaded_files: Optional[List[str]], config_text: str) -> Tuple[str, Optional[str]]:
101
  """
102
- Returns:
103
- (status_text, results_zip_path_or_none)
104
  """
105
- _clean_tmp()
106
 
107
  cfg = _parse_config(config_text)
108
  pdf_paths = _stage_inputs(uploaded_files)
109
 
110
  if not pdf_paths:
111
- return ("No PDFs found. Upload one or more PDFs, or a ZIP containing PDFs.", None)
112
-
113
- # Run pipeline
114
- try:
115
- run_pipeline(
116
- input_files=pdf_paths,
117
- config=cfg,
118
- output_dir=str(OUTPUT_DIR),
119
- )
120
- except Exception as e:
121
- return (f"Pipeline failed: {type(e).__name__}: {e}", None)
122
-
123
- # Zip results
124
  try:
125
  _zip_dir(OUTPUT_DIR, RESULTS_ZIP)
126
  except Exception as e:
@@ -129,44 +135,32 @@ except Exception as e:
129
  return (f"Processed {len(pdf_paths)} file(s). Results are ready.", str(RESULTS_ZIP))
130
 
131
 
132
- # ---- UI ----
133
-
134
  def build_ui() -> gr.Blocks:
135
  with gr.Blocks(title="Resume Evaluator") as demo:
136
  gr.Markdown("# Resume Evaluator")
137
 
138
- with gr.Group():
139
- files = gr.File(
140
- label="Upload PDF(s) or a ZIP",
141
- file_count="multiple",
142
- type="filepath",
143
- )
144
 
145
- with gr.Group():
146
- config = gr.Code(
147
- label="Config JSON (optional)",
148
- language="json",
149
- value=json.dumps(DEFAULT_CONFIG, indent=2),
150
- )
151
 
152
- with gr.Row():
153
- btn = gr.Button("Process", variant="primary")
154
 
155
  status = gr.Textbox(label="Status", interactive=False)
156
-
157
  results = gr.File(label="Download Results ZIP", interactive=False)
158
 
159
- btn.click(
160
- fn=process,
161
- inputs=[files, config],
162
- outputs=[status, results],
163
- )
164
-
165
  return demo
166
 
167
 
168
  demo = build_ui()
169
 
170
  if __name__ == "__main__":
171
- # Keep it explicit for HF Spaces
172
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
9
 
10
  from pipeline import run_pipeline
11
 
 
 
12
  DEFAULT_CONFIG: Dict[str, Any] = {
13
  "model": os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
14
  "rewrite": False,
15
  "projects": [{"name": "STANDARD", "description": "Generic scoring"}],
16
+ # OCR knobs (optional)
17
+ "ocr_max_pages": 8,
18
+ "ocr_dpi": 200,
19
  }
20
 
21
  TMP_ROOT = Path("/tmp/resume_evaluator").resolve()
22
  UPLOAD_DIR = TMP_ROOT / "input_uploads"
23
+ OUTPUT_DIR = TMP_ROOT / "output" # IMPORTANT: do not delete if you want dedupe persistence
24
  RESULTS_ZIP = TMP_ROOT / "results.zip"
25
 
26
 
def _ensure_dirs() -> None:
    """Create the scratch root, the upload staging dir and the output dir.

    Each directory is created with ``parents=True, exist_ok=True``, so the
    call is safe to repeat and never fails because a directory is already
    there.
    """
    # Root first, then its two children, mirroring the on-disk layout.
    for directory in (TMP_ROOT, UPLOAD_DIR, OUTPUT_DIR):
        directory.mkdir(parents=True, exist_ok=True)
31
 
32
 
def _clean_upload_staging_only() -> None:
    """Reset the upload staging directory to an empty state.

    Only ``UPLOAD_DIR`` is wiped. ``OUTPUT_DIR`` is deliberately left alone:
    do NOT delete it if the index dedupe is meant to persist across runs.
    """
    _ensure_dirs()

    staging = UPLOAD_DIR
    if staging.exists():
        # Best-effort removal: failures while deleting are ignored.
        shutil.rmtree(staging, ignore_errors=True)
    # Recreate the (now empty) staging directory for the next batch.
    staging.mkdir(parents=True, exist_ok=True)
42
+
43
+
44
  def _parse_config(config_text: str) -> Dict[str, Any]:
45
  text = (config_text or "").strip()
46
  if not text:
 
51
  except json.JSONDecodeError as e:
52
  raise ValueError(f"Config JSON is invalid: {e}") from e
53
 
54
+ if not isinstance(cfg, dict):
55
+ raise ValueError("Config JSON must be an object (dict).")
56
+
57
  merged = dict(DEFAULT_CONFIG)
58
+ merged.update(cfg)
59
  return merged
60
 
61
 
62
  def _stage_inputs(uploaded_files: Optional[List[str]]) -> List[str]:
63
  """
64
+ Gradio gives file paths (type='filepath').
65
+ Copy into UPLOAD_DIR for stable paths.
66
+ Supports PDFs and ZIPs containing PDFs.
67
  """
68
+ _clean_upload_staging_only()
69
+
70
  staged: List[str] = []
71
  uploaded_files = uploaded_files or []
72
 
 
75
  if not src.exists():
76
  continue
77
 
 
78
  if src.suffix.lower() == ".zip":
79
  with zipfile.ZipFile(src, "r") as z:
80
  z.extractall(UPLOAD_DIR)
81
  staged.extend([str(p.resolve()) for p in sorted(UPLOAD_DIR.rglob("*.pdf"))])
82
  continue
83
 
 
84
  if src.suffix.lower() == ".pdf":
85
  dst = UPLOAD_DIR / src.name
86
  shutil.copy2(src, dst)
87
  staged.append(str(dst.resolve()))
88
 
89
+ # dedupe while preserving order
90
  seen = set()
91
+ out: List[str] = []
92
  for p in staged:
93
  if p not in seen:
94
  seen.add(p)
95
+ out.append(p)
96
+ return out
 
97
 
98
 
99
  def _zip_dir(src_dir: Path, zip_path: Path) -> None:
 
106
  z.write(p, arcname=str(p.relative_to(src_dir)))
107
 
108
 
 
 
109
  def process(uploaded_files: Optional[List[str]], config_text: str) -> Tuple[str, Optional[str]]:
110
  """
111
+ Returns (status, results_zip_path_or_none).
 
112
  """
113
+ _ensure_dirs()
114
 
115
  cfg = _parse_config(config_text)
116
  pdf_paths = _stage_inputs(uploaded_files)
117
 
118
  if not pdf_paths:
119
+ return ("No PDFs found. Upload PDFs or a ZIP containing PDFs.", None)
120
+
121
+ try:
122
+ run_pipeline(
123
+ input_files=pdf_paths,
124
+ config=cfg,
125
+ output_dir=str(OUTPUT_DIR),
126
+ )
127
+ except Exception as e:
128
+ return (f"Pipeline failed: {type(e).__name__}: {e}", None)
129
+
 
 
130
  try:
131
  _zip_dir(OUTPUT_DIR, RESULTS_ZIP)
132
  except Exception as e:
 
135
  return (f"Processed {len(pdf_paths)} file(s). Results are ready.", str(RESULTS_ZIP))
136
 
137
 
 
 
def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks app and return it.

    Layout, top to bottom: a title, a multi-file upload, a JSON config editor
    pre-filled with ``DEFAULT_CONFIG``, a "Process" button, a read-only status
    box and a read-only results file slot. Clicking the button calls
    ``process`` with the uploaded files and the config text, and routes its
    two return values to the status box and the results slot.
    """
    # Serialize the defaults once so the editor opens with a valid config.
    default_config_json = json.dumps(DEFAULT_CONFIG, indent=2)

    with gr.Blocks(title="Resume Evaluator") as demo:
        gr.Markdown("# Resume Evaluator")

        upload_input = gr.File(
            label="Upload PDF(s) or a ZIP",
            file_count="multiple",
            type="filepath",
        )
        config_editor = gr.Code(
            label="Config JSON (optional)",
            language="json",
            value=default_config_json,
        )
        run_button = gr.Button("Process", variant="primary")

        status_box = gr.Textbox(label="Status", interactive=False)
        results_file = gr.File(label="Download Results ZIP", interactive=False)

        run_button.click(
            fn=process,
            inputs=[upload_input, config_editor],
            outputs=[status_box, results_file],
        )

    return demo
161
 
162
 
163
  demo = build_ui()
164
 
165
  if __name__ == "__main__":
 
166
  demo.launch(server_name="0.0.0.0", server_port=7860)