Avinashnalla7 committed on
Commit
fceb48f
·
verified ·
1 Parent(s): 213dcf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -8
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import json
2
  import os
3
  import shutil
 
4
  import zipfile
5
  from pathlib import Path
6
  from typing import Any, Dict, List, Optional, Tuple
@@ -20,19 +21,23 @@ DEFAULT_CONFIG: Dict[str, Any] = {
20
 
21
  TMP_ROOT = Path("/tmp/resume_evaluator").resolve()
22
  UPLOAD_DIR = TMP_ROOT / "input_uploads"
23
- OUTPUT_DIR = TMP_ROOT / "output" # IMPORTANT: do not delete if you want dedupe persistence
 
 
 
 
24
  RESULTS_ZIP = TMP_ROOT / "results.zip"
25
 
26
 
27
  def _ensure_dirs() -> None:
28
  TMP_ROOT.mkdir(parents=True, exist_ok=True)
29
  UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
30
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
31
 
32
 
33
  def _clean_upload_staging_only() -> None:
34
  """
35
- DO NOT delete OUTPUT_DIR if you want index dedupe to persist across runs.
36
  Only clear uploads staging.
37
  """
38
  _ensure_dirs()
@@ -86,7 +91,7 @@ def _stage_inputs(uploaded_files: Optional[List[str]]) -> List[str]:
86
  shutil.copy2(src, dst)
87
  staged.append(str(dst.resolve()))
88
 
89
- # dedupe while preserving order
90
  seen = set()
91
  out: List[str] = []
92
  for p in staged:
@@ -118,21 +123,38 @@ def process(uploaded_files: Optional[List[str]], config_text: str) -> Tuple[str,
118
  if not pdf_paths:
119
  return ("No PDFs found. Upload PDFs or a ZIP containing PDFs.", None)
120
 
 
 
 
121
  try:
122
- run_pipeline(
 
123
  input_files=pdf_paths,
124
  config=cfg,
125
- output_dir=str(OUTPUT_DIR),
 
126
  )
127
  except Exception as e:
128
  return (f"Pipeline failed: {type(e).__name__}: {e}", None)
129
 
 
130
  try:
131
- _zip_dir(OUTPUT_DIR, RESULTS_ZIP)
 
 
 
 
132
  except Exception as e:
133
  return (f"Failed to package results: {type(e).__name__}: {e}", None)
134
 
135
- return (f"Processed {len(pdf_paths)} file(s). Results are ready.", str(RESULTS_ZIP))
 
 
 
 
 
 
 
136
 
137
 
138
  def build_ui() -> gr.Blocks:
 
1
  import json
2
  import os
3
  import shutil
4
+ import uuid
5
  import zipfile
6
  from pathlib import Path
7
  from typing import Any, Dict, List, Optional, Tuple
 
21
 
22
  TMP_ROOT = Path("/tmp/resume_evaluator").resolve()
23
  UPLOAD_DIR = TMP_ROOT / "input_uploads"
24
+
25
+ # IMPORTANT:
26
+ # Treat OUTPUT_ROOT as a persistent root (for dedupe manifest/state).
27
+ # Each run should write into OUTPUT_ROOT/jobs/{job_id}/...
28
+ OUTPUT_ROOT = TMP_ROOT / "output"
29
  RESULTS_ZIP = TMP_ROOT / "results.zip"
30
 
31
 
32
  def _ensure_dirs() -> None:
33
  TMP_ROOT.mkdir(parents=True, exist_ok=True)
34
  UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
35
+ OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)
36
 
37
 
38
  def _clean_upload_staging_only() -> None:
39
  """
40
+ DO NOT delete OUTPUT_ROOT if you want dedupe state to persist across runs.
41
  Only clear uploads staging.
42
  """
43
  _ensure_dirs()
 
91
  shutil.copy2(src, dst)
92
  staged.append(str(dst.resolve()))
93
 
94
+ # Dedupe while preserving order
95
  seen = set()
96
  out: List[str] = []
97
  for p in staged:
 
123
  if not pdf_paths:
124
  return ("No PDFs found. Upload PDFs or a ZIP containing PDFs.", None)
125
 
126
+ # Job boundary for future FastAPI/DB/SFTP integration
127
+ job_id = uuid.uuid4().hex[:12]
128
+
129
  try:
130
+ # IMPORTANT: pipeline must create a job-scoped folder under OUTPUT_ROOT and return job_dir.
131
+ result = run_pipeline(
132
  input_files=pdf_paths,
133
  config=cfg,
134
+ output_dir=str(OUTPUT_ROOT),
135
+ job_id=job_id,
136
  )
137
  except Exception as e:
138
  return (f"Pipeline failed: {type(e).__name__}: {e}", None)
139
 
140
+ # We only zip the job folder, NOT the entire OUTPUT_ROOT.
141
  try:
142
+ job_dir = Path(result.get("job_dir") or "").resolve()
143
+ if not job_dir.exists() or not job_dir.is_dir():
144
+ return (f"Pipeline failed: invalid job_dir returned: {result.get('job_dir')!r}", None)
145
+
146
+ _zip_dir(job_dir, RESULTS_ZIP)
147
  except Exception as e:
148
  return (f"Failed to package results: {type(e).__name__}: {e}", None)
149
 
150
+ counts = result.get("counts") or {}
151
+ total = counts.get("total", len(pdf_paths))
152
+ success = counts.get("success", "?")
153
+ skipped = counts.get("skipped", "?")
154
+ failed = counts.get("failed", "?")
155
+
156
+ status_msg = f"Job {job_id} done. total={total} success={success} skipped={skipped} failed={failed}"
157
+ return (status_msg, str(RESULTS_ZIP))
158
 
159
 
160
  def build_ui() -> gr.Blocks: