Avinashnalla7 committed on
Commit
4e0a1c0
·
verified ·
1 Parent(s): 18f2460

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -69
app.py CHANGED
@@ -3,41 +3,60 @@ import os
3
  import shutil
4
  import zipfile
5
  from pathlib import Path
6
- from typing import List, Optional, Dict, Any
7
 
8
  import gradio as gr
9
 
10
  from pipeline import run_pipeline
11
- import asyncio
12
- asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
13
- import sys, asyncio
14
- print("PY_VER", sys.version)
15
- asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
16
- print("LOOP_POLICY_SET")
17
- DEFAULT_CONFIG = {
18
  "model": os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
19
  "rewrite": False,
20
- # Later: WordPress will send projects in here
21
- "projects": [
22
- {"name": "STANDARD", "description": "Generic scoring"}
23
- ],
24
  }
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- def _stage_inputs(uploaded_files: List[str]) -> List[str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  """
29
- Gradio gives us file paths (type='filepath').
30
- We copy them into /tmp/input_uploads so pipeline has stable paths.
31
- If ZIP is uploaded, extract PDFs.
32
  """
33
- input_dir = Path("/tmp/input_uploads").resolve()
34
- if input_dir.exists():
35
- shutil.rmtree(input_dir)
36
- input_dir.mkdir(parents=True, exist_ok=True)
37
-
38
  staged: List[str] = []
 
39
 
40
- for f in uploaded_files or []:
41
  src = Path(f)
42
  if not src.exists():
43
  continue
@@ -45,72 +64,118 @@ def _stage_inputs(uploaded_files: List[str]) -> List[str]:
45
  # ZIP case
46
  if src.suffix.lower() == ".zip":
47
  with zipfile.ZipFile(src, "r") as z:
48
- z.extractall(input_dir)
49
- # collect PDFs
50
- for p in sorted(input_dir.rglob("*.pdf")):
51
- staged.append(str(p.resolve()))
52
  continue
53
 
54
  # PDF case
55
  if src.suffix.lower() == ".pdf":
56
- dst = input_dir / src.name
57
  shutil.copy2(src, dst)
58
  staged.append(str(dst.resolve()))
59
 
60
- return staged
 
 
 
 
 
 
61
 
 
62
 
63
- def process(files, config_json: str):
64
- # files can be None / single / list depending on gradio version; normalize.
65
- if files is None:
66
- return None
67
 
68
- if isinstance(files, str):
69
- uploaded_files = [files]
70
- else:
71
- uploaded_files = [f for f in files]
72
 
73
- staged_pdfs = _stage_inputs(uploaded_files)
74
- if not staged_pdfs:
75
- raise gr.Error("No PDFs found. Upload PDFs or a ZIP containing PDFs.")
 
76
 
77
- # config
78
- cfg = DEFAULT_CONFIG
79
- if config_json and config_json.strip():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  try:
81
- cfg = json.loads(config_json)
82
- except Exception:
83
- raise gr.Error("Config JSON is invalid. Fix JSON formatting.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
- zip_path = run_pipeline(input_files=staged_pdfs, config=cfg)
86
- return zip_path
87
 
 
88
 
89
- with gr.Blocks() as demo:
90
- gr.Markdown("# Resume Evaluator")
91
 
92
- files = gr.File(
93
- label="Upload PDF(s) or a ZIP",
94
- file_count="multiple",
95
- type="filepath", # <-- FIX
96
- )
97
 
98
- config_json = gr.Textbox(
99
- label="Config JSON (optional)",
100
- value=json.dumps(DEFAULT_CONFIG, indent=2),
101
- lines=10,
102
- )
103
 
104
- btn = gr.Button("Process")
105
- out = gr.File(label="Download Results ZIP")
106
 
107
- btn.click(fn=process, inputs=[files, config_json], outputs=[out])
108
 
109
  if __name__ == "__main__":
110
- demo.queue() # important on HF
111
- demo.launch(
112
- server_name="0.0.0.0",
113
- server_port=int(os.getenv("PORT", "7860")),
114
- ssr_mode=False,
115
- show_error=True,
116
- )
 
3
  import shutil
4
  import zipfile
5
  from pathlib import Path
6
+ from typing import Any, Dict, List, Optional, Tuple
7
 
8
  import gradio as gr
9
 
10
  from pipeline import run_pipeline
11
+
12
+ # ---- Defaults / Constants ----
13
+
14
# Baseline pipeline settings; a user-supplied config is layered on top of these.
DEFAULT_CONFIG: Dict[str, Any] = {
    # `or` (instead of a getenv default) also covers OPENAI_MODEL being set but
    # empty, which would otherwise yield an empty model name.
    "model": os.getenv("OPENAI_MODEL") or "gpt-4o-mini",
    "rewrite": False,
    "projects": [{"name": "STANDARD", "description": "Generic scoring"}],
}

# All scratch paths hang off one root directory.
TMP_ROOT = Path("/tmp/resume_evaluator").resolve()
UPLOAD_DIR = TMP_ROOT / "input_uploads"  # staged PDFs and extracted ZIP contents
OUTPUT_DIR = TMP_ROOT / "output"  # directory handed to the pipeline for its results
RESULTS_ZIP = TMP_ROOT / "results.zip"  # archive of OUTPUT_DIR offered for download
24
+
25
+
26
+ # ---- Helpers ----
27
+
28
def _clean_tmp() -> None:
    """Reset the scratch area: drop TMP_ROOT, then recreate the empty work dirs.

    Removal is best-effort: failures while deleting are ignored, and the
    upload/output directories are (re)created afterwards either way.
    """
    # ignore_errors also covers TMP_ROOT not existing, so no pre-check is needed.
    shutil.rmtree(TMP_ROOT, ignore_errors=True)
    for work_dir in (UPLOAD_DIR, OUTPUT_DIR):
        work_dir.mkdir(parents=True, exist_ok=True)
33
+
34
 
35
def _parse_config(config_text: str) -> Dict[str, Any]:
    """Build the effective pipeline config from the optional JSON text.

    Args:
        config_text: Raw JSON entered by the user; may be empty, whitespace
            only, or None.

    Returns:
        A fresh dict containing DEFAULT_CONFIG overlaid with the user's
        top-level keys. The result never shares nested objects with
        DEFAULT_CONFIG, so callers may mutate it freely.

    Raises:
        ValueError: If the text is non-empty but is not valid JSON.
    """
    import copy  # local import keeps this block independent of the file header

    # Deep copy: a shallow dict() copy would share the nested "projects" list
    # with the module-level default, letting one run's mutation leak into the next.
    merged: Dict[str, Any] = copy.deepcopy(DEFAULT_CONFIG)

    text = (config_text or "").strip()
    if not text:
        return merged

    try:
        cfg = json.loads(text)
    except json.JSONDecodeError as e:
        raise ValueError(f"Config JSON is invalid: {e}") from e

    # Shallow merge of top-level keys. Valid JSON that is not an object
    # (list, number, string, ...) has no keys and leaves the defaults untouched.
    if isinstance(cfg, dict):
        merged.update(cfg)
    return merged
49
+
50
+
51
def _is_staged_pdf(path: Path) -> bool:
    """Return True for real PDF files, matching the extension case-insensitively."""
    if not path.is_file() or path.suffix.lower() != ".pdf":
        return False
    # Archives created on macOS carry AppleDouble sidecars
    # (__MACOSX/._name.pdf) that end in .pdf but are not PDF documents.
    return "__MACOSX" not in path.parts and not path.name.startswith("._")


def _unique_destination(directory: Path, name: str) -> Path:
    """Return a path in *directory* for *name* that does not exist yet."""
    candidate = directory / name
    stem, suffix = candidate.stem, candidate.suffix
    counter = 1
    while candidate.exists():
        candidate = directory / f"{stem}_{counter}{suffix}"
        counter += 1
    return candidate


def _stage_inputs(uploaded_files: Optional[List[str]]) -> List[str]:
    """Copy uploaded PDFs, and PDFs found inside uploaded ZIPs, into UPLOAD_DIR.

    Args:
        uploaded_files: File paths as delivered by Gradio with
            type='filepath'; None or an empty list yields no inputs.

    Returns:
        Absolute paths of the staged PDFs, without duplicates, in upload
        order (members of one ZIP are sorted by path).

    Raises:
        zipfile.BadZipFile: If an uploaded ``.zip`` is not a valid archive.
        OSError: If copying or extracting fails.

    Paths that do not exist and files that are neither ``.pdf`` nor ``.zip``
    are skipped.
    """
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

    staged: List[str] = []
    seen_sources = set()

    for index, f in enumerate(uploaded_files or []):
        src = Path(f)
        if not src.exists():
            continue

        # The same source listed twice is staged only once.
        source_key = src.resolve()
        if source_key in seen_sources:
            continue
        seen_sources.add(source_key)

        suffix = src.suffix.lower()

        # ZIP case: each archive gets its own directory so that equally named
        # members of different archives (or direct uploads) cannot overwrite
        # one another.
        if suffix == ".zip":
            extract_dir = UPLOAD_DIR / f"zip_{index:03d}"
            extract_dir.mkdir(parents=True, exist_ok=True)
            with zipfile.ZipFile(src, "r") as z:
                z.extractall(extract_dir)
            # Filter on the lower-cased suffix so "CV.PDF" inside an archive is
            # treated the same as a directly uploaded "CV.PDF".
            staged.extend(
                str(p.resolve())
                for p in sorted(extract_dir.rglob("*"))
                if _is_staged_pdf(p)
            )
            continue

        # PDF case: pick a free name instead of silently replacing an earlier
        # upload that happens to share the same file name.
        if suffix == ".pdf":
            dst = _unique_destination(UPLOAD_DIR, src.name)
            shutil.copy2(src, dst)
            staged.append(str(dst.resolve()))

    # Deduplicate while preserving order.
    return list(dict.fromkeys(staged))
86
 
 
 
 
 
87
 
88
def _zip_dir(src_dir: Path, zip_path: Path) -> None:
    """Write every file under *src_dir* into a fresh deflated ZIP at *zip_path*.

    Args:
        src_dir: Directory whose files are archived recursively. Member names
            are paths relative to this directory, using forward slashes.
        zip_path: Destination archive. An existing file is replaced and a
            missing parent directory is created.

    The archive never contains itself, even when *zip_path* lies inside
    *src_dir*. Empty directories are not recorded.
    """
    zip_path.parent.mkdir(parents=True, exist_ok=True)
    target = zip_path.resolve()

    # Collect members before the archive is opened and exclude the archive
    # itself; otherwise a destination under src_dir would be read into the
    # archive while it is still being written.
    members = [
        p
        for p in sorted(src_dir.rglob("*"))
        if p.is_file() and p.resolve() != target
    ]

    # Mode "w" truncates an existing archive, so no explicit unlink is needed.
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
        for p in members:
            z.write(p, arcname=p.relative_to(src_dir).as_posix())
96
 
97
+
98
+ # ---- Gradio handler ----
99
+
100
def _run_pipeline_once(pdf_paths: List[str], cfg: Dict[str, Any], output_dir: str) -> None:
    """Call run_pipeline exactly once, by keyword if its signature allows it."""
    import inspect  # local import keeps this block independent of the file header

    kwargs = {"input_files": pdf_paths, "config": cfg, "output_dir": output_dir}

    try:
        signature = inspect.signature(run_pipeline)
    except (TypeError, ValueError):
        signature = None  # not introspectable; use the keyword form

    use_keywords = True
    if signature is not None:
        try:
            signature.bind(**kwargs)
        except TypeError:
            # Parameter names differ: fall back to positional order.
            use_keywords = False

    # The call shape is decided up front rather than by catching TypeError
    # from the call itself, so a TypeError raised inside the pipeline is
    # reported as a failure instead of triggering a second full run.
    if use_keywords:
        run_pipeline(**kwargs)
    else:
        run_pipeline(pdf_paths, cfg, output_dir)


def process(uploaded_files: Optional[List[str]], config_text: str) -> Tuple[str, Optional[str]]:
    """Gradio handler: stage the uploads, run the pipeline, and zip its output.

    Args:
        uploaded_files: Paths of the uploaded files; None means nothing was
            uploaded, and a single path string is treated as a one-item list.
        config_text: Optional JSON overriding top-level DEFAULT_CONFIG keys.

    Returns:
        (status_text, results_zip_path_or_none). Failures are reported through
        the status text with None as the path, so the UI shows a message
        instead of an unhandled error.
    """
    _clean_tmp()

    # A bare string would otherwise be iterated character by character.
    if isinstance(uploaded_files, str):
        uploaded_files = [uploaded_files]

    try:
        cfg = _parse_config(config_text)
    except ValueError as e:
        return (str(e), None)

    try:
        pdf_paths = _stage_inputs(uploaded_files)
    except (zipfile.BadZipFile, OSError) as e:
        return (f"Could not read the uploaded files: {type(e).__name__}: {e}", None)

    if not pdf_paths:
        return ("No PDFs found. Upload one or more PDFs, or a ZIP containing PDFs.", None)

    # Run pipeline
    try:
        _run_pipeline_once(pdf_paths, cfg, str(OUTPUT_DIR))
    except Exception as e:
        return (f"Pipeline failed: {type(e).__name__}: {e}", None)

    # An empty archive would be reported as success; surface it instead.
    if not any(p.is_file() for p in OUTPUT_DIR.rglob("*")):
        return ("Pipeline finished but wrote no files to the output directory.", None)

    # Zip results
    try:
        _zip_dir(OUTPUT_DIR, RESULTS_ZIP)
    except Exception as e:
        return (f"Failed to package results: {type(e).__name__}: {e}", None)

    return (f"Processed {len(pdf_paths)} file(s). Results are ready.", str(RESULTS_ZIP))
139
+
140
+
141
+ # ---- UI ----
142
+
143
def build_ui() -> gr.Blocks:
    """Assemble the Resume Evaluator interface and return the Blocks app.

    Layout: title, upload area, JSON config editor, a "Process" button, a
    read-only status box and a download slot. Clicking the button calls
    `process` with the uploads and config text and shows its two results.
    """
    default_config_text = json.dumps(DEFAULT_CONFIG, indent=2)

    with gr.Blocks(title="Resume Evaluator") as app:
        gr.Markdown("# Resume Evaluator")

        with gr.Group():
            upload_box = gr.File(
                label="Upload PDF(s) or a ZIP",
                file_count="multiple",
                type="filepath",
            )

        with gr.Group():
            config_editor = gr.Code(
                label="Config JSON (optional)",
                language="json",
                value=default_config_text,
            )

        with gr.Row():
            run_button = gr.Button("Process", variant="primary")

        # NOTE(review): the status and download components are placed after
        # the button row, not inside it - confirm against the intended layout.
        status_box = gr.Textbox(label="Status", interactive=False)
        download_box = gr.File(label="Download Results ZIP", interactive=False)

        run_button.click(
            fn=process,
            inputs=[upload_box, config_editor],
            outputs=[status_box, download_box],
        )

    return app
 
 
 
 
175
 
 
 
176
 
177
# Module-level app instance: built on import, launched only when run as a script.
demo = build_ui()

if __name__ == "__main__":
    # Listen on all interfaces; the port can be overridden through the PORT
    # environment variable and defaults to 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.getenv("PORT", "7860")),
    )