Avinashnalla7 committed on
Commit
7eed4ab
·
verified ·
1 Parent(s): 9ac22ba

Update app.py

Browse files

Key fix: Gradio File type must be filepath, not file.
Also: support ZIP upload OR multiple PDFs.

Files changed (1) hide show
  1. app.py +64 -46
app.py CHANGED
@@ -1,8 +1,9 @@
1
- import os
2
  import json
 
3
  import shutil
4
  import zipfile
5
  from pathlib import Path
 
6
 
7
  import gradio as gr
8
 
@@ -12,71 +13,88 @@ from pipeline import run_pipeline
12
  DEFAULT_CONFIG = {
13
  "model": os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
14
  "rewrite": False,
15
- # later: projects, thresholds, etc.
 
 
 
16
  }
17
 
18
 
19
- def _ensure_clean_dir(p: Path):
20
- if p.exists():
21
- shutil.rmtree(p)
22
- p.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
23
 
 
24
 
25
- def process(files, config_json):
26
- # 1) parse config safely
27
- cfg = DEFAULT_CONFIG.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  if config_json and config_json.strip():
29
  try:
30
- user_cfg = json.loads(config_json)
31
- if isinstance(user_cfg, dict):
32
- cfg.update(user_cfg)
33
- except Exception as e:
34
- raise gr.Error(f"Config JSON is invalid: {e}")
35
-
36
- # 2) validate upload
37
- if not files or len(files) == 0:
38
- raise gr.Error("Upload at least 1 PDF.")
39
-
40
- # 3) copy uploads into /tmp (HF safe)
41
- base = Path("/tmp/resume_eval")
42
- input_dir = base / "input_uploads"
43
- _ensure_clean_dir(input_dir)
44
-
45
- input_files = []
46
- for f in files:
47
- src = Path(f) # because we use type="filepath"
48
- if not src.exists():
49
- raise gr.Error(f"Uploaded file path missing: {src}")
50
- dst = input_dir / src.name
51
- shutil.copy2(src, dst)
52
- input_files.append(str(dst))
53
-
54
- # 4) run pipeline => must return real zip path
55
- zip_path = run_pipeline(input_files=input_files, config=cfg)
56
-
57
- # 5) hard validation: file must exist and be non-trivial
58
- z = Path(zip_path)
59
- if not z.exists():
60
- raise gr.Error(f"Pipeline returned zip path but file does not exist: {z}")
61
- if z.stat().st_size < 200:
62
- raise gr.Error(f"ZIP too small ({z.stat().st_size} bytes). Pipeline likely wrote nothing.")
63
 
64
- return str(z)
 
65
 
66
 
67
  with gr.Blocks() as demo:
68
  gr.Markdown("# Resume Evaluator")
69
 
70
  files = gr.File(
71
- label="Upload PDF(s)",
72
  file_count="multiple",
73
- type="filepath" # IMPORTANT: must be filepath or binary
74
  )
75
 
76
  config_json = gr.Textbox(
77
  label="Config JSON (optional)",
78
  value=json.dumps(DEFAULT_CONFIG, indent=2),
79
- lines=10
80
  )
81
 
82
  btn = gr.Button("Process")
 
 
1
  import json
2
+ import os
3
  import shutil
4
  import zipfile
5
  from pathlib import Path
6
+ from typing import List, Optional, Dict, Any
7
 
8
  import gradio as gr
9
 
 
13
  DEFAULT_CONFIG = {
14
  "model": os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
15
  "rewrite": False,
16
+ # Later: WordPress will send projects in here
17
+ "projects": [
18
+ {"name": "STANDARD", "description": "Generic scoring"}
19
+ ],
20
  }
21
 
22
 
23
+ def _stage_inputs(uploaded_files: List[str]) -> List[str]:
24
+ """
25
+ Gradio gives us file paths (type='filepath').
26
+ We copy them into /tmp/input_uploads so pipeline has stable paths.
27
+ If ZIP is uploaded, extract PDFs.
28
+ """
29
+ input_dir = Path("/tmp/input_uploads").resolve()
30
+ if input_dir.exists():
31
+ shutil.rmtree(input_dir)
32
+ input_dir.mkdir(parents=True, exist_ok=True)
33
 
34
+ staged: List[str] = []
35
 
36
+ for f in uploaded_files or []:
37
+ src = Path(f)
38
+ if not src.exists():
39
+ continue
40
+
41
+ # ZIP case
42
+ if src.suffix.lower() == ".zip":
43
+ with zipfile.ZipFile(src, "r") as z:
44
+ z.extractall(input_dir)
45
+ # collect PDFs
46
+ for p in sorted(input_dir.rglob("*.pdf")):
47
+ staged.append(str(p.resolve()))
48
+ continue
49
+
50
+ # PDF case
51
+ if src.suffix.lower() == ".pdf":
52
+ dst = input_dir / src.name
53
+ shutil.copy2(src, dst)
54
+ staged.append(str(dst.resolve()))
55
+
56
+ return staged
57
+
58
+
59
+ def process(files, config_json: str):
60
+ # files can be None / single / list depending on gradio version; normalize.
61
+ if files is None:
62
+ return None
63
+
64
+ if isinstance(files, str):
65
+ uploaded_files = [files]
66
+ else:
67
+ uploaded_files = [f for f in files]
68
+
69
+ staged_pdfs = _stage_inputs(uploaded_files)
70
+ if not staged_pdfs:
71
+ raise gr.Error("No PDFs found. Upload PDFs or a ZIP containing PDFs.")
72
+
73
+ # config
74
+ cfg = DEFAULT_CONFIG
75
  if config_json and config_json.strip():
76
  try:
77
+ cfg = json.loads(config_json)
78
+ except Exception:
79
+ raise gr.Error("Config JSON is invalid. Fix JSON formatting.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
+ zip_path = run_pipeline(input_files=staged_pdfs, config=cfg)
82
+ return zip_path
83
 
84
 
85
  with gr.Blocks() as demo:
86
  gr.Markdown("# Resume Evaluator")
87
 
88
  files = gr.File(
89
+ label="Upload PDF(s) or a ZIP",
90
  file_count="multiple",
91
+ type="filepath", # <-- FIX
92
  )
93
 
94
  config_json = gr.Textbox(
95
  label="Config JSON (optional)",
96
  value=json.dumps(DEFAULT_CONFIG, indent=2),
97
+ lines=10,
98
  )
99
 
100
  btn = gr.Button("Process")