Avinashnalla7 commited on
Commit
eefd991
·
verified ·
1 Parent(s): de6faa2

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +53 -0
  2. pipeline.py +86 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import json
4
+ import gradio as gr
5
+ from pipeline import run_pipeline
6
+
7
# Fallback pipeline settings, used when the config textbox is empty or
# holds invalid JSON. OPENAI_MODEL overrides the model at deploy time.
DEFAULT_CONFIG = dict(
    model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
    rewrite=False,
)
11
+
12
def process(files, config_json):
    """Gradio callback: run the evaluation pipeline on uploaded PDFs.

    Args:
        files: uploads from the gr.File component — plain filepath strings
            on Gradio >= 4 (type="filepath"), or tempfile wrapper objects
            exposing ``.name`` on older versions.
        config_json: optional JSON string overriding DEFAULT_CONFIG.

    Returns:
        Path to the results zip produced by run_pipeline, or None when
        nothing was uploaded.
    """
    if not files:
        return None

    try:
        config = json.loads(config_json) if config_json else DEFAULT_CONFIG
    except (TypeError, ValueError):
        # Malformed/non-string textbox content — fall back to defaults
        # rather than surfacing a parse error to the user.
        config = DEFAULT_CONFIG

    # Gradio >= 4 hands back plain path strings; older versions pass
    # objects whose .name attribute is the temp-file path.
    input_paths = [f if isinstance(f, str) else f.name for f in files]

    return run_pipeline(input_files=input_paths, config=config)
29
+
30
+
31
# UI definition. NOTE: type="filepath" is required on Gradio 4.x — the
# old type="file" value was removed and raises ValueError at startup
# (requirements.txt pins gradio>=4.0.0). The callback then receives
# plain path strings, which process() accepts.
with gr.Blocks() as demo:
    gr.Markdown("## Resume Evaluator")

    files = gr.File(
        label="Upload PDF(s)",
        file_count="multiple",
        type="filepath",
    )

    config_json = gr.Textbox(
        label="Config JSON (optional)",
        value=json.dumps(DEFAULT_CONFIG, indent=2),
        lines=6,
    )

    btn = gr.Button("Process")
    output = gr.File(label="Download Results ZIP")

    btn.click(fn=process, inputs=[files, config_json], outputs=output)


if __name__ == "__main__":
    # 0.0.0.0:7860 is the bind address/port Hugging Face Spaces expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)
pipeline.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import shutil
import tempfile
import time
import zipfile
from pathlib import Path
from typing import Any, Dict, List
8
+
9
+ # ---------------------------------------------------------
10
+ # IMPORTANT:
11
+ # You MUST paste/call your existing notebook functions here.
12
+ # Minimal changes:
13
+ # - your PDF->text extractor
14
+ # - your LLM evaluator
15
+ # - your export_to_drive_clean logic (renamed to local export)
16
+ # ---------------------------------------------------------
17
+
18
+ def _safe_mkdir(p: str) -> None:
19
+ Path(p).mkdir(parents=True, exist_ok=True)
20
+
21
+ def _zip_dir(folder: str, zip_path: str) -> str:
22
+ folder = str(Path(folder).resolve())
23
+ zip_path = str(Path(zip_path).resolve())
24
+ with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
25
+ for p in Path(folder).rglob("*"):
26
+ if p.is_file():
27
+ z.write(str(p), arcname=str(p).replace(folder + "/", ""))
28
+ return zip_path
29
+
30
def run_pipeline(input_files: List[str], config: Dict[str, Any]) -> str:
    """Run the evaluation pipeline over a set of PDF files.

    Args:
        input_files: absolute paths to candidate PDF files.
        config: pipeline options (projects/thresholds/model/rewrite, etc.).

    Returns:
        Path to a zip archive containing this run's OUTPUT artifacts.

    Raises:
        RuntimeError: if none of *input_files* is an existing ``.pdf``.
    """
    # Per-run scratch area. time_ns() avoids the same-second run-id
    # collision that int(time.time()) allowed; gettempdir() is portable
    # where a hard-coded /tmp is not (and is still the HF-safe location).
    base_out = Path(tempfile.gettempdir()) / f"run_{time.time_ns()}"
    input_dir = base_out / "INPUT_PDFS"
    output_dir = base_out / "OUTPUT"

    _safe_mkdir(str(input_dir))
    _safe_mkdir(str(output_dir))

    # 1) Copy inputs, keeping only files that exist and look like PDFs.
    pdf_paths: List[str] = []
    for f in input_files:
        src = Path(f)
        if not src.exists() or src.suffix.lower() != ".pdf":
            continue
        dst = input_dir / src.name
        shutil.copy2(src, dst)
        pdf_paths.append(str(dst))

    if not pdf_paths:
        raise RuntimeError("No PDFs provided.")

    # 2) ---- YOUR PIPELINE HERE ----
    # Replace this placeholder with the real PDF->text extraction and
    # LLM evaluation, e.g.:
    #     evaluations = evaluate_pdfs(pdf_paths, config)
    # The end result MUST be a List[dict]. Minimal placeholder entries
    # prove the upload -> process -> download flow works end to end.
    evaluations: List[dict] = [
        {
            "filename": os.path.basename(p),
            "candidate_name": os.path.splitext(os.path.basename(p))[0],
            "scores": {"skill": 0, "experience": 0, "growth": 0, "context_fit": 0},
            "tags": ["STANDARD"],
        }
        for p in pdf_paths
    ]

    # 3) Export artifacts to output_dir.
    # TODO: replace with the real export logic (bucket folders, csv,
    # master index). ensure_ascii=False keeps non-ASCII names readable.
    with open(output_dir / "master_index.json", "w", encoding="utf-8") as fh:
        json.dump(
            {"count": len(evaluations), "evaluations": evaluations},
            fh,
            indent=2,
            ensure_ascii=False,
        )

    # 4) Zip everything under OUTPUT and return the archive path.
    return _zip_dir(str(output_dir), str(base_out / "results.zip"))
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.0.0
2
+ openai>=1.0.0
3
+ pypdf