DevNumb committed on
Commit
58a5fa0
·
verified ·
1 Parent(s): eae88ec

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import docx
import fitz  # PyMuPDF for PDFs
import gradio as gr
import numpy as np
import requests
6
# Access token for the Hugging Face Inference API, read from the environment.
# It is None when the variable is unset, so importing this module never fails.
HF_TOKEN = os.getenv("HF_TOKEN")

# Hosted embedding model (fast & free). The feature-extraction pipeline is the
# one that returns embedding vectors; sentence-similarity returns scores for a
# source sentence against candidates instead.
API_URL = "https://router.huggingface.co/hf-inference/models/sentence-transformers/all-MiniLM-L6-v2/pipeline/feature-extraction"

# Attach the Authorization header only when a token is configured, rather than
# sending the literal text "Bearer None".
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# Upper-case aliases, kept so code referring to either spelling resolves.
HF_API_URL = API_URL
HEADERS = headers
12
+
13
+
14
+
15
+ # ---- Text extraction ----
16
def extract_text(file):
    """Return the plain text of an uploaded CV, or "" for unsupported formats.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or a
            file-like object exposing ``.name`` (and ``.read()`` for PDFs).

    Returns:
        The extracted text. Each PDF page is followed by a newline; DOCX
        paragraphs are joined with newlines. Any other extension yields "".
    """
    if isinstance(file, (str, os.PathLike)):
        path, stream = os.fspath(file), None
    else:
        path, stream = file.name, file

    # Match the extension case-insensitively so "CV.PDF" is not skipped.
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        if stream is None:
            pdf = fitz.open(path)
        else:
            pdf = fitz.open(stream=stream.read(), filetype="pdf")
        with pdf as doc:
            return "".join(page.get_text("text") + "\n" for page in doc)

    if lowered.endswith(".docx"):
        docf = docx.Document(path if stream is None else stream)
        return "\n".join(p.text for p in docf.paragraphs)

    return ""
27
+
28
+
29
+ # ---- API embedding helper ----
30
def get_embedding(text):
    """Fetch an embedding vector for *text* from the Hugging Face API.

    Args:
        text: The string to embed.

    Returns:
        A NumPy array built from the response. When the response is not a
        recognised embedding payload (for example a JSON error object or a
        non-JSON body), a 384-element zero vector is returned so the caller
        can carry on.

    Raises:
        requests.RequestException: If the HTTP request itself fails
            (connection error, timeout).
    """
    payload = {"inputs": text}
    # API_URL and headers are the module-level request settings.
    resp = requests.post(API_URL, headers=headers, json=payload, timeout=60)

    try:
        data = resp.json()
    except ValueError:
        # Body was not JSON; fall through to the zero-vector fallback.
        data = None

    if isinstance(data, list) and data:
        first = data[0]
        if isinstance(first, dict) and "embedding" in first:
            return np.array(first["embedding"])
        if isinstance(first, list):
            return np.array(first)
        if isinstance(first, (int, float)):
            # A flat list of numbers is itself the embedding vector.
            return np.array(data)

    return np.zeros(384)
39
+
40
+
41
+ # ---- CV ranking ----
42
def rank_cvs(job_description, files):
    """Rank uploaded CVs by cosine similarity to a job description.

    Args:
        job_description: Free-text description of the role.
        files: Uploaded CVs. Each item is handed to ``extract_text`` and is
            either a path string or an object with a ``.name`` attribute.

    Returns:
        A Markdown string listing up to ten CVs, best match first, or a
        warning message when an input is missing or no CV yields any text.
    """
    if not job_description or not files:
        return "⚠️ Please upload CVs and provide a job description."

    job_emb = get_embedding(job_description)
    job_norm = np.linalg.norm(job_emb)  # loop-invariant, so computed once
    ranked = []

    for f in files:
        text = extract_text(f)
        if not text.strip():
            continue
        cv_emb = get_embedding(text[:4000])  # limit text length
        denom = job_norm * np.linalg.norm(cv_emb)
        # get_embedding can return an all-zero vector; dividing by a zero
        # norm would give NaN, which also makes the sort below unreliable.
        sim = float(np.dot(job_emb, cv_emb) / denom) if denom else 0.0
        # Show just the file name, not the full upload path.
        label = os.path.basename(str(getattr(f, "name", f)))
        ranked.append((label, sim))

    if not ranked:
        return "⚠️ No readable text could be extracted from the uploaded CVs."

    top = sorted(ranked, key=lambda item: item[1], reverse=True)[:10]
    return "\n\n".join(
        f"**{i+1}. {n}** — Similarity: `{s:.3f}`" for i, (n, s) in enumerate(top)
    )
64
+
65
+
66
+ # ---- Gradio UI ----
67
# Web UI: a job-description text box plus a multi-file upload, with the
# ranking rendered as Markdown.
demo = gr.Interface(
    fn=rank_cvs,
    inputs=[
        gr.Textbox(label="💼 Job Description", lines=5),
        # `type` is left at the component default. Gradio 4+ rejects
        # type="file" (it accepts "filepath" or "binary"), while older
        # releases default to file objects, so the default suits both.
        gr.File(label="📁 Upload CVs (PDF/DOCX)", file_count="multiple"),
    ],
    outputs=gr.Markdown(),
    title="📄 AI CV Ranker (API-powered)",
    description="Ranks uploaded CVs based on job relevance using Hugging Face API.",
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
80
+