Update app.py
Browse files
app.py
CHANGED
|
@@ -10,15 +10,15 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
| 10 |
|
| 11 |
|
| 12 |
# ---- Text extraction ----
|
| 13 |
-
def extract_text(
|
| 14 |
-
if
|
| 15 |
text = ""
|
| 16 |
-
with fitz.open(stream=
|
| 17 |
for page in doc:
|
| 18 |
text += page.get_text("text") + "\n"
|
| 19 |
return text
|
| 20 |
-
elif
|
| 21 |
-
docf = docx.Document(
|
| 22 |
return "\n".join(p.text for p in docf.paragraphs)
|
| 23 |
return ""
|
| 24 |
|
|
@@ -39,7 +39,11 @@ def rank_cvs(job_description, files):
|
|
| 39 |
scores, names = [], []
|
| 40 |
|
| 41 |
for f in files:
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
if not text.strip():
|
| 44 |
continue
|
| 45 |
cv_emb = get_embedding(text[:4000]) # limit text length
|
|
@@ -47,7 +51,7 @@ def rank_cvs(job_description, files):
|
|
| 47 |
np.linalg.norm(job_emb) * np.linalg.norm(cv_emb)
|
| 48 |
)
|
| 49 |
scores.append(sim)
|
| 50 |
-
names.append(
|
| 51 |
|
| 52 |
top = sorted(zip(names, scores), key=lambda x: x[1], reverse=True)[:10]
|
| 53 |
return "\n\n".join(
|
|
@@ -60,8 +64,7 @@ demo = gr.Interface(
|
|
| 60 |
fn=rank_cvs,
|
| 61 |
inputs=[
|
| 62 |
gr.Textbox(label="πΌ Job Description", lines=5),
|
| 63 |
-
|
| 64 |
-
gr.File(label="π Upload CVs (PDF/DOCX)", file_count="multiple", type="binary"),
|
| 65 |
],
|
| 66 |
outputs=gr.Markdown(),
|
| 67 |
title="π AI CV Ranker (Local Model)",
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
# ---- Text extraction ----
|
| 13 |
+
def extract_text(file_bytes, filename):
    """Return the plain text of an uploaded CV, or "" if the type is unsupported.

    Args:
        file_bytes: Content of the upload. For PDFs it is handed to
            ``fitz.open(stream=...)``. For DOCX, raw ``bytes``/``bytearray``
            are wrapped in an in-memory stream; any other value (a path or
            file-like object) is passed to ``docx.Document`` unchanged.
        filename: Name used only to pick the parser from its extension.
            The match is case-insensitive; ``None`` or "" is treated as
            an unsupported type.

    Returns:
        The extracted text: PDF pages each followed by a newline, or DOCX
        paragraphs joined by newlines. "" for any other extension.
    """
    # Function-scope import: the file's top-level import block is not
    # visible from this section, so bring ``io`` into scope locally.
    import io

    # Normalise once so "CV.PDF" / "Resume.Docx" are not silently skipped.
    suffix = (filename or "").lower()

    if suffix.endswith(".pdf"):
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            # join() is consumed before the document is closed.
            return "".join(page.get_text("text") + "\n" for page in doc)

    if suffix.endswith(".docx"):
        # docx.Document() accepts a path or a file-like object, not raw
        # bytes, so wrap in-memory content in a seekable stream first.
        source = file_bytes
        if isinstance(source, (bytes, bytearray)):
            source = io.BytesIO(source)
        docf = docx.Document(source)
        return "\n".join(p.text for p in docf.paragraphs)

    return ""
|
| 24 |
|
|
|
|
| 39 |
scores, names = [], []
|
| 40 |
|
| 41 |
for f in files:
|
| 42 |
+
# Extract filename and file bytes
|
| 43 |
+
filename = f.name
|
| 44 |
+
file_bytes = f.read()
|
| 45 |
+
|
| 46 |
+
text = extract_text(file_bytes, filename)
|
| 47 |
if not text.strip():
|
| 48 |
continue
|
| 49 |
cv_emb = get_embedding(text[:4000]) # limit text length
|
|
|
|
| 51 |
np.linalg.norm(job_emb) * np.linalg.norm(cv_emb)
|
| 52 |
)
|
| 53 |
scores.append(sim)
|
| 54 |
+
names.append(filename)
|
| 55 |
|
| 56 |
top = sorted(zip(names, scores), key=lambda x: x[1], reverse=True)[:10]
|
| 57 |
return "\n\n".join(
|
|
|
|
| 64 |
fn=rank_cvs,
|
| 65 |
inputs=[
|
| 66 |
gr.Textbox(label="πΌ Job Description", lines=5),
|
| 67 |
+
gr.File(label="π Upload CVs (PDF/DOCX)", file_count="multiple", type="file"),
|
|
|
|
| 68 |
],
|
| 69 |
outputs=gr.Markdown(),
|
| 70 |
title="π AI CV Ranker (Local Model)",
|