Update app.py
Browse files
app.py
CHANGED
|
@@ -10,15 +10,15 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
| 10 |
|
| 11 |
|
| 12 |
# ---- Text extraction ----
|
| 13 |
-
def extract_text(
|
| 14 |
if filename.endswith(".pdf"):
|
| 15 |
text = ""
|
| 16 |
-
with fitz.open(
|
| 17 |
for page in doc:
|
| 18 |
text += page.get_text("text") + "\n"
|
| 19 |
return text
|
| 20 |
elif filename.endswith(".docx"):
|
| 21 |
-
docf = docx.Document(
|
| 22 |
return "\n".join(p.text for p in docf.paragraphs)
|
| 23 |
return ""
|
| 24 |
|
|
@@ -38,12 +38,11 @@ def rank_cvs(job_description, files):
|
|
| 38 |
job_emb = get_embedding(job_description)
|
| 39 |
scores, names = [], []
|
| 40 |
|
| 41 |
-
for
|
| 42 |
-
#
|
| 43 |
-
filename =
|
| 44 |
-
file_bytes = f.read()
|
| 45 |
|
| 46 |
-
text = extract_text(
|
| 47 |
if not text.strip():
|
| 48 |
continue
|
| 49 |
cv_emb = get_embedding(text[:4000]) # limit text length
|
|
@@ -64,7 +63,7 @@ demo = gr.Interface(
|
|
| 64 |
fn=rank_cvs,
|
| 65 |
inputs=[
|
| 66 |
gr.Textbox(label="💼 Job Description", lines=5),
|
| 67 |
-
gr.File(label="π Upload CVs (PDF/DOCX)", file_count="multiple", type="
|
| 68 |
],
|
| 69 |
outputs=gr.Markdown(),
|
| 70 |
title="π AI CV Ranker (Local Model)",
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
# ---- Text extraction ----
|
| 13 |
+
def extract_text(file_path, filename):
    """Return the plain text of the document stored at *file_path*.

    The parser is selected from the extension of *filename*. The extension is
    compared case-insensitively so that uploads such as ``CV.PDF`` or
    ``Resume.Docx`` are parsed instead of being treated as unsupported.

    Args:
        file_path: Location of the document, passed to ``fitz.open`` for
            PDFs or ``docx.Document`` for Word files.
        filename: Name whose extension decides which parser is used.

    Returns:
        For ``.pdf``: the text of every page, each followed by a newline.
        For ``.docx``: the paragraph texts joined with newlines.
        For any other extension: an empty string.
    """
    # Normalise once; only the extension comparison needs the lowered form.
    lowered = filename.lower()
    if lowered.endswith(".pdf"):
        with fitz.open(file_path) as doc:
            # join() builds the result in one pass instead of repeated +=.
            return "".join(page.get_text("text") + "\n" for page in doc)
    if lowered.endswith(".docx"):
        docf = docx.Document(file_path)
        return "\n".join(p.text for p in docf.paragraphs)
    # Unsupported type: callers treat empty text as "skip this file".
    return ""
|
| 24 |
|
|
|
|
| 38 |
job_emb = get_embedding(job_description)
|
| 39 |
scores, names = [], []
|
| 40 |
|
| 41 |
+
for file_path in files:
|
| 42 |
+
# Get filename from file path
|
| 43 |
+
filename = os.path.basename(file_path)
|
|
|
|
| 44 |
|
| 45 |
+
text = extract_text(file_path, filename)
|
| 46 |
if not text.strip():
|
| 47 |
continue
|
| 48 |
cv_emb = get_embedding(text[:4000]) # limit text length
|
|
|
|
| 63 |
fn=rank_cvs,
|
| 64 |
inputs=[
|
| 65 |
gr.Textbox(label="💼 Job Description", lines=5),
|
| 66 |
+
gr.File(label="π Upload CVs (PDF/DOCX)", file_count="multiple", type="filepath"),
|
| 67 |
],
|
| 68 |
outputs=gr.Markdown(),
|
| 69 |
title="π AI CV Ranker (Local Model)",
|