Spaces:

DevNumb
/

chatbot

Sleeping

DevNumb committed on Nov 13, 2025

Commit

d9d5b32

verified ·

1 Parent(s): cc93d27

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,15 +10,15 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
 # ---- Text extraction ----
-def extract_text(file):
-    if file.name.endswith(".pdf"):
         text = ""
-        with fitz.open(stream=file.read(), filetype="pdf") as doc:
             for page in doc:
                 text += page.get_text("text") + "\n"
         return text
-    elif file.name.endswith(".docx"):
-        docf = docx.Document(file)
         return "\n".join(p.text for p in docf.paragraphs)
     return ""
@@ -39,7 +39,11 @@ def rank_cvs(job_description, files):
     scores, names = [], []
     for f in files:
-        text = extract_text(f)
         if not text.strip():
             continue
         cv_emb = get_embedding(text[:4000])  # limit text length
@@ -47,7 +51,7 @@ def rank_cvs(job_description, files):
             np.linalg.norm(job_emb) * np.linalg.norm(cv_emb)
         )
         scores.append(sim)
-        names.append(f.name)
     top = sorted(zip(names, scores), key=lambda x: x[1], reverse=True)[:10]
     return "\n\n".join(
@@ -60,8 +64,7 @@ demo = gr.Interface(
     fn=rank_cvs,
     inputs=[
         gr.Textbox(label="💼 Job Description", lines=5),
-        # Option 2: Get raw bytes
-        gr.File(label="📁 Upload CVs (PDF/DOCX)", file_count="multiple", type="binary"),
     ],
     outputs=gr.Markdown(),
     title="📄 AI CV Ranker (Local Model)",

 # ---- Text extraction ----
+def extract_text(file_bytes, filename):
+    if filename.endswith(".pdf"):
         text = ""
+        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
             for page in doc:
                 text += page.get_text("text") + "\n"
         return text
+    elif filename.endswith(".docx"):
+        docf = docx.Document(file_bytes)
         return "\n".join(p.text for p in docf.paragraphs)
     return ""
     scores, names = [], []
     for f in files:
+        # Extract filename and file bytes
+        filename = f.name
+        file_bytes = f.read()
+        text = extract_text(file_bytes, filename)
         if not text.strip():
             continue
         cv_emb = get_embedding(text[:4000])  # limit text length
             np.linalg.norm(job_emb) * np.linalg.norm(cv_emb)
         )
         scores.append(sim)
+        names.append(filename)
     top = sorted(zip(names, scores), key=lambda x: x[1], reverse=True)[:10]
     return "\n\n".join(
     fn=rank_cvs,
     inputs=[
         gr.Textbox(label="💼 Job Description", lines=5),
+        gr.File(label="📁 Upload CVs (PDF/DOCX)", file_count="multiple", type="file"),
     ],
     outputs=gr.Markdown(),
     title="📄 AI CV Ranker (Local Model)",