srinikesh1432 committed on
Commit
b2cba08
·
verified ·
1 Parent(s): f0afd6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -62
app.py CHANGED
@@ -1,62 +1,163 @@
1
- import gradio as gr
2
- import os
3
- from pathlib import Path
4
- import numpy as np
5
- import pandas as pd
6
- from sklearn.feature_extraction.text import TfidfVectorizer
7
- from sklearn.metrics.pairwise import cosine_similarity
8
- import PyPDF2, docx
9
-
10
# --- Configuration --------------------------------------------------
# Directory scanned (recursively) for resume files (pdf/docx/doc/txt).
RESUME_DIR = Path("resumes")
# Vocabulary cap for the TF-IDF vectorizer.
MAX_FEATURES = 20000
# ---------------------------------------------------------------------
14
-
15
def extract_text(path):
    """Return the plain text of a resume file.

    Supports PDF (via PyPDF2), DOCX/DOC (via python-docx), and falls back
    to reading anything else as UTF-8 text.

    Args:
        path: pathlib.Path to the resume file.

    Returns:
        Extracted text as a single string (may be empty, e.g. for
        image-only PDFs).
    """
    suffix = path.suffix.lower()
    if suffix == ".pdf":
        # Fix: the original called open(path, "rb") without ever closing
        # the handle; a context manager closes it even if PyPDF2 raises.
        with open(path, "rb") as fh:
            pdf = PyPDF2.PdfReader(fh)
            # extract_text() can return None for pages with no text layer.
            return "\n".join(page.extract_text() or "" for page in pdf.pages)
    elif suffix in (".docx", ".doc"):
        # NOTE(review): python-docx only parses .docx; a legacy .doc will
        # raise here and be skipped by the caller's try/except.
        document = docx.Document(path)
        return "\n".join(p.text for p in document.paragraphs)
    else:
        return path.read_text(encoding="utf-8", errors="ignore")
24
-
25
def load_resumes():
    """Scan RESUME_DIR recursively and extract text from every resume.

    Returns:
        (names, texts): parallel lists of file names and their
        whitespace-normalized, lower-cased contents. Files that fail to
        parse are silently skipped (best effort).
    """
    supported = {".pdf", ".docx", ".doc", ".txt"}
    names, texts = [], []
    for candidate in RESUME_DIR.glob("**/*"):
        if candidate.suffix.lower() not in supported:
            continue
        try:
            raw = extract_text(candidate)
            # Collapse all runs of whitespace and lower-case for TF-IDF.
            texts.append(" ".join(raw.lower().split()))
            names.append(candidate.name)
        except Exception:
            continue  # unreadable/corrupt file: skip, keep going
    return names, texts
36
-
37
# Build the TF-IDF index over the resume corpus once, at import time;
# match_resume() then only pays for transforming the query.
filenames, texts = load_resumes()
vectorizer = TfidfVectorizer(
    stop_words="english",
    max_features=MAX_FEATURES,
)
X = vectorizer.fit_transform(texts)
40
-
41
def match_resume(job_description):
    """Rank the indexed resumes against a job description.

    Args:
        job_description: free-text job posting.

    Returns:
        pandas.DataFrame of the top 10 resumes with the raw cosine match
        and a sigmoid-normalized confidence, sorted by match descending.
    """
    normalized = " ".join(job_description.lower().split())
    query_vec = vectorizer.transform([normalized])
    scores = cosine_similarity(query_vec, X).flatten()
    # Standardize the scores, then squash through a sigmoid so the
    # confidence column is comparable across queries; `or 1e-6` guards
    # against a zero standard deviation.
    mu = scores.mean()
    sigma = scores.std() or 1e-6
    confidence = 1 / (1 + np.exp(-((scores - mu) / sigma)))
    table = pd.DataFrame({
        "Resume": filenames,
        "Match (%)": (scores * 100).round(2),
        "Confidence (%)": (confidence * 100).round(2),
    })
    return table.sort_values("Match (%)", ascending=False).head(10)
53
-
54
# Wire the matcher into a simple single-input Gradio app.
iface = gr.Interface(
    fn=match_resume,
    inputs=gr.Textbox(lines=6, label="Paste Job Description"),
    outputs=gr.Dataframe(label="Top Matching Resumes"),
    title="AI Resume Analyzer + Job Matcher",
    description="Upload your resume dataset and match against a job description using NLP (TF-IDF + Confidence Scoring).",
)

# Launched unconditionally (no __main__ guard), matching the original.
iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import gradio as gr
4
+ import numpy as np
5
+ import pandas as pd
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ from sentence_transformers import SentenceTransformer
8
+ from PyPDF2 import PdfReader
9
+ import docx
10
+ import re
11
+ from collections import Counter
12
+
13
# -------------------------
# CONFIG
# -------------------------
# Folder containing the reference resume dataset (~288 files).
DATASET_FOLDER = "resumes"
# Number of nearest resumes returned per uploaded resume.
TOP_K = 3
18
+
19
+ # -------------------------
20
+ # HELPER FUNCTIONS
21
+ # -------------------------
22
+
23
def extract_text_from_pdf(file):
    """Best-effort extraction of all page text from a PDF.

    Args:
        file: filesystem path or binary file object accepted by
            PyPDF2.PdfReader.

    Returns:
        Concatenated page text (each page followed by a space), or the
        partial/empty text accumulated before a parse failure.
    """
    text = ""
    try:
        reader = PdfReader(file)
        for page in reader.pages:
            chunk = page.extract_text()
            if chunk:
                text += chunk + " "
    except Exception:
        # Fix: narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); extraction stays best-effort.
        pass
    return text
34
+
35
def extract_text_from_docx(file):
    """Best-effort extraction of paragraph text from a .docx file.

    Args:
        file: filesystem path or file object accepted by docx.Document.

    Returns:
        Space-joined paragraph text, or "" when the document cannot be
        parsed.
    """
    text = ""
    try:
        doc = docx.Document(file)
        text = " ".join(para.text for para in doc.paragraphs)
    except Exception:
        # Fix: narrowed from a bare `except:`; keep the best-effort
        # contract of returning "" on failure.
        pass
    return text
43
+
44
def extract_text(file):
    """Extract text from an uploaded resume.

    Accepts either a file-like object (with a ``name`` attribute, as
    Gradio's File component historically provided) or a plain filesystem
    path string (what newer Gradio versions pass) — the original silently
    returned "" for path strings because ``file.seek`` raised.

    Args:
        file: uploaded file object or path string.

    Returns:
        Extracted text, or "" for unsupported/unreadable input.
    """
    # Resolve the name used to sniff the extension.
    if hasattr(file, "name"):
        ref = file.name
    elif isinstance(file, str):
        ref = file
    else:
        ref = None
    # No reference name at all: assume plain text (original behavior).
    ext = ref.rsplit(".", 1)[-1].lower() if ref is not None else "txt"
    if ext == "pdf":
        return extract_text_from_pdf(file)
    elif ext == "docx":
        return extract_text_from_docx(file)
    elif ext == "txt":
        try:
            if isinstance(file, str):
                # Bare path: open and read it directly.
                with open(file, "r", encoding="utf-8", errors="ignore") as fh:
                    return fh.read()
            file.seek(0)
            data = file.read()
            # Binary-mode file objects yield bytes; text-mode yield str.
            return data.decode("utf-8") if isinstance(data, bytes) else data
        except (OSError, UnicodeDecodeError, ValueError, AttributeError):
            # Narrowed from a bare `except:`; unreadable input -> "".
            return ""
    else:
        return ""
58
+
59
def load_resume_dataset(folder_path):
    """Load every readable resume directly under *folder_path*.

    Supported extensions: pdf, docx, txt. Unreadable or empty files are
    skipped so one corrupt resume cannot abort the whole index build.

    Args:
        folder_path: directory containing the resume files.

    Returns:
        (names, resumes): parallel lists of base file names and raw text,
        in sorted path order (deterministic, unlike raw glob order, so the
        embedding matrix rows are stable across runs).
    """
    names, resumes = [], []
    for path in sorted(glob.glob(os.path.join(folder_path, "*"))):
        # splitext is robust for dot-less names, unlike split('.')[-1].
        ext = os.path.splitext(path)[1].lstrip(".").lower()
        text = ""
        try:
            if ext == "pdf":
                text = extract_text_from_pdf(path)
            elif ext == "docx":
                text = extract_text_from_docx(path)
            elif ext == "txt":
                with open(path, "r", encoding="utf-8", errors="ignore") as fh:
                    text = fh.read()
        except Exception:
            # Fix: narrowed from a bare `except:`; skip unparseable files.
            continue
        if text.strip():
            resumes.append(text)
            names.append(os.path.basename(path))
    return names, resumes
80
+
81
+ # -------------------------
82
+ # DYNAMIC JOB ROLE EXTRACTION
83
+ # -------------------------
84
+
85
def infer_job_from_text(text):
    """Guess a probable job title/role from resume text.

    Heuristic: scan the first five lines for capitalized phrases and
    return the most frequent one; fall back to "Other" when none match.
    """
    title_pattern = re.compile(r'\b[A-Z][a-zA-Z &/-]{2,}\b')
    candidates = []
    for line in text.split("\n")[:5]:
        candidates.extend(title_pattern.findall(line))
    if not candidates:
        # No capitalized phrase found in the header lines.
        return "Other"
    # Counter.most_common is insertion-ordered for ties, so the earliest
    # phrase wins when counts are equal — same behavior as the original.
    return Counter(candidates).most_common(1)[0][0]
105
+
106
# -------------------------
# LOAD MODEL & DATASET
# -------------------------

# The embedding model and resume corpus are built once at import time;
# each Gradio request then only pays for encoding the uploaded resume.
st_model = SentenceTransformer('all-MiniLM-L6-v2')
resume_names, resume_texts = load_resume_dataset(DATASET_FOLDER)
resume_embeddings = st_model.encode(resume_texts, convert_to_numpy=True)

# Map each dataset resume to a heuristically inferred role label.
resume_roles = {
    name: infer_job_from_text(body)
    for name, body in zip(resume_names, resume_texts)
}
116
+
117
+ # -------------------------
118
+ # MATCH FUNCTION
119
+ # -------------------------
120
+
121
def match_resume(file):
    """Match an uploaded resume against the indexed resume dataset.

    Args:
        file: uploaded resume (PDF/DOCX/TXT) from the Gradio File input.

    Returns:
        pandas.DataFrame with the TOP_K most similar dataset resumes, the
        role inferred for each, and a cosine-similarity confidence score.
        On failure, a single-row DataFrame with an "Error" column.
    """
    input_text = extract_text(file)
    if not input_text.strip():
        return pd.DataFrame([{"Error": "Could not extract text from this resume."}])

    # Fix: with an empty dataset, cosine_similarity on an empty embedding
    # matrix raises; report it instead of crashing the request.
    if len(resume_names) == 0:
        return pd.DataFrame([{"Error": "Resume dataset is empty."}])

    input_emb = st_model.encode([input_text], convert_to_numpy=True)
    sims = cosine_similarity(input_emb, resume_embeddings)[0]
    # argsort is ascending, so take the tail and reverse it to get the
    # TOP_K highest similarities, best first (fewer if the corpus is small).
    top_indices = sims.argsort()[-TOP_K:][::-1]

    results = []
    for idx in top_indices:
        matched_name = resume_names[idx]
        results.append({
            "Matched Resume": matched_name,
            "Recommended Job": resume_roles[matched_name],
            "Confidence Score": f"{sims[idx] * 100:.2f}%",
        })
    return pd.DataFrame(results)
141
+
142
# -------------------------
# GRADIO UI
# -------------------------

# Light cosmetic overrides for the default Gradio theme.
css = """
body {background-color: #f7f9fc;}
h1 {color: #333; text-align: center;}
.gr-button {background-color: #4CAF50; color: white;}
"""

# NOTE(review): `title` is defined but never passed to gr.Interface (the
# `title=` kwarg below uses a plain string); kept for compatibility.
title = "<h1>AI Resume Analyzer & Job Matcher</h1>"

iface = gr.Interface(
    fn=match_resume,
    inputs=gr.File(label="Upload Your Resume (PDF, DOCX, TXT)"),
    outputs=gr.Dataframe(label="Top Job Matches"),
    title="AI Resume Analyzer & Job Matcher",
    description="Upload a resume to get top 3 job recommendations with confidence scores.",
    css=css,
)

if __name__ == "__main__":
    iface.launch()