Spaces:

mkshari
/

srcdaksh

Sleeping

App Files Files Community

mkshari committed 9 days ago

Commit

89e2ebc

verified ·

1 Parent(s): 2b73f22

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -75

app.py CHANGED Viewed

@@ -7,32 +7,35 @@ import pandas as pd
 import re
 # Load models
 try:
     nlp = spacy.load("en_core_web_sm")
-except (ValueError, OSError):
-    import os
-    os.system("python -m spacy download en_core_web_sm")
-    nlp = spacy.load("en_core_web_sm")
 except Exception as e:
-    # Fallback for some HF environments where direct model loading is needed
     try:
         import en_core_web_sm
         nlp = en_core_web_sm.load()
     except:
-        print(f"Error loading spaCy model: {e}")
-        nlp = None
 model = SentenceTransformer('all-MiniLM-L6-v2')
-# Common Skill Dictionary (Simplified for the demo)
 SKILLS_DB = [
     "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
     "git", "machine learning", "nlp", "tensorflow", "pytorch", "java", "c++", "golang",
     "postgresql", "mongodb", "redis", "cloud computing", "devops", "rest api", "graphql",
-    "scikit-learn", "pandas", "numpy", "django", "flask", "typescript", "angular", "vue"
 ]
 ROADMAP_DB = {
     "python": "Master Python: [Real Python](https://realpython.com/) | [Programming with Mosh](https://www.youtube.com/user/programmingwithmosh)",
     "react": "Build UI with React: [Official Docs](https://react.dev/) | [FreeCodeCamp React Course](https://www.freecodecamp.org/news/free-react-course-2024/)",
     "aws": "Cloud Mastery: [AWS Skill Builder](https://explore.skillbuilder.aws/) | [Cloud Guru](https://www.pluralsight.com/cloud-computing/aws)",
@@ -46,74 +49,101 @@ ROADMAP_DB = {
     "javascript": "JS Deep Dive: [MDN Web Docs](https://developer.mozilla.org/en-US/docs/Web/JavaScript) | [JavaScript.info](https://javascript.info/)",
     "typescript": "Strict Typing: [TypeScript Handbook](https://www.typescriptlang.org/docs/handbook/intro.html)",
     "postgresql": "Advanced Data: [Postgres Tutorial](https://www.postgresqltutorial.com/)",
-    "rest api": "API Design: [RESTful API Guide](https://restfulapi.net/)"
 }
 def extract_text_from_pdf(pdf_file):
-    with pdfplumber.open(pdf_file) as pdf:
-        text = ""
-        for page in pdf.pages:
-            text += page.extract_text() or ""
-    return text
 def extract_text_from_docx(docx_file):
-    doc = Document(docx_file)
-    text = ""
-    for para in doc.paragraphs:
-        text += para.text + "\n"
-    return text
 def clean_text(text):
-    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
     return text.lower().strip()
 def get_skills(text):
-    text = clean_text(text)
     found_skills = set()
     for skill in SKILLS_DB:
-        if re.search(r'\b' + re.escape(skill) + r'\b', text):
             found_skills.add(skill)
     return found_skills
 def analyze_resume(resume_file, jd_text):
     if resume_file is None or not jd_text.strip():
-        return "Please upload a resume and provide a job description.", "", "", 0, None
     # Step 1: Extract text
-    if resume_file.name.endswith('.pdf'):
-        resume_text = extract_text_from_pdf(resume_file)
-    elif resume_file.name.endswith('.docx'):
-        resume_text = extract_text_from_docx(resume_file)
     else:
-        return "Unsupported file format. Please upload PDF or DOCX.", "", "", 0, None
-    # Step 2: NLP Analysis (Skills)
     resume_skills = get_skills(resume_text)
     jd_skills = get_skills(jd_text)
     present_skills = list(resume_skills.intersection(jd_skills))
     missing_skills = list(jd_skills - resume_skills)
-    # Step 3: Similarity Score (Sentence Transformers)
     embeddings1 = model.encode(resume_text, convert_to_tensor=True)
     embeddings2 = model.encode(jd_text, convert_to_tensor=True)
     cosine_score = util.pytorch_cos_sim(embeddings1, embeddings2)
     match_percentage = round(cosine_score.item() * 100, 2)
-    # Format output
-    present_str = ", ".join([s.capitalize() for s in present_skills]) if present_skills else "None found."
-    missing_str = ", ".join([s.capitalize() for s in missing_skills]) if missing_skills else "None! You are a great match."
     return f"{match_percentage}%", present_str, missing_str, match_percentage, missing_skills
 def get_roadmap(missing_skills):
     if not missing_skills:
-        return "🎉 Great job! You have all the key skills mentioned. Keep up explicitly highlighting them in your experience section."
     roadmap_items = []
     for skill in missing_skills:
-        resource = ROADMAP_DB.get(skill.lower(), f"Search for {skill} tutorials on YouTube or Coursera.")
-        roadmap_items.append(f"### {skill.capitalize()}\n{resource}")
     return "\n\n".join(roadmap_items)
@@ -122,47 +152,56 @@ custom_css = """
 #logo-img {
     margin: auto;
     display: block;
 }
 .gradio-container {
-    background-color: #f8f9fa;
 }
 .main-header {
     text-align: center;
-    color: #003366; /* Navy Blue from Logo */
-    margin-bottom: 20px;
 }
 .sub-header {
     text-align: center;
-    color: #b8860b; /* Gold from Logo */
-    font-weight: bold;
 }
 .sastra-text {
     text-align: center;
-    font-size: 0.9em;
-    color: #555;
-    letter-spacing: 1px;
 }
 #analyze-btn {
-    background: linear-gradient(90deg, #003366 0%, #004080 100%) !important;
     color: white !important;
-    border: none;
-    border-radius: 8px;
-    padding: 10px 20px;
-    font-weight: bold;
 }
 #roadmap-btn {
-    background: linear-gradient(90deg, #b8860b 0%, #daa520 100%) !important;
     color: white !important;
-    border: none;
 }
 """
-# Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as demo:
-    with gr.Row(variant="compact"):
         with gr.Column(scale=1):
-            gr.Image("logo.png", show_label=False, height=120, container=False, elem_id="logo-img")
-        with gr.Column(scale=4):
             gr.Markdown("# SETHU AI", elem_classes=["main-header"])
             gr.Markdown("### From Resume to Career Readiness", elem_classes=["sub-header"])
             gr.Markdown("SASTRA DEEMED UNIVERSITY", elem_classes=["sastra-text"])
@@ -170,27 +209,26 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as de
     gr.Markdown("---")
     with gr.Row():
-        with gr.Column():
-            gr.Markdown("### 📄 Input Details")
-            resume_input = gr.File(label="Upload Resume (PDF or DOCX)", file_types=[".pdf", ".docx"])
-            jd_input = gr.Textbox(label="Job Description", placeholder="Paste the job requirements here...", lines=8)
-            analyze_btn = gr.Button("Analyze Resume", variant="primary", elem_id="analyze-btn")
-        with gr.Column():
-            gr.Markdown("### 📊 Analysis Dashboard")
-            match_score_output = gr.Label(label="Match Percentage")
             with gr.Tabs():
-                with gr.TabItem("Skills Found"):
-                    present_skills_output = gr.Textbox(label="Available in Resume", interactive=False)
-                with gr.TabItem("Gap Analysis"):
-                    missing_skills_output = gr.Textbox(label="Skills to Acquire", interactive=False)
             gr.Markdown("---")
-            roadmap_btn = gr.Button("Get Guidance & Roadmap", interactive=True, elem_id="roadmap-btn")
-            roadmap_output = gr.Markdown(visible=False)
-    # State for hidden analysis results
     missing_skills_state = gr.State([])
     def on_analyze(resume, jd):
@@ -199,19 +237,18 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as de
             match_score_output: score_str,
             present_skills_output: present,
             missing_skills_output: missing,
-            roadmap_btn: gr.update(interactive=True),
             missing_skills_state: missing_list,
             roadmap_output: gr.update(visible=False)
         }
     def on_roadmap(missing_list):
-        roadmap_content = get_roadmap(missing_list)
-        return gr.update(value=roadmap_content, visible=True)
     analyze_btn.click(
         on_analyze,
         inputs=[resume_input, jd_input],
-        outputs=[match_score_output, present_skills_output, missing_skills_output, roadmap_btn, missing_skills_state, roadmap_output]
     )
     roadmap_btn.click(
@@ -222,3 +259,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as de
 if __name__ == "__main__":
     demo.launch()

 import re
 # Load models
+print("Loading models...")
 # Load the spaCy English pipeline. Two strategies are tried in order; if both
 # fail, `nlp` is set to None so the rest of the module can still be imported.
 try:
     nlp = spacy.load("en_core_web_sm")
     print("spaCy model loaded successfully.")
 except Exception as e:
     print(f"spaCy load error: {e}. Trying direct import...")
     try:
         # The model may be installed as a regular Python package even when
         # spacy.load() cannot resolve it by name.
         import en_core_web_sm
         nlp = en_core_web_sm.load()
     except Exception:
         # `except Exception` (not a bare `except:`) so that Ctrl-C and
         # SystemExit raised during start-up are not silently swallowed.
         print("Model not found. Using fallback keyword matching only.")
         nlp = None
 model = SentenceTransformer('all-MiniLM-L6-v2')
+print("Sentence Transformer loaded.")
+# Common Skill Dictionary (Expanded)
 SKILLS_DB = [
     "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
     "git", "machine learning", "nlp", "tensorflow", "pytorch", "java", "c++", "golang",
     "postgresql", "mongodb", "redis", "cloud computing", "devops", "rest api", "graphql",
+    "scikit-learn", "pandas", "numpy", "django", "flask", "typescript", "angular", "vue",
+    "html", "css", "node.js", "express", "azure", "gcp", "linux", "bash", "jenkins",
+    "terraform", "ansible", "prompt engineering", "openai", "llm", "bert", "transformer",
+    "tableau", "powerbi", "excel", "dynamic programming", "data structures", "algorithms"
 ]
 ROADMAP_DB = {
+    # (previous content kept, adding more)
     "python": "Master Python: [Real Python](https://realpython.com/) | [Programming with Mosh](https://www.youtube.com/user/programmingwithmosh)",
     "react": "Build UI with React: [Official Docs](https://react.dev/) | [FreeCodeCamp React Course](https://www.freecodecamp.org/news/free-react-course-2024/)",
     "aws": "Cloud Mastery: [AWS Skill Builder](https://explore.skillbuilder.aws/) | [Cloud Guru](https://www.pluralsight.com/cloud-computing/aws)",
     "javascript": "JS Deep Dive: [MDN Web Docs](https://developer.mozilla.org/en-US/docs/Web/JavaScript) | [JavaScript.info](https://javascript.info/)",
     "typescript": "Strict Typing: [TypeScript Handbook](https://www.typescriptlang.org/docs/handbook/intro.html)",
     "postgresql": "Advanced Data: [Postgres Tutorial](https://www.postgresqltutorial.com/)",
+    "rest api": "API Design: [RESTful API Guide](https://restfulapi.net/)",
+    "html": "Web Basics: [W3Schools HTML](https://www.w3schools.com/html/)",
+    "css": "Styling: [CSS-Tricks](https://css-tricks.com/)",
+    "node.js": "Backend JS: [Node.js Guide](https://nodejs.dev/en/learn/)"
 }
 def extract_text_from_pdf(pdf_file):
     """Return the text of all pages of a PDF, or "" if extraction fails.
     Args:
         pdf_file: Whatever ``pdfplumber.open`` accepts (the caller in this
             file passes a filesystem path).
     Returns:
         The page texts joined with newlines. Any exception raised while
         opening or reading the file is printed and "" is returned instead.
     """
     try:
         with pdfplumber.open(pdf_file) as pdf:
             # `or ""` keeps a falsy extract_text() result from breaking the join.
             page_texts = [page.extract_text() or "" for page in pdf.pages]
     except Exception as e:
         print(f"PDF Extraction Error: {e}")
         return ""
     # Separate pages with a newline so the last word of one page cannot fuse
     # with the first word of the next (which would hide it from word-boundary
     # keyword matching).
     return "\n".join(page_texts)
 def extract_text_from_docx(docx_file):
     """Return the paragraph text of a DOCX file, or "" if extraction fails.
     Args:
         docx_file: Passed straight to ``Document(...)`` (presumably
             python-docx's ``Document`` - confirm against the file's imports).
     Returns:
         Every paragraph's text followed by a newline, concatenated in document
         order. Any exception is printed and "" is returned instead.
     """
     try:
         paragraphs = Document(docx_file).paragraphs
         return "".join(f"{paragraph.text}\n" for paragraph in paragraphs)
     except Exception as e:
         print(f"DOCX Extraction Error: {e}")
         return ""
 def clean_text(text):
     """Normalise *text* for keyword matching.
     Every character other than ASCII letters, digits, whitespace, '#', '.'
     and '+' is replaced by a single space (so "C#", "Node.js" and "C++"
     survive), then the result is lower-cased and stripped of surrounding
     whitespace.
     """
     spaced = re.sub(r'[^a-zA-Z0-9\s#\.\+]', ' ', text)
     lowered = spaced.lower()
     return lowered.strip()
 def get_skills(text):
     """Return the set of SKILLS_DB entries that occur in *text*.
     Args:
         text: Raw resume or job-description text; it is normalised with
             ``clean_text`` before matching.
     Returns:
         A set containing each SKILLS_DB entry (in its original spelling) that
         appears in the normalised text as a whole term.
     """
     clean_t = clean_text(text)
     found_skills = set()
     for skill in SKILLS_DB:
         # Normalise the skill the same way as the text: clean_text() turns
         # characters such as '-' into spaces, so "scikit-learn" has to be
         # searched for as "scikit learn" to ever match.
         needle = clean_text(skill)
         if not needle:
             # An empty pattern would match every text.
             continue
         # Lookarounds instead of \b: \b needs a word character on one side, so
         # it never matches after the trailing '+' of "c++" when the next
         # character is a space or the end of the text. For skills that start
         # and end with a word character the two forms are equivalent.
         pattern = r'(?<!\w)' + re.escape(needle) + r'(?!\w)'
         if re.search(pattern, clean_t):
             found_skills.add(skill)
     return found_skills
 def analyze_resume(resume_file, jd_text):
     """Compare an uploaded resume with a job description.
     Args:
         resume_file: The uploaded resume, either as a path string or as an
             object exposing the path through ``.name``; ``None`` when nothing
             was uploaded.
         jd_text: The job description text (``None`` or blank is rejected).
     Returns:
         A 5-tuple ``(score_label, present_str, missing_str, match_percentage,
         missing_skills)``. When the inputs are missing, the file type is not
         PDF/DOCX, or no text could be extracted, ``score_label`` carries the
         message and the remaining items are ``"", "", 0, []``.
     """
     print("Analysis started...")
     # `not jd_text` also covers None, which has no .strip().
     if resume_file is None or not jd_text or not jd_text.strip():
         return "Please upload a resume and provide a job description.", "", "", 0, []
     # Step 1: Extract text
     # Accept a plain path string as well as a file-like object with `.name`.
     resume_path = resume_file if isinstance(resume_file, str) else resume_file.name
     print(f"Extracting text from: {resume_path}")
     lowered_path = resume_path.lower()
     if lowered_path.endswith('.pdf'):
         resume_text = extract_text_from_pdf(resume_path)
     elif lowered_path.endswith('.docx'):
         resume_text = extract_text_from_docx(resume_path)
     else:
         return "Unsupported file format. Please upload PDF or DOCX.", "", "", 0, []
     if not resume_text.strip():
         return "Could not extract text from the file. Please check the file content.", "", "", 0, []
     # Step 2: Skill Extraction
     resume_skills = get_skills(resume_text)
     jd_skills = get_skills(jd_text)
     print(f"Resume Skills: {resume_skills}")
     print(f"JD Skills: {jd_skills}")
     # Sorted so the displayed lists are stable between runs (set iteration
     # order of strings is not).
     present_skills = sorted(resume_skills & jd_skills)
     missing_skills = sorted(jd_skills - resume_skills)
     # Step 3: Similarity Score
     embeddings1 = model.encode(resume_text, convert_to_tensor=True)
     embeddings2 = model.encode(jd_text, convert_to_tensor=True)
     cosine_score = util.pytorch_cos_sim(embeddings1, embeddings2)
     match_percentage = round(cosine_score.item() * 100, 2)
     # Adjust score if no skills overlap but similarity is high
     if not present_skills and match_percentage > 50:
         # Penalize for lack of keyword match; round again so float noise from
         # the subtraction does not leak into the displayed label.
         match_percentage = round(match_percentage - 20, 2)
     present_str = ", ".join([s.capitalize() for s in present_skills]) if present_skills else "No matching skills found."
     missing_str = ", ".join([s.capitalize() for s in missing_skills]) if missing_skills else "All JD skills found in resume!"
     print(f"Match: {match_percentage}%")
     return f"{match_percentage}%", present_str, missing_str, match_percentage, missing_skills
 def get_roadmap(missing_skills):
     """Build a Markdown learning roadmap for the given missing skills.
     Args:
         missing_skills: Iterable of skill names; empty or ``None`` means there
             is nothing to learn.
     Returns:
         A congratulation message when there are no missing skills; otherwise
         one Markdown section per skill (the ROADMAP_DB entry for the
         lower-cased skill, or a generic suggestion), separated by blank lines.
     """
     if not missing_skills:
         return "### 🎉 Perfect Match!\nYou already possess all the key skills mentioned in the job description. Tip: ensure you've highlighted these clearly in your experience sections."
     def _section(skill):
         # Generic pointer used when the skill has no curated ROADMAP_DB entry.
         fallback = f"Suggested resource for {skill}: Check out specialized courses on Coursera, Udemy, or YouTube."
         resource = ROADMAP_DB.get(skill.lower(), fallback)
         return f"#### 📖 {skill.capitalize()}\n{resource}"
     return "\n\n".join(_section(skill) for skill in missing_skills)
 #logo-img {
     margin: auto;
     display: block;
+    max-width: 150px;
 }
 .gradio-container {
+    background-color: #f0f2f5;
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
 }
 .main-header {
     text-align: center;
+    color: #003366;
+    margin-bottom: 5px;
 }
 .sub-header {
     text-align: center;
+    color: #b8860b;
+    margin-top: 0;
+    font-style: italic;
 }
 .sastra-text {
     text-align: center;
+    font-size: 1em;
+    color: #444;
+    font-weight: bold;
 }
 #analyze-btn {
+    background: linear-gradient(135deg, #003366 0%, #00509d 100%) !important;
+    border: none !important;
     color: white !important;
+    height: 50px;
+    font-size: 1.1em;
 }
 #roadmap-btn {
+    background: linear-gradient(135deg, #b8860b 0%, #daa520 100%) !important;
+    border: none !important;
     color: white !important;
+    height: 45px;
+}
+.output-label {
+    text-align: center;
+    font-size: 2em;
 }
 """
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as demo:
+    with gr.Row():
         with gr.Column(scale=1):
+            try:
+                gr.Image("logo.png", show_label=False, height=130, container=False, elem_id="logo-img")
+            except:
+                gr.Markdown("### [LOGO MISSING]")
+        with gr.Column(scale=3):
             gr.Markdown("# SETHU AI", elem_classes=["main-header"])
             gr.Markdown("### From Resume to Career Readiness", elem_classes=["sub-header"])
             gr.Markdown("SASTRA DEEMED UNIVERSITY", elem_classes=["sastra-text"])
     gr.Markdown("---")
     with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### 📥 Step 1: Upload & Paste")
+            resume_input = gr.File(label="Upload Resume (PDF/DOCX)", file_types=[".pdf", ".docx"], type="filepath")
+            jd_input = gr.Textbox(label="Job Description", placeholder="Paste the job requirements here...", lines=10)
+            analyze_btn = gr.Button("Analyze Match", variant="primary", elem_id="analyze-btn")
+        with gr.Column(scale=1):
+            gr.Markdown("### 📊 Step 2: Evaluation")
+            match_score_output = gr.Label(label="Match Quality", elem_classes=["output-label"])
             with gr.Tabs():
+                with gr.TabItem("✅ Skills Matched"):
+                    present_skills_output = gr.Textbox(label="", interactive=False, lines=4)
+                with gr.TabItem("❌ Missing Skills"):
+                    missing_skills_output = gr.Textbox(label="", interactive=False, lines=4)
             gr.Markdown("---")
+            roadmap_btn = gr.Button("🚀 Generate Learning Roadmap", interactive=True, elem_id="roadmap-btn")
+    roadmap_output = gr.Markdown(visible=False)
     missing_skills_state = gr.State([])
     def on_analyze(resume, jd):
             match_score_output: score_str,
             present_skills_output: present,
             missing_skills_output: missing,
             missing_skills_state: missing_list,
             roadmap_output: gr.update(visible=False)
         }
     def on_roadmap(missing_list):
+        content = get_roadmap(missing_list)
+        return gr.update(value=content, visible=True)
     analyze_btn.click(
         on_analyze,
         inputs=[resume_input, jd_input],
+        outputs=[match_score_output, present_skills_output, missing_skills_output, missing_skills_state, roadmap_output]
     )
     roadmap_btn.click(
 # Start the Gradio app only when this file is executed as a script. A single
 # guard is enough: a second identical guard would call demo.launch() again
 # as soon as the first call returned.
 if __name__ == "__main__":
     demo.launch()