Spaces:

riteshkokam
/

JobFinder

Sleeping

App Files Files Community

riteshkokam committed on Jun 22, 2025

Commit

92bb6b1

verified ·

1 Parent(s): 86190d1

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -20

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from google import genai
 from google.genai.types import GenerateContentConfig, ThinkingConfig
 from datetime import datetime
 import math
 # Initialize components
 kw_extractor = yake.KeywordExtractor(n=2, top=30)
@@ -23,6 +24,22 @@ You are a job-matching assistant. Given a resume and job listings,
 rank and explain why each job is a good fit.
 """
 # 1️⃣ Extract text from resume
 def extract_text(file):
     ext = file.name.lower().split('.')[-1]
@@ -34,7 +51,7 @@ def extract_text(file):
 # 2️⃣ Extract keywords using YAKE
 def extract_keywords(text):
-    # Remove the first line (often the candidate’s name/header)
     parts = text.split("\n", 1)
     body = parts[1] if len(parts) > 1 else text
@@ -78,7 +95,56 @@ def fetch_remoteok(keywords):
         return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
     return []
-# 4️⃣ Rank jobs by semantic similarity
 def rank_jobs(resume_text, jobs):
     if not jobs:
         return []
@@ -87,7 +153,7 @@ def rank_jobs(resume_text, jobs):
     sims = cosine_similarity(emb_r, emb_j)[0]
     return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
-# 5️⃣ Gemini refinement (optional)
 def refine_with_ai(ranked, resume_text):
     lines = []
     for job, _ in ranked:
@@ -116,7 +182,7 @@ def format_posted(job):
         return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
     return str(raw)[:10]
-# 6️⃣ Main pipeline
 def find_jobs(file, added_kw, use_ai):
     resume = extract_text(file) or ""
     base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
@@ -126,27 +192,33 @@ def find_jobs(file, added_kw, use_ai):
     print("Rank_jobs", ranked)
     table = []
-    for job, score in ranked:
         role     = job.get("title") or job.get("position", "")
         company  = job.get("company") or job.get("company_name", "")
         location = job.get("location", "N/A")
-        # Normalize date (as we did before)
         posted   = format_posted(job)
         apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
         # Make sure none of these are dicts/lists
         table.append({
-            "Role":     str(role),
-            "Company":  str(company),
-            "Location": str(location),
-            "Posted":   str(posted),
-            "Score":    f"{score*100:.1f}%",
-            "Apply":    str(apply_url)
         })
     explanation = refine_with_ai(ranked, resume) if use_ai else ""
     return table, explanation
-# 7️⃣ Jobs in Markdown
 def jobs_to_markdown(table, page, per_page=PER_PAGE):
     total = len(table)
     pages = max(1, math.ceil(total / per_page))
@@ -155,13 +227,15 @@ def jobs_to_markdown(table, page, per_page=PER_PAGE):
     slice_ = table[start:end]
     md  = f"**Showing jobs {start+1}–{min(end,total)} of {total} (Page {page}/{pages})**\n\n"
-    md += "| Role | Company | Location | Posted | Score | Apply |\n"
-    md += "| ---- | ------- | -------- | ------ | ----- | ----- |\n"
     for row in slice_:
         link = f"[Apply]({row['Apply']})" if row['Apply'] else ""
         md += (
-            f"| {row['Role']} | {row['Company']} | {row['Location']} "
-            f"| {row['Posted']} | {row['Score']} | {link} |\n"
         )
     return md
@@ -175,20 +249,25 @@ def load_jobs_and_pages(resume, added_kw, use_ai):
     return full_table, explanation, expl_header, slider_update, first_page_md
-# 7️⃣ Gradio UI
 with gr.Blocks(theme=gr.themes.Base()) as demo:
     gr.Markdown("## 🌍 Global Job Finder")
     with gr.Row():
         resume = gr.File(label="Upload Resume (PDF/DOCX)")
         added = gr.Textbox(label="Add keywords (comma-separated)", placeholder="e.g. Python, ML")
     resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
     use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
     find_btn = gr.Button("Find Jobs")
     jobs_state = gr.State([])           # holds full table
     page_sel   = gr.Slider(1, 1, step=1, value=1, label="Page")
-    jobs_md    = gr.Markdown()          # shows the current page’s markdown
     expl_md_h  = gr.Markdown()
     expl_md    = gr.Markdown()          # shows AI explanation
@@ -208,4 +287,4 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

 from google.genai.types import GenerateContentConfig, ThinkingConfig
 from datetime import datetime
 import math
+import json
 # Initialize components
 kw_extractor = yake.KeywordExtractor(n=2, top=30)
 rank and explain why each job is a good fit.
 """
# Prompt template for per-job analysis; rendered with str.format().
# {title}, {company} and {description} are the only replacement fields.
# The braces of the JSON example are doubled ({{ and }}) so that
# str.format() emits them literally instead of parsing them as a field,
# which would raise KeyError.
ANALYSIS_PROMPT = """
Analyze the following job posting and provide:
1. Company Type: One of [Startup, Mid-size, Enterprise, Non-profit, Government, Consulting, Agency, Other]
2. Job Summary: A concise 1-2 sentence summary of the role
Job Title: {title}
Company: {company}
Job Description: {description}
Please respond in JSON format:
{{
  "company_type": "one of the categories above",
  "job_summary": "1-2 sentence summary"
}}
"""
 # 1️⃣ Extract text from resume
 def extract_text(file):
     ext = file.name.lower().split('.')[-1]
 # 2️⃣ Extract keywords using YAKE
 def extract_keywords(text):
+    # Remove the first line (often the candidate's name/header)
     parts = text.split("\n", 1)
     body = parts[1] if len(parts) > 1 else text
         return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
     return []
+# 4️⃣ Analyze job with AI to get company type and summary
def _parse_analysis_response(text):
    """Pull ``(company_type, job_summary)`` out of the model's reply text.

    Tries strict JSON first (tolerating Markdown code fences or prose
    around the object), then falls back to scanning ``key: value`` lines.
    Missing or empty fields fall back to "Other" / "No summary available".
    """
    company_type = "Other"
    job_summary = "No summary available"
    if not text:
        return company_type, job_summary

    # Replies are frequently wrapped in ```json fences or surrounded by
    # prose, so parse only the outermost {...} span when one exists.
    start, end = text.find("{"), text.rfind("}")
    candidate = text[start:end + 1] if 0 <= start < end else text
    try:
        result = json.loads(candidate)
    except json.JSONDecodeError:
        result = None

    if isinstance(result, dict):
        # `or` also covers JSON null / empty-string values.
        return (
            result.get("company_type") or company_type,
            result.get("job_summary") or job_summary,
        )

    # Fallback: scrape `"key": "value",` style lines from malformed output.
    for line in text.splitlines():
        # Split on the FIRST colon so values containing ':' stay intact.
        key, sep, value = line.partition(":")
        if not sep:
            continue
        value = value.strip().rstrip(",").strip().strip('"')
        if not value:
            continue
        key = key.lower()
        if "company_type" in key:
            company_type = value
        elif "job_summary" in key:
            job_summary = value
    return company_type, job_summary


def analyze_job_with_ai(job):
    """Use AI to extract company type and job summary.

    Args:
        job: Mapping describing one job listing. Reads "title" or
            "position", "company" or "company_name", and "description".

    Returns:
        A ``(company_type, job_summary)`` tuple. On any failure (API error,
        bad input, unparseable reply) returns
        ``("Other", "No summary available")`` instead of raising.

    Relies on the module-level ``ANALYSIS_PROMPT`` template and
    ``genai_client`` (defined elsewhere in this file).
    """
    try:
        title = job.get("title") or job.get("position", "")
        company = job.get("company") or job.get("company_name", "")
        # A feed may carry description=None; normalise before measuring it.
        description = str(job.get("description") or "")
        # Truncate description if too long (to stay within token limits)
        if len(description) > 1000:
            description = description[:1000] + "..."
        prompt = ANALYSIS_PROMPT.format(
            title=title,
            company=company,
            description=description
        )
        resp = genai_client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
        )
        return _parse_analysis_response(resp.text)
    except Exception as e:
        # Deliberate best-effort boundary: one failed analysis must not
        # abort the whole job search, so log and return placeholders.
        print(f"Error analyzing job with AI: {e}")
        return "Other", "No summary available"
+# 5️⃣ Rank jobs by semantic similarity
 def rank_jobs(resume_text, jobs):
     if not jobs:
         return []
     sims = cosine_similarity(emb_r, emb_j)[0]
     return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
+# 6️⃣ Gemini refinement (optional)
 def refine_with_ai(ranked, resume_text):
     lines = []
     for job, _ in ranked:
         return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
     return str(raw)[:10]
+# 7️⃣ Main pipeline
 def find_jobs(file, added_kw, use_ai):
     resume = extract_text(file) or ""
     base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
     print("Rank_jobs", ranked)
     table = []
+    for i, (job, score) in enumerate(ranked):
         role     = job.get("title") or job.get("position", "")
         company  = job.get("company") or job.get("company_name", "")
         location = job.get("location", "N/A")
         posted   = format_posted(job)
         apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
+        # Get company type and summary using AI
+        print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
+        company_type, job_summary = analyze_job_with_ai(job)
         # Make sure none of these are dicts/lists
         table.append({
+            "Role":         str(role),
+            "Company":      str(company),
+            "Company Type": str(company_type),
+            "Location":     str(location),
+            "Posted":       str(posted),
+            "Score":        f"{score*100:.1f}%",
+            "Summary":      str(job_summary),
+            "Apply":        str(apply_url)
         })
     explanation = refine_with_ai(ranked, resume) if use_ai else ""
     return table, explanation
+# 8️⃣ Jobs in Markdown
 def jobs_to_markdown(table, page, per_page=PER_PAGE):
     total = len(table)
     pages = max(1, math.ceil(total / per_page))
     slice_ = table[start:end]
     md  = f"**Showing jobs {start+1}–{min(end,total)} of {total} (Page {page}/{pages})**\n\n"
+    md += "| Role | Company | Type | Location | Posted | Score | Summary | Apply |\n"
+    md += "| ---- | ------- | ---- | -------- | ------ | ----- | ------- | ----- |\n"
     for row in slice_:
         link = f"[Apply]({row['Apply']})" if row['Apply'] else ""
+        # Truncate summary if too long for table display
+        summary = row['Summary'][:100] + "..." if len(row['Summary']) > 100 else row['Summary']
         md += (
+            f"| {row['Role']} | {row['Company']} | {row['Company Type']} | {row['Location']} "
+            f"| {row['Posted']} | {row['Score']} | {summary} | {link} |\n"
         )
     return md
     return full_table, explanation, expl_header, slider_update, first_page_md
+# 9️⃣ Gradio UI
 with gr.Blocks(theme=gr.themes.Base()) as demo:
     gr.Markdown("## 🌍 Global Job Finder")
+    gr.Markdown("*Now with AI-powered company type classification and job summaries*")
     with gr.Row():
         resume = gr.File(label="Upload Resume (PDF/DOCX)")
         added = gr.Textbox(label="Add keywords (comma-separated)", placeholder="e.g. Python, ML")
     resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
     use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
+    gr.Markdown("**Note:** AI analysis for company type and job summaries is automatically enabled and may take a few moments per job.")
     find_btn = gr.Button("Find Jobs")
     jobs_state = gr.State([])           # holds full table
     page_sel   = gr.Slider(1, 1, step=1, value=1, label="Page")
+    jobs_md    = gr.Markdown()          # shows the current page's markdown
     expl_md_h  = gr.Markdown()
     expl_md    = gr.Markdown()          # shows AI explanation
     )
 if __name__ == "__main__":
+    demo.launch()