Spaces:

riteshkokam
/

JobFinder

Sleeping

App Files Files Community

riteshkokam committed on Jun 22, 2025

Commit

9f93bdb

verified ·

1 Parent(s): 8b7bc54

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -79

app.py CHANGED Viewed

@@ -12,11 +12,28 @@ from google.genai.types import GenerateContentConfig, ThinkingConfig
 from datetime import datetime
 import math
 import json
 # Initialize components
 kw_extractor = yake.KeywordExtractor(n=2, top=30)
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
 genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
 PER_PAGE = 10
 SYSTEM_PROMPT = """
@@ -24,21 +41,7 @@ You are a job-matching assistant. Given a resume and job listings,
 rank and explain why each job is a good fit.
 """
-ANALYSIS_PROMPT = """
-Analyze the following job posting and provide:
-1. Company Type: One of [Startup, Mid-size, Enterprise, Non-profit, Government, Consulting, Agency, Other]
-2. Job Summary: A concise 1-2 sentence summary of the role
-Job Title: {title}
-Company: {company}
-Job Description: {description}
-Please respond in JSON format:
-{
-  "company_type": "one of the categories above",
-  "job_summary": "1-2 sentence summary"
-}
-"""
 # 1️⃣ Extract text from resume
 def extract_text(file):
@@ -95,71 +98,62 @@ def fetch_remoteok(keywords):
         return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
     return []
-# 4️⃣ Analyze job with AI to get company type and summary
-def analyze_job_with_ai(job):
-    """Use AI to extract company type and job summary"""
     try:
-        title = job.get("title") or job.get("position", "")
-        company = job.get("company") or job.get("company_name", "")
         description = job.get("description", "")
         # Skip if no description available
-        if not description or len(description.strip()) < 10:
-            return "Other", "Limited job description available"
-        # Truncate description if too long (to stay within token limits)
-        if len(description) > 800:
-            description = description[:800] + "..."
-        prompt = f"""
-Analyze this job posting and provide:
-1. Company Type: Choose ONE from [Startup, Mid-size, Enterprise, Non-profit, Government, Consulting, Agency, Other]
-2. Job Summary: Write a concise 1-2 sentence summary of the main responsibilities
-Job Title: {title}
-Company: {company}
-Job Description: {description}
-Respond in this exact format:
-Company Type: [choose one category]
-Job Summary: [1-2 sentences describing the role]
-"""
-        resp = genai_client.models.generate_content(
-            model="gemini-2.5-flash",
-            contents=prompt,
-        )
-        response_text = resp.text or ""
-        # Extract company type and summary from response
-        company_type = "Other"
-        job_summary = "No summary available"
-        lines = response_text.split('\n')
-        for line in lines:
-            line = line.strip()
-            if line.lower().startswith('company type:'):
-                company_type = line.split(':', 1)[1].strip()
-            elif line.lower().startswith('job summary:'):
-                job_summary = line.split(':', 1)[1].strip()
-        # Validate company type
-        valid_types = ["Startup", "Mid-size", "Enterprise", "Non-profit", "Government", "Consulting", "Agency", "Other"]
-        if company_type not in valid_types:
-            company_type = "Other"
-        # Ensure job summary is not empty
-        if not job_summary or job_summary.lower() in ["no summary available", "n/a", ""]:
-            job_summary = f"Position involves {title.lower()} responsibilities at {company}"
-        return company_type, job_summary
     except Exception as e:
-        print(f"Error analyzing job with AI: {e}")
         title = job.get("title") or job.get("position", "")
-        company = job.get("company") or job.get("company_name", "")
-        return "Other", f"Role involves {title.lower()} responsibilities" if title else "Job details available upon application"
 # 5️⃣ Rank jobs by semantic similarity
 def rank_jobs(resume_text, jobs):
@@ -216,15 +210,14 @@ def find_jobs(file, added_kw, use_ai):
         posted   = format_posted(job)
         apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
-        # Get company type and summary using AI
         print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
-        company_type, job_summary = analyze_job_with_ai(job)
         # Make sure none of these are dicts/lists
         table.append({
             "Role":         str(role),
             "Company":      str(company),
-            "Company Type": str(company_type),
             "Location":     str(location),
             "Posted":       str(posted),
             "Score":        f"{score*100:.1f}%",
@@ -246,13 +239,16 @@ def jobs_to_dataframe(table, page, per_page=PER_PAGE):
     # Convert to list of lists for DataFrame
     df_data = []
     for row in slice_:
-        # Create clickable link for apply URL
-        apply_link = f'<a href="{row["Apply"]}" target="_blank">Apply</a>' if row['Apply'] else "N/A"
         df_data.append([
             row['Role'],
             row['Company'],
-            row['Company Type'],
             row['Location'],
             row['Posted'],
             row['Score'],
@@ -275,7 +271,7 @@ def load_jobs_and_pages(resume, added_kw, use_ai):
 # 9️⃣ Gradio UI
 with gr.Blocks(theme=gr.themes.Base()) as demo:
     gr.Markdown("## 🌍 Global Job Finder")
-    gr.Markdown("*Now with AI-powered company type classification and job summaries*")
     with gr.Row():
         resume = gr.File(label="Upload Resume (PDF/DOCX)")
@@ -284,7 +280,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
     resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
     use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
-    gr.Markdown("**Note:** AI analysis for company type and job summaries is automatically enabled and may take a few moments per job.")
     find_btn = gr.Button("Find Jobs")
@@ -294,10 +290,10 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
     # Page info display
     page_info = gr.Markdown("")
-    # DataFrame for jobs display
     jobs_df = gr.DataFrame(
-        headers=["Role", "Company", "Type", "Location", "Posted", "Score", "Summary", "Apply"],
-        datatype=["str", "str", "str", "str", "str", "str", "str", "html"],
         interactive=False,
         wrap=True,
         value=[]

 from datetime import datetime
 import math
 import json
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 # Initialize components
 kw_extractor = yake.KeywordExtractor(n=2, top=30)
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
 genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
# Initialize the local summarization model once, at import time.
# `summarizer` is left as None when loading fails so that callers can fall
# back to plain-text heuristics instead of crashing the app.
print("Loading summarization model...")
try:
    # Ask PyTorch whether CUDA is usable instead of shelling out to
    # `nvidia-smi`: running the tool spawns a shell, dumps its output to
    # stdout, and only proves the binary exists -- not that the installed
    # torch build can actually use the GPU.
    try:
        import torch

        _summarizer_device = 0 if torch.cuda.is_available() else -1
    except ImportError:
        # No torch available: let the pipeline run on CPU.
        _summarizer_device = -1

    # facebook/bart-large-cnn is a BART checkpoint fine-tuned for
    # summarization; the same checkpoint supplies the tokenizer.
    summarizer = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",
        tokenizer="facebook/bart-large-cnn",
        device=_summarizer_device,  # 0 = first GPU, -1 = CPU
    )
    print("Summarization model loaded successfully!")
except Exception as e:
    # Best-effort by design: any failure (download, memory, missing backend)
    # degrades to the non-model fallback rather than aborting start-up.
    print(f"Error loading model, falling back to basic summarization: {e}")
    summarizer = None
 PER_PAGE = 10
 SYSTEM_PROMPT = """
 rank and explain why each job is a good fit.
 """
 # 1️⃣ Extract text from resume
 def extract_text(file):
         return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
     return []
+# 4️⃣ Analyze job with local Hugging Face model for fast summaries
# Sentinel default meaning "use the module-level `summarizer`"; it lets a
# caller pass `None` explicitly to force the non-model fallback.
_MODULE_SUMMARIZER = object()

# Substrings that mark a sentence as describing the role itself.
_ROLE_HINTS = ('responsible', 'role', 'position', 'work', 'develop', 'manage', 'lead')

_GENERIC_SUMMARY = "Job details available upon application"


def _polish_summary(text):
    """Tidy raw model output; return "" when nothing usable remains."""
    if not text:
        return ""
    cleaned = text.replace("The job involves", "").replace("This position", "Position").strip()
    if not cleaned:
        return ""
    # Dropping a leading phrase can leave a lower-case sentence start.
    cleaned = cleaned[0].upper() + cleaned[1:]
    if not cleaned.endswith(('.', '!', '?')):
        cleaned += '.'
    return cleaned


def _first_meaningful_sentence(text):
    """Return the first of the leading three sentences that mentions the role, or ""."""
    for sentence in text.split('.')[:3]:
        candidate = sentence.strip()
        if len(candidate) > 30 and any(hint in candidate.lower() for hint in _ROLE_HINTS):
            return candidate + '.'
    return ""


def get_job_summary_fast(job, summarize=_MODULE_SUMMARIZER):
    """Return a short, one-sentence summary for a job listing.

    The summary comes from the first strategy that yields text:

    1. a placeholder built from the title when the description is missing
       or shorter than 20 characters;
    2. the summarization model, if one is available;
    3. the first of the opening three sentences that mentions the role;
    4. the title plus the first 50 characters of the description.

    Args:
        job: Mapping for one listing. Reads "description" and "title"
            (falling back to "position").
        summarize: Callable invoked as
            ``summarize(text, max_length=..., min_length=..., do_sample=...,
            truncation=...)`` and returning ``[{"summary_text": str}]``.
            Defaults to the module-level ``summarizer``; pass ``None`` to
            skip the model entirely.

    Returns:
        A non-empty summary string. Errors are logged with ``print`` and
        turned into a generic summary; this function does not raise.
    """
    try:
        description = job.get("description", "")
        title = job.get("title") or job.get("position", "")

        # Skip if no usable description is available
        if not description or len(description.strip()) < 20:
            return f"Position for {title}" if title else _GENERIC_SUMMARY

        # Flatten line breaks so the text reads as a single paragraph
        clean_desc = description.replace('\n', ' ').replace('\r', ' ').strip()

        if summarize is _MODULE_SUMMARIZER:
            summarize = summarizer

        if summarize:
            try:
                # Cap the input at 800 characters to keep inference fast;
                # truncation=True below still enforces the model's own
                # token limit.
                if len(clean_desc) > 800:
                    clean_desc = clean_desc[:800] + "..."

                summary_result = summarize(
                    clean_desc,
                    max_length=60,  # Keep summaries concise
                    min_length=20,
                    do_sample=False,
                    truncation=True
                )
                # Validate *after* cleanup: output consisting only of a
                # stripped phrase must fall through to the text fallbacks
                # instead of being returned as a bare ".".
                summary = _polish_summary(summary_result[0]['summary_text'])
                if summary:
                    return summary
            except Exception as e:
                # A model failure is not fatal; use the text fallbacks.
                print(f"Local model error: {e}")

        # Fallback: first meaningful sentence from the description
        sentence = _first_meaningful_sentence(clean_desc)
        if sentence:
            return sentence

        # Last fallback
        return f"{title} role with responsibilities in {clean_desc[:50]}..." if title else _GENERIC_SUMMARY

    except Exception as e:
        print(f"Error getting job summary: {e}")
        # `job` itself may be the problem (e.g. None), so do not assume it
        # is a mapping inside the error handler.
        title = ""
        if isinstance(job, dict):
            title = job.get("title") or job.get("position", "")
        return f"Role involves {str(title).lower()} responsibilities" if title else _GENERIC_SUMMARY
 # 5️⃣ Rank jobs by semantic similarity
 def rank_jobs(resume_text, jobs):
         posted   = format_posted(job)
         apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
+        # Get fast job summary using local model
         print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
+        job_summary = get_job_summary_fast(job)
         # Make sure none of these are dicts/lists
         table.append({
             "Role":         str(role),
             "Company":      str(company),
             "Location":     str(location),
             "Posted":       str(posted),
             "Score":        f"{score*100:.1f}%",
     # Convert to list of lists for DataFrame
     df_data = []
     for row in slice_:
+        # Create properly formatted clickable link for apply URL
+        if row['Apply'] and row['Apply'] != 'N/A':
+            # Format as HTML link with proper styling
+            apply_link = f'<a href="{row["Apply"]}" target="_blank" style="color: #2563eb; text-decoration: underline;">Apply</a>'
+        else:
+            apply_link = "N/A"
         df_data.append([
             row['Role'],
             row['Company'],
             row['Location'],
             row['Posted'],
             row['Score'],
 # 9️⃣ Gradio UI
 with gr.Blocks(theme=gr.themes.Base()) as demo:
     gr.Markdown("## 🌍 Global Job Finder")
+    gr.Markdown("*Now with fast AI-powered job summaries using local models*")
     with gr.Row():
         resume = gr.File(label="Upload Resume (PDF/DOCX)")
     resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
     use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
+    gr.Markdown("**Note:** Job summaries are generated using fast local AI models for quick results.")
     find_btn = gr.Button("Find Jobs")
     # Page info display
     page_info = gr.Markdown("")
+    # DataFrame for jobs display (removed Company Type column)
     jobs_df = gr.DataFrame(
+        headers=["Role", "Company", "Location", "Posted", "Score", "Summary", "Apply"],
+        datatype=["str", "str", "str", "str", "str", "str", "html"],
         interactive=False,
         wrap=True,
         value=[]