Spaces:

riteshkokam
/

JobFinder

Sleeping

App Files Files Community

riteshkokam committed on Jun 22, 2025

Commit

0215b5a

verified ·

1 Parent(s): 9f93bdb

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -92

app.py CHANGED Viewed

@@ -12,28 +12,12 @@ from google.genai.types import GenerateContentConfig, ThinkingConfig
 from datetime import datetime
 import math
 import json
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 # Initialize components
 kw_extractor = yake.KeywordExtractor(n=2, top=30)
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
 genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
-# Initialize local summarization model (2025 state-of-the-art)
-print("Loading summarization model...")
-try:
-    # Using Microsoft's DialoGPT-based summarizer - fast and efficient for 2025
-    summarizer = pipeline(
-        "summarization",
-        model="facebook/bart-large-cnn",  # Fast and reliable for job descriptions
-        tokenizer="facebook/bart-large-cnn",
-        device=0 if os.system("nvidia-smi") == 0 else -1  # Use GPU if available
-    )
-    print("Summarization model loaded successfully!")
-except Exception as e:
-    print(f"Error loading model, falling back to basic summarization: {e}")
-    summarizer = None
 PER_PAGE = 10
 SYSTEM_PROMPT = """
@@ -41,8 +25,6 @@ You are a job-matching assistant. Given a resume and job listings,
 rank and explain why each job is a good fit.
 """
 # 1️⃣ Extract text from resume
 def extract_text(file):
     ext = file.name.lower().split('.')[-1]
@@ -98,64 +80,7 @@ def fetch_remoteok(keywords):
         return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
     return []
-# 4️⃣ Analyze job with local Hugging Face model for fast summaries
-def get_job_summary_fast(job):
-    """Use local HF model to get job summary quickly"""
-    try:
-        description = job.get("description", "")
-        title = job.get("title") or job.get("position", "")
-        # Skip if no description available
-        if not description or len(description.strip()) < 20:
-            return f"Position for {title}" if title else "Job details available upon application"
-        # Clean and prepare text for summarization
-        clean_desc = description.replace('\n', ' ').replace('\r', ' ').strip()
-        # Use local model if available
-        if summarizer:
-            try:
-                # Truncate to model's max length (1024 tokens for BART)
-                if len(clean_desc) > 800:
-                    clean_desc = clean_desc[:800] + "..."
-                # Generate summary with specific parameters for job descriptions
-                summary_result = summarizer(
-                    clean_desc,
-                    max_length=60,  # Keep summaries concise
-                    min_length=20,
-                    do_sample=False,
-                    truncation=True
-                )
-                summary = summary_result[0]['summary_text']
-                # Clean up the summary
-                if summary:
-                    # Remove redundant phrases and make it more natural
-                    summary = summary.replace("The job involves", "").replace("This position", "Position").strip()
-                    if not summary.endswith('.'):
-                        summary += '.'
-                    return summary
-            except Exception as e:
-                print(f"Local model error: {e}")
-        # Fallback: Extract first meaningful sentence from description
-        sentences = clean_desc.split('.')
-        for sentence in sentences[:3]:  # Check first 3 sentences
-            if len(sentence.strip()) > 30 and any(word in sentence.lower() for word in ['responsible', 'role', 'position', 'work', 'develop', 'manage', 'lead']):
-                return sentence.strip() + '.'
-        # Last fallback
-        return f"{title} role with responsibilities in {clean_desc[:50]}..." if title else "Job details available upon application"
-    except Exception as e:
-        print(f"Error getting job summary: {e}")
-        title = job.get("title") or job.get("position", "")
-        return f"Role involves {title.lower()} responsibilities" if title else "Job details available upon application"
-# 5️⃣ Rank jobs by semantic similarity
 def rank_jobs(resume_text, jobs):
     if not jobs:
         return []
@@ -164,7 +89,7 @@ def rank_jobs(resume_text, jobs):
     sims = cosine_similarity(emb_r, emb_j)[0]
     return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
-# 6️⃣ Gemini refinement (optional)
 def refine_with_ai(ranked, resume_text):
     lines = []
     for job, _ in ranked:
@@ -193,7 +118,7 @@ def format_posted(job):
         return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
     return str(raw)[:10]
-# 7️⃣ Main pipeline
 def find_jobs(file, added_kw, use_ai):
     resume = extract_text(file) or ""
     base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
@@ -210,10 +135,6 @@ def find_jobs(file, added_kw, use_ai):
         posted   = format_posted(job)
         apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
-        # Get fast job summary using local model
-        print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
-        job_summary = get_job_summary_fast(job)
         # Make sure none of these are dicts/lists
         table.append({
             "Role":         str(role),
@@ -221,14 +142,13 @@ def find_jobs(file, added_kw, use_ai):
             "Location":     str(location),
             "Posted":       str(posted),
             "Score":        f"{score*100:.1f}%",
-            "Summary":      str(job_summary),
             "Apply":        str(apply_url)
         })
     explanation = refine_with_ai(ranked, resume) if use_ai else ""
     return table, explanation
-# 8️⃣ Jobs in DataFrame format
 def jobs_to_dataframe(table, page, per_page=PER_PAGE):
     total = len(table)
     pages = max(1, math.ceil(total / per_page))
@@ -252,7 +172,6 @@ def jobs_to_dataframe(table, page, per_page=PER_PAGE):
             row['Location'],
             row['Posted'],
             row['Score'],
-            row['Summary'],
             apply_link
         ])
@@ -268,10 +187,9 @@ def load_jobs_and_pages(resume, added_kw, use_ai):
     return full_table, explanation, expl_header, slider_update, first_page_data, page_info
-# 9️⃣ Gradio UI
 with gr.Blocks(theme=gr.themes.Base()) as demo:
     gr.Markdown("## 🌍 Global Job Finder")
-    gr.Markdown("*Now with fast AI-powered job summaries using local models*")
     with gr.Row():
         resume = gr.File(label="Upload Resume (PDF/DOCX)")
@@ -280,8 +198,6 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
     resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
     use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
-    gr.Markdown("**Note:** Job summaries are generated using fast local AI models for quick results.")
     find_btn = gr.Button("Find Jobs")
     jobs_state = gr.State([])           # holds full table
@@ -290,10 +206,10 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
     # Page info display
     page_info = gr.Markdown("")
-    # DataFrame for jobs display (removed Company Type column)
     jobs_df = gr.DataFrame(
-        headers=["Role", "Company", "Location", "Posted", "Score", "Summary", "Apply"],
-        datatype=["str", "str", "str", "str", "str", "str", "html"],
         interactive=False,
         wrap=True,
         value=[]

 from datetime import datetime
 import math
 import json
 # Initialize components
 kw_extractor = yake.KeywordExtractor(n=2, top=30)
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
 genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
 PER_PAGE = 10
 SYSTEM_PROMPT = """
 rank and explain why each job is a good fit.
 """
 # 1️⃣ Extract text from resume
 def extract_text(file):
     ext = file.name.lower().split('.')[-1]
         return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
     return []
+# 4️⃣ Rank jobs by semantic similarity
 def rank_jobs(resume_text, jobs):
     if not jobs:
         return []
     sims = cosine_similarity(emb_r, emb_j)[0]
     return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
+# 5️⃣ Gemini refinement (optional)
 def refine_with_ai(ranked, resume_text):
     lines = []
     for job, _ in ranked:
         return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
     return str(raw)[:10]
+# 6️⃣ Main pipeline
 def find_jobs(file, added_kw, use_ai):
     resume = extract_text(file) or ""
     base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
         posted   = format_posted(job)
         apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
         # Make sure none of these are dicts/lists
         table.append({
             "Role":         str(role),
             "Location":     str(location),
             "Posted":       str(posted),
             "Score":        f"{score*100:.1f}%",
             "Apply":        str(apply_url)
         })
     explanation = refine_with_ai(ranked, resume) if use_ai else ""
     return table, explanation
+# 7️⃣ Jobs in DataFrame format
 def jobs_to_dataframe(table, page, per_page=PER_PAGE):
     total = len(table)
     pages = max(1, math.ceil(total / per_page))
             row['Location'],
             row['Posted'],
             row['Score'],
             apply_link
         ])
     return full_table, explanation, expl_header, slider_update, first_page_data, page_info
+# 8️⃣ Gradio UI
 with gr.Blocks(theme=gr.themes.Base()) as demo:
     gr.Markdown("## 🌍 Global Job Finder")
     with gr.Row():
         resume = gr.File(label="Upload Resume (PDF/DOCX)")
     resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
     use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
     find_btn = gr.Button("Find Jobs")
     jobs_state = gr.State([])           # holds full table
     # Page info display
     page_info = gr.Markdown("")
+    # DataFrame for jobs display (removed Summary column)
     jobs_df = gr.DataFrame(
+        headers=["Role", "Company", "Location", "Posted", "Score", "Apply"],
+        datatype=["str", "str", "str", "str", "str", "html"],
         interactive=False,
         wrap=True,
         value=[]