Spaces:

sivan26
/

Jobconnection

Build error

App Files Community

sivan26 committed on Aug 13, 2025

Commit

c4600bb

verified ·

1 Parent(s): b601ed7

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -88

app.py CHANGED Viewed

@@ -1,116 +1,51 @@
 import gradio as gr
 import pandas as pd
 import numpy as np
-import random
-from typing import List, Dict, Tuple
 import re
 import warnings
 # Import ML libraries from scikit-learn
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-from sklearn.preprocessing import LabelEncoder
 # Suppress warnings for a cleaner output
 warnings.filterwarnings('ignore')
-# --- 1. DATA GENERATION ---
-# This part is the same as your code, creating a realistic dataset of jobs.
-def generate_job_database() -> List[Dict]:
-    """Generate a comprehensive database of 1000 jobs across various industries."""
-    job_templates = {
-        "Technology": [
-            {"title": "Software Engineer", "desc": "Design, develop, and maintain software applications.", "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"]},
-            {"title": "Data Scientist", "desc": "Analyze complex data to extract valuable business insights.", "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"]},
-            {"title": "DevOps Engineer", "desc": "Manage infrastructure, deployment pipelines, and automation.", "skills": ["AWS", "Docker", "Kubernetes", "Linux", "CI/CD", "Terraform"]},
-            {"title": "Frontend Developer", "desc": "Create intuitive user interfaces and engaging web experiences.", "skills": ["JavaScript", "React", "CSS", "HTML", "TypeScript", "UI/UX Principles"]},
-            {"title": "Backend Developer", "desc": "Build robust server-side applications, services, and APIs.", "skills": ["Python", "Node.js", "Django", "PostgreSQL", "REST APIs", "MongoDB"]},
-            {"title": "Machine Learning Engineer", "desc": "Deploy, monitor, and maintain ML models in production environments.", "skills": ["Python", "TensorFlow", "PyTorch", "MLOps", "Docker", "Scikit-learn"]},
-        ],
-        "Healthcare": [
-            {"title": "Registered Nurse", "desc": "Provide compassionate patient care and medical support.", "skills": ["Patient Care", "Medical Knowledge", "CPR", "Communication", "Teamwork"]},
-            {"title": "Healthcare Data Analyst", "desc": "Analyze clinical data to improve patient outcomes and operational efficiency.", "skills": ["SQL", "Python", "Tableau", "Healthcare Regulations", "Statistics"]},
-            {"title": "Medical Assistant", "desc": "Support healthcare providers with clinical and administrative tasks.", "skills": ["Patient Communication", "Medical Records", "Scheduling", "Clinical Skills"]},
-        ],
-        "Finance": [
-            {"title": "Financial Analyst", "desc": "Analyze financial data, create financial models, and support investment decisions.", "skills": ["Financial Modeling", "Excel", "Data Analysis", "Valuation", "Market Research"]},
-            {"title": "Accountant", "desc": "Manage financial records, prepare tax documents, and ensure compliance.", "skills": ["Accounting", "QuickBooks", "Tax Law", "Financial Reporting", "Auditing"]},
-            {"title": "Fintech Software Engineer", "desc": "Develop software for financial services, focusing on security and scalability.", "skills": ["Python", "Java", "SQL", "Cybersecurity", "Blockchain"]},
-        ],
-        "Marketing": [
-            {"title": "Digital Marketing Manager", "desc": "Develop and execute comprehensive digital marketing strategies.", "skills": ["Digital Marketing", "SEO", "Social Media", "Google Analytics", "Content Strategy"]},
-            {"title": "Content Creator", "desc": "Produce engaging and brand-aligned content for various platforms.", "skills": ["Content Creation", "SEO", "Social Media", "Writing", "Video Editing"]},
-            {"title": "Marketing Data Analyst", "desc": "Analyze marketing campaign performance and customer behavior data.", "skills": ["SQL", "Google Analytics", "Data Visualization", "A/B Testing", "Excel"]},
-        ]
-    }
-    experience_levels = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]
-    salary_ranges = {
-        "Entry-level": ["$45k-$65k", "$50k-$70k"], "Mid-level": ["$70k-$95k", "$75k-$100k"],
-        "Senior": ["$100k-$130k", "$115k-$145k"], "Lead/Principal": ["$140k-$170k", "$150k-$180k"]
-    }
-    jobs = []
-    job_id = 1
-    for _ in range(150): # Generate a larger database
-        for category, templates in job_templates.items():
-            template = random.choice(templates)
-            exp_level = random.choice(experience_levels)
-            title = f"{exp_level} {template['title']}" if exp_level != "Entry-level" else template['title']
-            job = {
-                "id": job_id, "title": title, "description": template["desc"], "requirements": list(set(template["skills"])),
-                "experience_level": exp_level, "salary_range": random.choice(salary_ranges[exp_level]), "category": category,
-                "location": random.choice(["Remote", "New York, NY", "San Francisco, CA", "Chicago, IL", "Austin, TX"]),
-            }
-            jobs.append(job)
-            job_id += 1
-    return jobs
-# --- 2. MACHINE LEARNING MODEL CLASS ---
-# This class now contains the ML logic.
 class MLJobRecommendationSystem:
     def __init__(self, jobs_database: List[Dict]):
         print("🤖 Initializing ML-powered Job Recommendation System...")
         self.df = pd.DataFrame(jobs_database)
         self.vectorizer = TfidfVectorizer(max_features=500, stop_words='english', ngram_range=(1, 2))
-        # This is where the "training" happens.
         self._train_model()
         print("✅ ML models trained successfully!")
     def _train_model(self):
-        """
-        Prepares the data and "trains" the TF-IDF model.
-        In TF-IDF, "training" consists of learning the vocabulary and inverse document frequency weights.
-        """
-        # We create a single text field for each job to feed into the model.
-        # This combines the most important text features of a job.
         self.df['combined_text'] = (
             self.df['title'] + ' ' +
             self.df['description'] + ' ' +
             self.df['requirements'].apply(lambda x: ' '.join(x))
         ).str.lower()
-        # The fit_transform method learns the vocabulary from our job data and converts it into a matrix of TF-IDF features.
-        # This matrix, self.job_vectors, is our "trained model". It represents every job in a numerical format.
         self.job_vectors = self.vectorizer.fit_transform(self.df['combined_text'])
     def recommend_jobs(self, user_skills: str, num_recommendations: int = 10,
                       filter_category: str = "All Categories", filter_experience: str = "All Levels") -> str:
-        """
-        This function takes user input and uses the trained model to find the best matches.
-        This is the "prediction" or "inference" step.
-        """
         if not user_skills.strip():
             return "🔍 Please enter your skills to get personalized AI-powered job recommendations!"
         try:
-            # 1. PREPARE USER INPUT: We must process the user's skills in the exact same way as our training data.
             user_text = re.sub(r'[^\w\s,]', '', user_skills.lower())
-            # 2. TRANSFORM USER INPUT: Use the *already trained* vectorizer to convert the user's skills into a numerical vector.
-            # We use `transform`, not `fit_transform`, because we don't want to re-learn the vocabulary.
             user_vector = self.vectorizer.transform([user_text])
-            # 3. FILTER JOBS: Apply user's filters for category and experience level.
             filtered_df = self.df.copy()
             if filter_category and filter_category != "All Categories":
                 filtered_df = filtered_df[filtered_df['category'] == filter_category]
@@ -120,26 +55,20 @@ class MLJobRecommendationSystem:
             if filtered_df.empty:
                 return "❌ No jobs found matching your filter criteria. Please adjust your filters and try again."
-            # Get the indices of the filtered jobs to use with our main job_vectors matrix
             filtered_indices = filtered_df.index
             filtered_job_vectors = self.job_vectors[filtered_indices]
-            # 4. CALCULATE SIMILARITY: This is the core of the prediction.
-            # We calculate the cosine similarity between the user's vector and all the (filtered) job vectors.
             similarity_scores = cosine_similarity(user_vector, filtered_job_vectors)[0]
-            # 5. RANK AND SELECT: Add scores to our filtered dataframe and sort to find the best matches.
             filtered_df['similarity_score'] = similarity_scores
             sorted_jobs = filtered_df.sort_values(by='similarity_score', ascending=False)
-            top_jobs = sorted_jobs.head(num_recommendations)
-            # 6. FORMAT AND RETURN RESULTS
             recommendations = ["# 🎯 AI-Powered Job Recommendations\n*Based on semantic similarity between your skills and job descriptions.*\n---"]
             for _, job in top_jobs.iterrows():
-                # Provide an AI Confidence Score based on the similarity
                 score = job['similarity_score']
-                if score < 0.05: continue # Don't show jobs with virtually no match
                 match_quality = "🟢 Excellent Match" if score >= 0.5 else "🟡 Good Match" if score >= 0.25 else "🟠 Moderate Match"
@@ -164,14 +93,14 @@ class MLJobRecommendationSystem:
         except Exception as e:
             return f"�� An unexpected error occurred: {str(e)}. Please try again."
-# --- 3. SETUP AND LAUNCH GRADIO INTERFACE ---
-# Initialize the system by generating data and training the model
 print("🚀 Starting application...")
-jobs_db = generate_job_database()
-ml_system = MLJobRecommendationSystem(jobs_db)
-# Define the user interface using Gradio
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Job Recommender") as app:
     gr.HTML("""
     <div style="text-align: center; max-width: 800px; margin: auto;">
@@ -212,7 +141,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Job Recommender") as app:
                 value="### Your personalized job recommendations will appear here.\nEnter your skills and click the button to start! ✨"
             )
-    # Connect the button click to the recommendation function
     submit_btn.click(
         fn=ml_system.recommend_jobs,
         inputs=[skills_input, num_jobs, category_filter, experience_filter],

 import gradio as gr
 import pandas as pd
 import numpy as np
 import re
 import warnings
+from typing import List, Dict
 # Import ML libraries from scikit-learn
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+# --- MODIFIED: Import the dataset from the separate file ---
+from jobs_dataset import JOBS_DATABASE
 # Suppress warnings for a cleaner output
 warnings.filterwarnings('ignore')
+# --- The `generate_job_database` function has been REMOVED from this file ---
+# --- MACHINE LEARNING MODEL CLASS (This code is the same) ---
 class MLJobRecommendationSystem:
     def __init__(self, jobs_database: List[Dict]):
         print("🤖 Initializing ML-powered Job Recommendation System...")
         self.df = pd.DataFrame(jobs_database)
         self.vectorizer = TfidfVectorizer(max_features=500, stop_words='english', ngram_range=(1, 2))
         self._train_model()
         print("✅ ML models trained successfully!")
     def _train_model(self):
+        """Prepares the data and 'trains' the TF-IDF model."""
         self.df['combined_text'] = (
             self.df['title'] + ' ' +
             self.df['description'] + ' ' +
             self.df['requirements'].apply(lambda x: ' '.join(x))
         ).str.lower()
         self.job_vectors = self.vectorizer.fit_transform(self.df['combined_text'])
     def recommend_jobs(self, user_skills: str, num_recommendations: int = 10,
                       filter_category: str = "All Categories", filter_experience: str = "All Levels") -> str:
+        """Uses the trained model to find and recommend jobs."""
         if not user_skills.strip():
             return "🔍 Please enter your skills to get personalized AI-powered job recommendations!"
         try:
             user_text = re.sub(r'[^\w\s,]', '', user_skills.lower())
             user_vector = self.vectorizer.transform([user_text])
             filtered_df = self.df.copy()
             if filter_category and filter_category != "All Categories":
                 filtered_df = filtered_df[filtered_df['category'] == filter_category]
             if filtered_df.empty:
                 return "❌ No jobs found matching your filter criteria. Please adjust your filters and try again."
             filtered_indices = filtered_df.index
             filtered_job_vectors = self.job_vectors[filtered_indices]
             similarity_scores = cosine_similarity(user_vector, filtered_job_vectors)[0]
             filtered_df['similarity_score'] = similarity_scores
             sorted_jobs = filtered_df.sort_values(by='similarity_score', ascending=False)
+            top_jobs = sorted_jobs.head(int(num_recommendations))
             recommendations = ["# 🎯 AI-Powered Job Recommendations\n*Based on semantic similarity between your skills and job descriptions.*\n---"]
             for _, job in top_jobs.iterrows():
                 score = job['similarity_score']
+                if score < 0.05: continue
                 match_quality = "🟢 Excellent Match" if score >= 0.5 else "🟡 Good Match" if score >= 0.25 else "🟠 Moderate Match"
         except Exception as e:
             return f"�� An unexpected error occurred: {str(e)}. Please try again."
+# --- SETUP AND LAUNCH GRADIO INTERFACE ---
+# --- MODIFIED: Initialize the system using the imported database ---
 print("🚀 Starting application...")
+# We use the JOBS_DATABASE variable we imported from the other file.
+ml_system = MLJobRecommendationSystem(JOBS_DATABASE)
+# Define the user interface using Gradio (This code is the same)
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Job Recommender") as app:
     gr.HTML("""
     <div style="text-align: center; max-width: 800px; margin: auto;">
                 value="### Your personalized job recommendations will appear here.\nEnter your skills and click the button to start! ✨"
             )
     submit_btn.click(
         fn=ml_system.recommend_jobs,
         inputs=[skills_input, num_jobs, category_filter, experience_filter],