sivan26 committed on
Commit
c4600bb
·
verified ·
1 Parent(s): b601ed7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -88
app.py CHANGED
@@ -1,116 +1,51 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
- import random
5
- from typing import List, Dict, Tuple
6
  import re
7
  import warnings
 
8
 
9
  # Import ML libraries from scikit-learn
10
  from sklearn.feature_extraction.text import TfidfVectorizer
11
  from sklearn.metrics.pairwise import cosine_similarity
12
- from sklearn.preprocessing import LabelEncoder
 
 
13
 
14
  # Suppress warnings for a cleaner output
15
  warnings.filterwarnings('ignore')
16
 
17
- # --- 1. DATA GENERATION ---
18
- # This part is the same as your code, creating a realistic dataset of jobs.
19
- def generate_job_database() -> List[Dict]:
20
- """Generate a comprehensive database of 1000 jobs across various industries."""
21
- job_templates = {
22
- "Technology": [
23
- {"title": "Software Engineer", "desc": "Design, develop, and maintain software applications.", "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"]},
24
- {"title": "Data Scientist", "desc": "Analyze complex data to extract valuable business insights.", "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"]},
25
- {"title": "DevOps Engineer", "desc": "Manage infrastructure, deployment pipelines, and automation.", "skills": ["AWS", "Docker", "Kubernetes", "Linux", "CI/CD", "Terraform"]},
26
- {"title": "Frontend Developer", "desc": "Create intuitive user interfaces and engaging web experiences.", "skills": ["JavaScript", "React", "CSS", "HTML", "TypeScript", "UI/UX Principles"]},
27
- {"title": "Backend Developer", "desc": "Build robust server-side applications, services, and APIs.", "skills": ["Python", "Node.js", "Django", "PostgreSQL", "REST APIs", "MongoDB"]},
28
- {"title": "Machine Learning Engineer", "desc": "Deploy, monitor, and maintain ML models in production environments.", "skills": ["Python", "TensorFlow", "PyTorch", "MLOps", "Docker", "Scikit-learn"]},
29
- ],
30
- "Healthcare": [
31
- {"title": "Registered Nurse", "desc": "Provide compassionate patient care and medical support.", "skills": ["Patient Care", "Medical Knowledge", "CPR", "Communication", "Teamwork"]},
32
- {"title": "Healthcare Data Analyst", "desc": "Analyze clinical data to improve patient outcomes and operational efficiency.", "skills": ["SQL", "Python", "Tableau", "Healthcare Regulations", "Statistics"]},
33
- {"title": "Medical Assistant", "desc": "Support healthcare providers with clinical and administrative tasks.", "skills": ["Patient Communication", "Medical Records", "Scheduling", "Clinical Skills"]},
34
- ],
35
- "Finance": [
36
- {"title": "Financial Analyst", "desc": "Analyze financial data, create financial models, and support investment decisions.", "skills": ["Financial Modeling", "Excel", "Data Analysis", "Valuation", "Market Research"]},
37
- {"title": "Accountant", "desc": "Manage financial records, prepare tax documents, and ensure compliance.", "skills": ["Accounting", "QuickBooks", "Tax Law", "Financial Reporting", "Auditing"]},
38
- {"title": "Fintech Software Engineer", "desc": "Develop software for financial services, focusing on security and scalability.", "skills": ["Python", "Java", "SQL", "Cybersecurity", "Blockchain"]},
39
- ],
40
- "Marketing": [
41
- {"title": "Digital Marketing Manager", "desc": "Develop and execute comprehensive digital marketing strategies.", "skills": ["Digital Marketing", "SEO", "Social Media", "Google Analytics", "Content Strategy"]},
42
- {"title": "Content Creator", "desc": "Produce engaging and brand-aligned content for various platforms.", "skills": ["Content Creation", "SEO", "Social Media", "Writing", "Video Editing"]},
43
- {"title": "Marketing Data Analyst", "desc": "Analyze marketing campaign performance and customer behavior data.", "skills": ["SQL", "Google Analytics", "Data Visualization", "A/B Testing", "Excel"]},
44
- ]
45
- }
46
- experience_levels = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]
47
- salary_ranges = {
48
- "Entry-level": ["$45k-$65k", "$50k-$70k"], "Mid-level": ["$70k-$95k", "$75k-$100k"],
49
- "Senior": ["$100k-$130k", "$115k-$145k"], "Lead/Principal": ["$140k-$170k", "$150k-$180k"]
50
- }
51
- jobs = []
52
- job_id = 1
53
- for _ in range(150): # Generate a larger database
54
- for category, templates in job_templates.items():
55
- template = random.choice(templates)
56
- exp_level = random.choice(experience_levels)
57
- title = f"{exp_level} {template['title']}" if exp_level != "Entry-level" else template['title']
58
- job = {
59
- "id": job_id, "title": title, "description": template["desc"], "requirements": list(set(template["skills"])),
60
- "experience_level": exp_level, "salary_range": random.choice(salary_ranges[exp_level]), "category": category,
61
- "location": random.choice(["Remote", "New York, NY", "San Francisco, CA", "Chicago, IL", "Austin, TX"]),
62
- }
63
- jobs.append(job)
64
- job_id += 1
65
- return jobs
66
 
67
- # --- 2. MACHINE LEARNING MODEL CLASS ---
68
- # This class now contains the ML logic.
69
  class MLJobRecommendationSystem:
70
  def __init__(self, jobs_database: List[Dict]):
71
  print("🤖 Initializing ML-powered Job Recommendation System...")
72
  self.df = pd.DataFrame(jobs_database)
73
  self.vectorizer = TfidfVectorizer(max_features=500, stop_words='english', ngram_range=(1, 2))
74
-
75
- # This is where the "training" happens.
76
  self._train_model()
77
  print("✅ ML models trained successfully!")
78
 
79
  def _train_model(self):
80
- """
81
- Prepares the data and "trains" the TF-IDF model.
82
- In TF-IDF, "training" consists of learning the vocabulary and inverse document frequency weights.
83
- """
84
- # We create a single text field for each job to feed into the model.
85
- # This combines the most important text features of a job.
86
  self.df['combined_text'] = (
87
  self.df['title'] + ' ' +
88
  self.df['description'] + ' ' +
89
  self.df['requirements'].apply(lambda x: ' '.join(x))
90
  ).str.lower()
91
-
92
- # The fit_transform method learns the vocabulary from our job data and converts it into a matrix of TF-IDF features.
93
- # This matrix, self.job_vectors, is our "trained model". It represents every job in a numerical format.
94
  self.job_vectors = self.vectorizer.fit_transform(self.df['combined_text'])
95
 
96
  def recommend_jobs(self, user_skills: str, num_recommendations: int = 10,
97
  filter_category: str = "All Categories", filter_experience: str = "All Levels") -> str:
98
- """
99
- This function takes user input and uses the trained model to find the best matches.
100
- This is the "prediction" or "inference" step.
101
- """
102
  if not user_skills.strip():
103
  return "🔍 Please enter your skills to get personalized AI-powered job recommendations!"
104
 
105
  try:
106
- # 1. PREPARE USER INPUT: We must process the user's skills in the exact same way as our training data.
107
  user_text = re.sub(r'[^\w\s,]', '', user_skills.lower())
108
-
109
- # 2. TRANSFORM USER INPUT: Use the *already trained* vectorizer to convert the user's skills into a numerical vector.
110
- # We use `transform`, not `fit_transform`, because we don't want to re-learn the vocabulary.
111
  user_vector = self.vectorizer.transform([user_text])
112
 
113
- # 3. FILTER JOBS: Apply user's filters for category and experience level.
114
  filtered_df = self.df.copy()
115
  if filter_category and filter_category != "All Categories":
116
  filtered_df = filtered_df[filtered_df['category'] == filter_category]
@@ -120,26 +55,20 @@ class MLJobRecommendationSystem:
120
  if filtered_df.empty:
121
  return "❌ No jobs found matching your filter criteria. Please adjust your filters and try again."
122
 
123
- # Get the indices of the filtered jobs to use with our main job_vectors matrix
124
  filtered_indices = filtered_df.index
125
  filtered_job_vectors = self.job_vectors[filtered_indices]
126
 
127
- # 4. CALCULATE SIMILARITY: This is the core of the prediction.
128
- # We calculate the cosine similarity between the user's vector and all the (filtered) job vectors.
129
  similarity_scores = cosine_similarity(user_vector, filtered_job_vectors)[0]
130
 
131
- # 5. RANK AND SELECT: Add scores to our filtered dataframe and sort to find the best matches.
132
  filtered_df['similarity_score'] = similarity_scores
133
  sorted_jobs = filtered_df.sort_values(by='similarity_score', ascending=False)
134
 
135
- top_jobs = sorted_jobs.head(num_recommendations)
136
 
137
- # 6. FORMAT AND RETURN RESULTS
138
  recommendations = ["# 🎯 AI-Powered Job Recommendations\n*Based on semantic similarity between your skills and job descriptions.*\n---"]
139
  for _, job in top_jobs.iterrows():
140
- # Provide an AI Confidence Score based on the similarity
141
  score = job['similarity_score']
142
- if score < 0.05: continue # Don't show jobs with virtually no match
143
 
144
  match_quality = "🟢 Excellent Match" if score >= 0.5 else "🟡 Good Match" if score >= 0.25 else "🟠 Moderate Match"
145
 
@@ -164,14 +93,14 @@ class MLJobRecommendationSystem:
164
  except Exception as e:
165
  return f"οΏ½οΏ½ An unexpected error occurred: {str(e)}. Please try again."
166
 
167
- # --- 3. SETUP AND LAUNCH GRADIO INTERFACE ---
168
 
169
- # Initialize the system by generating data and training the model
170
  print("🚀 Starting application...")
171
- jobs_db = generate_job_database()
172
- ml_system = MLJobRecommendationSystem(jobs_db)
173
 
174
- # Define the user interface using Gradio
175
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Job Recommender") as app:
176
  gr.HTML("""
177
  <div style="text-align: center; max-width: 800px; margin: auto;">
@@ -212,7 +141,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Job Recommender") as app:
212
  value="### Your personalized job recommendations will appear here.\nEnter your skills and click the button to start! ✨"
213
  )
214
 
215
- # Connect the button click to the recommendation function
216
  submit_btn.click(
217
  fn=ml_system.recommend_jobs,
218
  inputs=[skills_input, num_jobs, category_filter, experience_filter],
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
 
 
4
  import re
5
  import warnings
6
+ from typing import List, Dict
7
 
8
  # Import ML libraries from scikit-learn
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
11
+
12
+ # --- MODIFIED: Import the dataset from the separate file ---
13
+ from jobs_dataset import JOBS_DATABASE
14
 
15
  # Suppress warnings for a cleaner output
16
  warnings.filterwarnings('ignore')
17
 
18
+ # --- The `generate_job_database` function has been REMOVED from this file ---
19
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # --- MACHINE LEARNING MODEL CLASS (This code is the same) ---
 
22
  class MLJobRecommendationSystem:
23
  def __init__(self, jobs_database: List[Dict]):
24
  print("🤖 Initializing ML-powered Job Recommendation System...")
25
  self.df = pd.DataFrame(jobs_database)
26
  self.vectorizer = TfidfVectorizer(max_features=500, stop_words='english', ngram_range=(1, 2))
 
 
27
  self._train_model()
28
  print("✅ ML models trained successfully!")
29
 
30
  def _train_model(self):
31
+ """Prepares the data and 'trains' the TF-IDF model."""
 
 
 
 
 
32
  self.df['combined_text'] = (
33
  self.df['title'] + ' ' +
34
  self.df['description'] + ' ' +
35
  self.df['requirements'].apply(lambda x: ' '.join(x))
36
  ).str.lower()
 
 
 
37
  self.job_vectors = self.vectorizer.fit_transform(self.df['combined_text'])
38
 
39
  def recommend_jobs(self, user_skills: str, num_recommendations: int = 10,
40
  filter_category: str = "All Categories", filter_experience: str = "All Levels") -> str:
41
+ """Uses the trained model to find and recommend jobs."""
 
 
 
42
  if not user_skills.strip():
43
  return "🔍 Please enter your skills to get personalized AI-powered job recommendations!"
44
 
45
  try:
 
46
  user_text = re.sub(r'[^\w\s,]', '', user_skills.lower())
 
 
 
47
  user_vector = self.vectorizer.transform([user_text])
48
 
 
49
  filtered_df = self.df.copy()
50
  if filter_category and filter_category != "All Categories":
51
  filtered_df = filtered_df[filtered_df['category'] == filter_category]
 
55
  if filtered_df.empty:
56
  return "❌ No jobs found matching your filter criteria. Please adjust your filters and try again."
57
 
 
58
  filtered_indices = filtered_df.index
59
  filtered_job_vectors = self.job_vectors[filtered_indices]
60
 
 
 
61
  similarity_scores = cosine_similarity(user_vector, filtered_job_vectors)[0]
62
 
 
63
  filtered_df['similarity_score'] = similarity_scores
64
  sorted_jobs = filtered_df.sort_values(by='similarity_score', ascending=False)
65
 
66
+ top_jobs = sorted_jobs.head(int(num_recommendations))
67
 
 
68
  recommendations = ["# 🎯 AI-Powered Job Recommendations\n*Based on semantic similarity between your skills and job descriptions.*\n---"]
69
  for _, job in top_jobs.iterrows():
 
70
  score = job['similarity_score']
71
+ if score < 0.05: continue
72
 
73
  match_quality = "🟢 Excellent Match" if score >= 0.5 else "🟡 Good Match" if score >= 0.25 else "🟠 Moderate Match"
74
 
 
93
  except Exception as e:
94
  return f"οΏ½οΏ½ An unexpected error occurred: {str(e)}. Please try again."
95
 
96
+ # --- SETUP AND LAUNCH GRADIO INTERFACE ---
97
 
98
+ # --- MODIFIED: Initialize the system using the imported database ---
99
  print("🚀 Starting application...")
100
+ # We use the JOBS_DATABASE variable we imported from the other file.
101
+ ml_system = MLJobRecommendationSystem(JOBS_DATABASE)
102
 
103
+ # Define the user interface using Gradio (This code is the same)
104
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Job Recommender") as app:
105
  gr.HTML("""
106
  <div style="text-align: center; max-width: 800px; margin: auto;">
 
141
  value="### Your personalized job recommendations will appear here.\nEnter your skills and click the button to start! ✨"
142
  )
143
 
 
144
  submit_btn.click(
145
  fn=ml_system.recommend_jobs,
146
  inputs=[skills_input, num_jobs, category_filter, experience_filter],