sivan26 committed on
Commit
ee4cdc1
·
verified ·
1 Parent(s): 5542f71

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +224 -0
app.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import random
import re
import warnings
from typing import Dict, List, Optional, Tuple

import gradio as gr
import numpy as np
import pandas as pd

# Import ML libraries from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
13
+
14
# Suppress warnings for a cleaner output.
# NOTE(review): this silences every warning category process-wide, including
# deprecation notices from pandas / scikit-learn; narrow it with `category=`
# if those should stay visible.
warnings.filterwarnings('ignore')
16
+
17
+ # --- 1. DATA GENERATION ---
18
+ # Builds the synthetic dataset of job postings that the recommender is fitted on.
19
def generate_job_database(rounds: int = 150, seed: Optional[int] = None) -> List[Dict]:
    """Generate a synthetic database of job postings across several industries.

    Every round draws one random template from each industry category, so the
    result contains ``rounds * number_of_categories`` postings: 600 with the
    defaults (150 rounds x 4 categories).

    Args:
        rounds: Number of passes over the categories. Zero or a negative
            value yields an empty list.
        seed: When given, a private ``random.Random(seed)`` is used so the
            same seed always produces the same database. When ``None`` the
            shared module-level ``random`` state is used.

    Returns:
        A list of job dicts with the keys ``id`` (1-based, sequential),
        ``title``, ``description``, ``requirements`` (list of skill names in
        template order), ``experience_level``, ``salary_range``, ``category``
        and ``location``.
    """
    job_templates = {
        "Technology": [
            {"title": "Software Engineer", "desc": "Design, develop, and maintain software applications.", "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"]},
            {"title": "Data Scientist", "desc": "Analyze complex data to extract valuable business insights.", "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"]},
            {"title": "DevOps Engineer", "desc": "Manage infrastructure, deployment pipelines, and automation.", "skills": ["AWS", "Docker", "Kubernetes", "Linux", "CI/CD", "Terraform"]},
            {"title": "Frontend Developer", "desc": "Create intuitive user interfaces and engaging web experiences.", "skills": ["JavaScript", "React", "CSS", "HTML", "TypeScript", "UI/UX Principles"]},
            {"title": "Backend Developer", "desc": "Build robust server-side applications, services, and APIs.", "skills": ["Python", "Node.js", "Django", "PostgreSQL", "REST APIs", "MongoDB"]},
            {"title": "Machine Learning Engineer", "desc": "Deploy, monitor, and maintain ML models in production environments.", "skills": ["Python", "TensorFlow", "PyTorch", "MLOps", "Docker", "Scikit-learn"]},
        ],
        "Healthcare": [
            {"title": "Registered Nurse", "desc": "Provide compassionate patient care and medical support.", "skills": ["Patient Care", "Medical Knowledge", "CPR", "Communication", "Teamwork"]},
            {"title": "Healthcare Data Analyst", "desc": "Analyze clinical data to improve patient outcomes and operational efficiency.", "skills": ["SQL", "Python", "Tableau", "Healthcare Regulations", "Statistics"]},
            {"title": "Medical Assistant", "desc": "Support healthcare providers with clinical and administrative tasks.", "skills": ["Patient Communication", "Medical Records", "Scheduling", "Clinical Skills"]},
        ],
        "Finance": [
            {"title": "Financial Analyst", "desc": "Analyze financial data, create financial models, and support investment decisions.", "skills": ["Financial Modeling", "Excel", "Data Analysis", "Valuation", "Market Research"]},
            {"title": "Accountant", "desc": "Manage financial records, prepare tax documents, and ensure compliance.", "skills": ["Accounting", "QuickBooks", "Tax Law", "Financial Reporting", "Auditing"]},
            {"title": "Fintech Software Engineer", "desc": "Develop software for financial services, focusing on security and scalability.", "skills": ["Python", "Java", "SQL", "Cybersecurity", "Blockchain"]},
        ],
        "Marketing": [
            {"title": "Digital Marketing Manager", "desc": "Develop and execute comprehensive digital marketing strategies.", "skills": ["Digital Marketing", "SEO", "Social Media", "Google Analytics", "Content Strategy"]},
            {"title": "Content Creator", "desc": "Produce engaging and brand-aligned content for various platforms.", "skills": ["Content Creation", "SEO", "Social Media", "Writing", "Video Editing"]},
            {"title": "Marketing Data Analyst", "desc": "Analyze marketing campaign performance and customer behavior data.", "skills": ["SQL", "Google Analytics", "Data Visualization", "A/B Testing", "Excel"]},
        ],
    }
    experience_levels = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]
    salary_ranges = {
        "Entry-level": ["$45k-$65k", "$50k-$70k"],
        "Mid-level": ["$70k-$95k", "$75k-$100k"],
        "Senior": ["$100k-$130k", "$115k-$145k"],
        "Lead/Principal": ["$140k-$170k", "$150k-$180k"],
    }
    locations = ["Remote", "New York, NY", "San Francisco, CA", "Chicago, IL", "Austin, TX"]

    # A private generator keeps seeded runs reproducible without touching the
    # global random state; without a seed we fall back to the shared module.
    rng = random if seed is None else random.Random(seed)

    jobs = []
    job_id = 1
    for _ in range(rounds):
        for category, templates in job_templates.items():
            template = rng.choice(templates)
            exp_level = rng.choice(experience_levels)
            # Entry-level postings carry the bare title; others are prefixed.
            if exp_level == "Entry-level":
                title = template["title"]
            else:
                title = f"{exp_level} {template['title']}"
            jobs.append({
                "id": job_id,
                "title": title,
                "description": template["desc"],
                # Deduplicate while keeping the template's skill order, and
                # hand every job its own list so callers cannot mutate the
                # shared template.
                "requirements": list(dict.fromkeys(template["skills"])),
                "experience_level": exp_level,
                "salary_range": rng.choice(salary_ranges[exp_level]),
                "category": category,
                "location": rng.choice(locations),
            })
            job_id += 1
    return jobs
66
+
67
+ # --- 2. MACHINE LEARNING MODEL CLASS ---
68
+ # This class contains the ML logic: TF-IDF vectorization and cosine-similarity ranking.
69
class MLJobRecommendationSystem:
    """Content-based job recommender built on TF-IDF and cosine similarity.

    The constructor turns the job list into a DataFrame (``self.df``), fits a
    ``TfidfVectorizer`` on each job's title, description and skills, and keeps
    the resulting matrix in ``self.job_vectors`` (one row per job, in the same
    order as ``self.df``). ``recommend_jobs`` then vectorizes a free-text
    skills query with the fitted vocabulary and ranks jobs by cosine
    similarity.

    Each job dict must provide ``title``, ``description``, ``requirements``
    (an iterable of strings), ``category``, ``experience_level``,
    ``location`` and ``salary_range``.
    """

    # Postings scoring below this cosine similarity are not shown at all.
    MIN_SIMILARITY = 0.05

    def __init__(self, jobs_database: List[Dict]):
        print("🤖 Initializing ML-powered Job Recommendation System...")
        self.df = pd.DataFrame(jobs_database)
        self.vectorizer = TfidfVectorizer(max_features=500, stop_words='english', ngram_range=(1, 2))

        # This is where the "training" happens.
        self._train_model()
        print("✅ ML models trained successfully!")

    def _train_model(self):
        """Build the per-job text field and fit the TF-IDF vectorizer on it.

        For TF-IDF, "training" means learning the vocabulary and the inverse
        document frequency weights. Adds a ``combined_text`` column to
        ``self.df`` and stores the fitted matrix in ``self.job_vectors``.
        """
        # One lower-cased text field per job: title + description + skills.
        self.df['combined_text'] = (
            self.df['title'] + ' ' +
            self.df['description'] + ' ' +
            self.df['requirements'].apply(' '.join)
        ).str.lower()

        # fit_transform learns the vocabulary and returns the job/term matrix.
        self.job_vectors = self.vectorizer.fit_transform(self.df['combined_text'])

    @staticmethod
    def _normalize_query(user_skills: str) -> str:
        """Lower-case the query and turn punctuation (except commas) into spaces.

        Punctuation must become a separator rather than vanish: the vectorizer
        splits job text such as "CI/CD" or "Node.js" into "ci cd" / "node js",
        so deleting the punctuation ("cicd", "nodejs") would produce tokens
        that never occur in the fitted vocabulary.
        """
        spaced = re.sub(r'[^\w\s,]', ' ', user_skills.lower())
        return ' '.join(spaced.split())

    @staticmethod
    def _match_label(score: float) -> str:
        """Map a similarity score to its human-readable match-quality label."""
        if score >= 0.5:
            return "🟢 Excellent Match"
        if score >= 0.25:
            return "🟡 Good Match"
        return "🟠 Moderate Match"

    @classmethod
    def _format_job(cls, job, score: float) -> str:
        """Render one job (any mapping with the job keys) as a Markdown card."""
        lines = [
            "",
            f"## {job['title']}",
            f"**{cls._match_label(score)}** | **AI Confidence: {score:.1%}**",
            f"- **Category:** {job['category']}",
            f"- **Experience:** {job['experience_level']}",
            f"- **Location:** {job['location']}",
            f"- **Salary:** {job['salary_range']}",
            f"- **Description:** {job['description']}",
            f"- **Core Skills:** {', '.join(job['requirements'])}",
            "---",
            "",
        ]
        return '\n'.join(lines)

    def recommend_jobs(self, user_skills: str, num_recommendations: int = 10,
                       filter_category: str = "All Categories", filter_experience: str = "All Levels") -> str:
        """Return Markdown describing the jobs that best match ``user_skills``.

        Args:
            user_skills: Free-text description of the user's skills.
            num_recommendations: Maximum number of jobs to list; coerced to
                ``int`` because UI sliders may deliver a float.
            filter_category: Exact category to keep, or "All Categories".
            filter_experience: Exact experience level to keep, or "All Levels".

        Returns:
            A Markdown string: the ranked recommendations, or a short message
            when the query is empty, the filters match nothing, no job scores
            at least ``MIN_SIMILARITY``, or an unexpected error occurs.
        """
        if not user_skills or not user_skills.strip():
            return "🔍 Please enter your skills to get personalized AI-powered job recommendations!"

        # This method is the UI boundary, so unexpected failures are reported
        # to the user as a message instead of propagating into the web app.
        try:
            # `transform`, not `fit_transform`: reuse the fitted vocabulary.
            user_vector = self.vectorizer.transform([self._normalize_query(user_skills)])

            # Build a positional mask so rows of self.job_vectors are selected
            # by position, independent of the DataFrame's index labels.
            keep = np.ones(len(self.df), dtype=bool)
            if filter_category and filter_category != "All Categories":
                keep &= (self.df['category'] == filter_category).to_numpy()
            if filter_experience and filter_experience != "All Levels":
                keep &= (self.df['experience_level'] == filter_experience).to_numpy()

            positions = np.flatnonzero(keep)
            if positions.size == 0:
                return "❌ No jobs found matching your filter criteria. Please adjust your filters and try again."

            scores = cosine_similarity(user_vector, self.job_vectors[positions])[0]

            # `.assign` returns a new frame, so self.df is never mutated and
            # no chained-assignment warning is triggered.
            top_jobs = (
                self.df.iloc[positions]
                .assign(similarity_score=scores)
                .sort_values(by='similarity_score', ascending=False)
                .head(int(num_recommendations))
            )

            recommendations = ["# 🎯 AI-Powered Job Recommendations\n*Based on semantic similarity between your skills and job descriptions.*\n---"]
            for _, job in top_jobs.iterrows():
                score = job['similarity_score']
                if score < self.MIN_SIMILARITY:
                    continue  # Don't show jobs with virtually no match
                recommendations.append(self._format_job(job, score))

            # Only the header is present: nothing cleared the threshold.
            if len(recommendations) == 1:
                return "😔 No relevant jobs found with the current skills. Try being more descriptive or adjusting filters."

            return '\n'.join(recommendations)

        except Exception as e:
            return f"❌ An unexpected error occurred: {str(e)}. Please try again."
166
+
167
+ # --- 3. SETUP AND LAUNCH GRADIO INTERFACE ---
168
+
169
# Initialize the system by generating data and training the model.
# NOTE: this runs at import time, so importing the module builds the job
# database and fits the vectorizer before the UI is defined.
print("🚀 Starting application...")
jobs_db = generate_job_database()
ml_system = MLJobRecommendationSystem(jobs_db)

# Define the user interface using Gradio: inputs on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Job Recommender") as app:
    gr.HTML("""
    <div style="text-align: center; max-width: 800px; margin: auto;">
    <h1>🤖 AI-Powered Job Recommendation System</h1>
    <p>This app uses a Machine Learning model (TF-IDF and Cosine Similarity) to find jobs that are semantically similar to your skills, going beyond simple keyword matching.</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=2):
            skills_input = gr.Textbox(
                label="Enter Your Skills and Experience",
                placeholder="e.g., Python development with flask, data analysis, machine learning models, and aws...",
                lines=4,
            )

            num_jobs = gr.Slider(
                minimum=5, maximum=20, value=10, step=1, label="Number of Recommendations"
            )

            with gr.Row():
                # Dropdown choices come from the values actually present in the data.
                category_filter = gr.Dropdown(
                    choices=["All Categories"] + sorted(ml_system.df['category'].unique()),
                    value="All Categories",
                    label="Filter by Industry",
                )

                experience_filter = gr.Dropdown(
                    choices=["All Levels"] + sorted(ml_system.df['experience_level'].unique()),
                    value="All Levels",
                    label="Filter by Experience",
                )

            submit_btn = gr.Button("🚀 Get AI-Powered Recommendations", variant="primary")

        with gr.Column(scale=3):
            output_markdown = gr.Markdown(
                value="### Your personalized job recommendations will appear here.\nEnter your skills and click the button to start! ✨"
            )

    # Connect the button click to the recommendation function; the inputs are
    # passed positionally in the order recommend_jobs declares its parameters.
    submit_btn.click(
        fn=ml_system.recommend_jobs,
        inputs=[skills_input, num_jobs, category_filter, experience_filter],
        outputs=output_markdown,
    )

# Launch the Gradio app
if __name__ == "__main__":
    app.launch()