Spaces:
Sleeping
Sleeping
File size: 8,376 Bytes
7a65254 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# Importing necessary libraries
import mysql.connector
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import tkinter as tk
from tkinter import ttk, messagebox
# Download the NLTK data this script relies on.
# 'punkt_tab' is the tokenizer data that word_tokenize loads on NLTK >= 3.8.2
# (it replaced the pickled 'punkt' models); 'punkt' is still requested so that
# older NLTK releases keep working. 'stopwords' backs stopwords.words('english').
for _resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)
# MySQL Database Connection
def connect_db():
    """Open a new connection to the resume/internship MySQL database.

    Each setting can be overridden through an environment variable
    (``DB_HOST``, ``DB_USER``, ``DB_PASSWORD``, ``DB_NAME``). When a variable
    is not set, the value that was previously hard-coded here is used, so
    existing setups keep working unchanged.

    Returns:
        The connection object returned by ``mysql.connector.connect``. The
        caller is responsible for closing it.
    """
    import os  # local import so this block does not depend on the file header

    # NOTE(review): a real password should not live in source control; set
    # DB_PASSWORD in the environment and remove the fallback once deployed.
    return mysql.connector.connect(
        host=os.environ.get("DB_HOST", "localhost"),
        user=os.environ.get("DB_USER", "root"),
        password=os.environ.get("DB_PASSWORD", "Chaitu895@"),  # Change to your actual MySQL password
        database=os.environ.get("DB_NAME", "resume_internship_matching"),
    )
def _query_to_frame(cursor, query):
    """Run *query* on *cursor* and return all rows as a DataFrame."""
    cursor.execute(query)
    rows = cursor.fetchall()
    # DB-API cursors expose the result-set column names as the first item of
    # each ``description`` entry, in the same order as the row values.
    column_names = [column[0] for column in cursor.description]
    return pd.DataFrame(rows, columns=column_names)


# Fetching data from MySQL
def fetch_data():
    """Load the ``resume_info`` and ``internship_info`` tables.

    A connection is opened with ``connect_db()``; both the cursor and the
    connection are closed even if a query fails, so a database error no
    longer leaks the connection.

    Returns:
        A ``(resume_df, internship_df)`` tuple of DataFrames whose columns
        are named after the table columns.
    """
    conn = connect_db()
    try:
        cursor = conn.cursor()
        try:
            resume_df = _query_to_frame(cursor, "SELECT * FROM resume_info")
            internship_df = _query_to_frame(cursor, "SELECT * FROM internship_info")
        finally:
            cursor.close()
    finally:
        conn.close()

    # Preview of what was loaded, printed to the console.
    print(resume_df.head())
    print(internship_df.head())
    return resume_df, internship_df
# Preprocessing function for skills
def preprocess_skills(skills):
    """Turn a free-text skills string into a list of lowercase tokens.

    Args:
        skills: Raw skills text. Any value that is not a string, or a string
            that is empty or only whitespace, yields an empty list.

    Returns:
        The tokens produced by ``word_tokenize`` on the lowercased text, in
        their original order, with English stopwords and punctuation tokens
        removed.
    """
    if not isinstance(skills, str) or not skills.strip():
        return []

    # Build the stopword lookup once per call; previously the full list was
    # re-fetched and scanned linearly for every single token.
    stop_words = set(stopwords.words('english'))

    return [
        token
        for token in word_tokenize(skills.lower())
        # The punctuation check is a substring test against
        # string.punctuation, kept exactly as before.
        if token not in stop_words and token not in string.punctuation
    ]
# Load data from database
resume_df, internship_df = fetch_data()

# Preprocessing: missing values become empty strings before tokenizing
resume_df.fillna('', inplace=True)
internship_df.fillna('', inplace=True)

resume_df['processed_Skills'] = resume_df['skills'].apply(preprocess_skills)
print("Processed Resume Skills:")
print(resume_df[['processed_Skills']].head())

internship_df['processed_Required_Skills'] = internship_df['skills_required'].apply(preprocess_skills)
print("\nProcessed Internship Skills:")
print(internship_df[['processed_Required_Skills']].head())

# Creating a set of unique skills
# The token lists are flattened explicitly instead of with Series.sum():
# sum() yields the integer 0 for an empty table (so set(...) would raise a
# TypeError) and concatenates the lists one by one, which is quadratic.
all_Skills = [skill for tokens in resume_df['processed_Skills'] for skill in tokens]
all_Skills += [skill for tokens in internship_df['processed_Required_Skills'] for skill in tokens]
unique_Skills = set(all_Skills)
# Indices are assigned over the sorted skills so the mapping is the same on
# every run (set iteration order for strings varies between processes).
Skill_to_index = {skill: idx for idx, skill in enumerate(sorted(unique_Skills))}
# Converting skills to numerical vectors
def skills_to_vector(skills):
    """Count how often each known skill occurs in *skills*.

    Args:
        skills: Iterable of skill tokens.

    Returns:
        A list with one integer slot per entry of the module-level
        ``Skill_to_index`` mapping; slot ``Skill_to_index[s]`` holds the
        number of times ``s`` appears in *skills*. Tokens that are not in the
        mapping are ignored.
    """
    counts = [0] * len(Skill_to_index)
    for token in skills:
        slot = Skill_to_index.get(token)
        if slot is None:
            # Unknown skill: nothing to count.
            continue
        counts[slot] += 1
    return counts
# Add a count-vector column to each table, derived from its token-list column.
for _frame, _token_column, _vector_column in (
    (resume_df, 'processed_Skills', 'Skill_vector'),
    (internship_df, 'processed_Required_Skills', 'Required_Skill_vector'),
):
    _frame[_vector_column] = _frame[_token_column].apply(skills_to_vector)
# Function to calculate Jaccard similarity
def calculate_similarity(resume_skills, internship_skills):
    """Return the Jaccard similarity of two skill collections.

    Both arguments are reduced to sets, so duplicates do not matter. The
    result is ``|A & B| / |A | B|``; when both collections are empty the
    function returns ``0`` instead of dividing by zero.
    """
    applicant = set(resume_skills)
    required = set(internship_skills)
    combined = applicant | required
    if not combined:
        return 0
    shared = applicant & required
    return len(shared) / len(combined)
# Function to match internships
def match_internships(resume):
    """Rank the internships in ``internship_df`` against one resume.

    Args:
        resume: Mapping-like row providing ``'processed_Skills'``.

    Returns:
        Up to five dicts, best match first, for internships whose similarity
        to the resume (as computed by ``calculate_similarity``) is greater
        than zero. Each dict carries the internship details plus a
        ``'similarity_score'`` entry. Ties keep the order of ``internship_df``.
    """
    # Result key -> internship_df column it is copied from.
    field_sources = {
        'internship_title': 'role',
        'company': 'company_name',
        'type_of_internship': 'type_of_internship',
        'duration': 'duration',
        'location': 'location',
        'description': 'description_of_internship',
        'skills_required': 'skills_required',
        'salary': 'expected_salary',
        'start_date': 'start_date',
        'end_date': 'end_date',
        'posted_date': 'posted_date',
    }
    applicant_skills = resume['processed_Skills']

    matches = []
    for _, posting in internship_df.iterrows():
        score = calculate_similarity(applicant_skills, posting['processed_Required_Skills'])
        if not score > 0:
            # No overlapping skill at all: not a match.
            continue
        entry = {key: posting[column] for key, column in field_sources.items()}
        entry['similarity_score'] = score
        matches.append(entry)

    # Stable sort, highest score first; keep only the top 5.
    matches.sort(key=lambda entry: entry['similarity_score'], reverse=True)
    return matches[:5]
# Function to display matched internships
def show_results(results, resume_name):
    """Show the matched internships for one applicant in a new window.

    Args:
        results: List of result dicts as produced by ``match_internships``.
            When empty, an info dialog is shown and no window is opened.
        resume_name: Applicant name used in the dialog and window titles.

    NOTE(review): every label is packed into a fixed 800x600 window without a
    scrollbar, so with several results the lower entries and the Close button
    may not fit on screen; confirm and consider a scrollable container.
    """
    if not results:
        messagebox.showinfo("No Matches", f"No internships matched for {resume_name}.")
        return

    background = '#fafafa'
    body_font = ('Helvetica', 12)

    results_window = tk.Toplevel(root)
    results_window.title(f"Top 5 Matched Internships for {resume_name}")
    results_window.geometry("800x600")
    results_window.configure(bg=background)

    def add_label(text, font=body_font, pady=2, **options):
        """Create one label in the results window and pack it."""
        tk.Label(results_window, text=text, font=font, bg=background, **options).pack(pady=pady)

    add_label(f"Top 5 Matched Internships for {resume_name}", font=('Helvetica', 16, 'bold'), pady=10)

    for idx, result in enumerate(results, start=1):
        add_label(f"#{idx}: {result['internship_title']} at {result['company']}", font=('Helvetica', 14), pady=5)
        add_label(f"Location: {result['location']}")
        add_label(f"Type: {result['type_of_internship']}")
        # Skill lists can be long, so this is the only wrapped label.
        add_label(f"Skills Required: {result['skills_required']}", wraplength=700)
        add_label(f"Duration: {result['duration']}")
        add_label(f"Salary: {result['salary']}")
        add_label(f"Start Date: {result['start_date']}")
        add_label(f"End Date: {result['end_date']}")
        add_label(f"Posted Date: {result['posted_date']}")
        # The displayed percentage is the similarity score scaled by 100.
        add_label(f"Probability of Getting the Job: {result['similarity_score'] * 100:.2f}%", font=('Helvetica', 12, 'bold'), pady=5)
        add_label("-" * 80, pady=5)

    close_button = tk.Button(
        results_window,
        text="Close",
        command=results_window.destroy,
        bg='#00796b',
        fg='white',
        font=body_font,
    )
    close_button.pack(pady=20)
# Function to find and match internships
def find_applicant_and_match_internships():
    """Look up the applicant typed into ``entry_name`` and show their matches.

    The entered text is matched case-insensitively as a plain substring of
    ``name_of_applicant``. If several resumes match, the first one is used.
    A warning dialog is shown for empty input and an info dialog when no
    resume matches.
    """
    applicant_name = entry_name.get().strip()
    if not applicant_name:
        messagebox.showwarning("Input Error", "Please enter a valid applicant name.")
        return

    # regex=False: the typed name is literal text, not a pattern. Without it,
    # input such as "(" raised re.error and "." matched every applicant.
    # na=False: rows with a missing name simply do not match.
    name_matches = resume_df['name_of_applicant'].str.contains(
        applicant_name, case=False, regex=False, na=False
    )
    matching_resume = resume_df[name_matches]

    if matching_resume.empty:
        messagebox.showinfo("No Results", f"No resume found for applicant: {applicant_name}")
        return

    resume = matching_resume.iloc[0]
    matched_internships = match_internships(resume)
    show_results(matched_internships, resume['name_of_applicant'])
# Creating the main application window
root = tk.Tk()
root.title("SkillSync - Resume Based Internship Matcher")
root.geometry("800x600")
root.configure(bg='#e0f7fa')

# UI Styling: default font/padding per ttk widget class
style = ttk.Style()
for _widget_class, _options in (
    ('TButton', {'font': ('Helvetica', 12), 'padding': 10}),
    ('TLabel', {'font': ('Helvetica', 12), 'padding': 10}),
    ('TEntry', {'font': ('Helvetica', 12)}),
):
    style.configure(_widget_class, **_options)

# UI Elements: create the widgets first, then pack them top to bottom
title_label = ttk.Label(
    root,
    text="SkillSync - Resume Based Internship Matcher",
    font=('Helvetica', 24),
    background='#e0f7fa',
)
name_label = ttk.Label(root, text="Enter Applicant Name:", background='#e0f7fa')
entry_name = ttk.Entry(root, width=40)
search_button = ttk.Button(
    root,
    text="Find Matching Internships",
    command=find_applicant_and_match_internships,
)
for _widget, _gap in (
    (title_label, 20),
    (name_label, 10),
    (entry_name, 10),
    (search_button, 20),
):
    _widget.pack(pady=_gap)

# Run Tkinter main loop (blocks until the main window is closed)
root.mainloop()
|