# SkillSync / complete_code.py
# Uploaded by Chaitanya895
# Commit message: Add SkillSync project files to Hugging Face Space
# Commit: 7a65254
# Importing necessary libraries
import mysql.connector
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import tkinter as tk
from tkinter import ttk, messagebox
# Download NLTK dependencies
# Fetch the tokenizer model and the stop-word list used by the skill
# preprocessing further down.
for nltk_resource in ('punkt', 'stopwords'):
    nltk.download(nltk_resource)
# MySQL Database Connection
def connect_db():
    """Open a new connection to the internship-matching MySQL database.

    Every setting can be overridden through an environment variable
    (``SKILLSYNC_DB_HOST``, ``SKILLSYNC_DB_USER``, ``SKILLSYNC_DB_PASSWORD``,
    ``SKILLSYNC_DB_NAME``). When a variable is unset, the value that was
    previously hard-coded here is used, so existing setups keep working.

    Returns:
        The connection object returned by ``mysql.connector.connect``.
        The caller is responsible for closing it.
    """
    import os  # local import: only this function needs it

    # NOTE(review): the fallback password is a credential committed to source
    # control. Prefer setting SKILLSYNC_DB_PASSWORD and removing the fallback.
    return mysql.connector.connect(
        host=os.environ.get("SKILLSYNC_DB_HOST", "localhost"),
        user=os.environ.get("SKILLSYNC_DB_USER", "root"),
        password=os.environ.get("SKILLSYNC_DB_PASSWORD", "Chaitu895@"),
        database=os.environ.get("SKILLSYNC_DB_NAME", "resume_internship_matching"),
    )
# Fetching data from MySQL
def _fetch_frame(cursor, query):
    """Run *query* on *cursor* and return every row as a DataFrame."""
    cursor.execute(query)
    rows = cursor.fetchall()
    # The DB-API cursor description lists one entry per result column, with
    # the column name first, so no separate SHOW COLUMNS query is needed.
    column_names = [column[0] for column in cursor.description]
    return pd.DataFrame(rows, columns=column_names)


def fetch_data():
    """Load the resume and internship tables into DataFrames.

    Reads every row of ``resume_info`` and ``internship_info`` and prints
    the first rows of each frame as a quick sanity check.

    Returns:
        A ``(resume_df, internship_df)`` tuple of DataFrames whose columns
        are named after the table columns.
    """
    conn = connect_db()
    try:
        cursor = conn.cursor()
        try:
            resume_df = _fetch_frame(cursor, "SELECT * FROM resume_info")
            internship_df = _fetch_frame(cursor, "SELECT * FROM internship_info")
        finally:
            # Release the cursor even when a query fails.
            cursor.close()
    finally:
        # Release the connection even when a query fails.
        conn.close()
    print(resume_df.head())
    print(internship_df.head())
    return resume_df, internship_df
# Preprocessing function for skills
def preprocess_skills(skills):
    """Turn a free-text skills string into a list of lowercase tokens.

    Args:
        skills: Raw skills text. Any value that is not a string, or a string
            that is empty or only whitespace, produces an empty list.

    Returns:
        The tokens produced by ``word_tokenize`` on the lowercased text,
        minus English stop words and minus tokens that occur inside
        ``string.punctuation``.
    """
    if not isinstance(skills, str) or skills.strip() == '':
        return []
    # Look the stop-word list up once per call and keep it in a set, instead
    # of fetching the whole list again for every single token.
    english_stopwords = set(stopwords.words('english'))
    return [
        token
        for token in word_tokenize(skills.lower())
        if token not in english_stopwords and token not in string.punctuation
    ]
# Load data from database
resume_df, internship_df = fetch_data()

# Preprocessing: replace missing values with empty strings so that the text
# columns can be processed uniformly.
resume_df.fillna('', inplace=True)
internship_df.fillna('', inplace=True)

resume_df['processed_Skills'] = resume_df['skills'].apply(preprocess_skills)
print("Processed Resume Skills:")
print(resume_df[['processed_Skills']].head())

internship_df['processed_Required_Skills'] = internship_df['skills_required'].apply(preprocess_skills)
print("\nProcessed Internship Skills:")
print(internship_df[['processed_Required_Skills']].head())

# Creating a set of unique skills.
# The token lists are flattened explicitly: Series.sum() over lists
# concatenates pairwise (quadratic) and yields the integer 0 for an empty
# table, which cannot be concatenated with a list or turned into a set.
all_Skills = (
    [skill for tokens in resume_df['processed_Skills'] for skill in tokens]
    + [skill for tokens in internship_df['processed_Required_Skills'] for skill in tokens]
)
unique_Skills = set(all_Skills)
# Sort before numbering so every skill gets the same index on every run;
# plain set iteration order for strings can differ between interpreter runs.
Skill_to_index = {skill: idx for idx, skill in enumerate(sorted(unique_Skills))}
# Converting skills to numerical vectors
def skills_to_vector(skills):
    """Encode skill tokens as a count vector over the known vocabulary.

    Args:
        skills: Iterable of skill tokens.

    Returns:
        A list with one integer per entry of ``Skill_to_index``; each
        position counts how often the corresponding skill occurs in
        *skills*. Tokens missing from ``Skill_to_index`` are ignored.
    """
    counts = [0] * len(Skill_to_index)
    for token in skills:
        position = Skill_to_index.get(token)
        if position is not None:
            counts[position] += 1
    return counts
# Attach a skill count vector to every resume and every internship posting.
for frame, token_column, vector_column in (
    (resume_df, 'processed_Skills', 'Skill_vector'),
    (internship_df, 'processed_Required_Skills', 'Required_Skill_vector'),
):
    frame[vector_column] = frame[token_column].apply(skills_to_vector)
# Function to calculate Jaccard similarity
def calculate_similarity(resume_skills, internship_skills):
    """Return the Jaccard similarity of two skill collections.

    Args:
        resume_skills: Iterable of hashable skill tokens from a resume.
        internship_skills: Iterable of hashable skill tokens from a posting.

    Returns:
        The number of distinct shared skills divided by the number of
        distinct skills overall, or ``0`` when both collections are empty.
    """
    applicant = set(resume_skills)
    required = set(internship_skills)
    combined = applicant | required
    if not combined:
        # Nothing to compare; avoid dividing by zero.
        return 0
    return len(applicant & required) / len(combined)
# Function to match internships
def match_internships(resume):
    """Rank the internships in ``internship_df`` against one resume.

    Args:
        resume: Row-like object whose ``'processed_Skills'`` entry holds the
            applicant's skill tokens.

    Returns:
        At most five dicts, highest ``'similarity_score'`` first. Each dict
        carries the posting's display fields plus its similarity score.
        Postings with a score of zero are left out.
    """
    # Result key -> internship_df column the value is copied from.
    column_for_field = {
        'internship_title': 'role',
        'company': 'company_name',
        'type_of_internship': 'type_of_internship',
        'duration': 'duration',
        'location': 'location',
        'description': 'description_of_internship',
        'skills_required': 'skills_required',
        'salary': 'expected_salary',
        'start_date': 'start_date',
        'end_date': 'end_date',
        'posted_date': 'posted_date',
    }

    matches = []
    for _, posting in internship_df.iterrows():
        score = calculate_similarity(resume['processed_Skills'], posting['processed_Required_Skills'])
        if score <= 0:
            continue  # no shared skill at all
        match = {field: posting[column] for field, column in column_for_field.items()}
        match['similarity_score'] = score
        matches.append(match)

    # Best matches first; keep only the top five.
    matches.sort(key=lambda match: match['similarity_score'], reverse=True)
    return matches[:5]
# Function to display matched internships
def show_results(results, resume_name):
    """Display the matched internships for one applicant.

    Shows an info dialog when *results* is empty. Otherwise opens a new
    window, parented to the global ``root``, that lists every result
    followed by a Close button.

    Args:
        results: List of result dicts as produced by ``match_internships``.
        resume_name: Applicant name shown in the dialog and window titles.
    """
    if not results:
        messagebox.showinfo("No Matches", f"No internships matched for {resume_name}.")
        return

    background = '#fafafa'
    body_font = ('Helvetica', 12)

    results_window = tk.Toplevel(root)
    results_window.title(f"Top 5 Matched Internships for {resume_name}")
    results_window.geometry("800x600")
    results_window.configure(bg=background)

    def add_label(text, font=body_font, pady=2, **options):
        # Every label in this window shares the parent and background colour.
        tk.Label(results_window, text=text, font=font, bg=background, **options).pack(pady=pady)

    add_label(f"Top 5 Matched Internships for {resume_name}", font=('Helvetica', 16, 'bold'), pady=10)

    for idx, result in enumerate(results, start=1):
        add_label(f"#{idx}: {result['internship_title']} at {result['company']}", font=('Helvetica', 14), pady=5)
        add_label(f"Location: {result['location']}")
        add_label(f"Type: {result['type_of_internship']}")
        add_label(f"Skills Required: {result['skills_required']}", wraplength=700)
        add_label(f"Duration: {result['duration']}")
        add_label(f"Salary: {result['salary']}")
        add_label(f"Start Date: {result['start_date']}")
        add_label(f"End Date: {result['end_date']}")
        add_label(f"Posted Date: {result['posted_date']}")
        add_label(
            f"Probability of Getting the Job: {result['similarity_score'] * 100:.2f}%",
            font=('Helvetica', 12, 'bold'),
            pady=5,
        )
        # Visual separator between consecutive results.
        add_label("-" * 80, pady=5)

    close_button = tk.Button(
        results_window,
        text="Close",
        command=results_window.destroy,
        bg='#00796b',
        fg='white',
        font=('Helvetica', 12),
    )
    close_button.pack(pady=20)
# Function to find and match internships
def find_applicant_and_match_internships():
    """Look up the applicant typed into the entry box and show their matches.

    Reads the name from the global ``entry_name`` widget and selects the
    first row of ``resume_df`` whose ``name_of_applicant`` contains it
    (case-insensitive). The matching internships for that row are then
    passed to ``show_results``. A warning dialog is shown for blank input
    and an info dialog when no resume matches.
    """
    applicant_name = entry_name.get().strip()
    if not applicant_name:
        messagebox.showwarning("Input Error", "Please enter a valid applicant name.")
        return

    # regex=False: treat the typed name as literal text. As a regular
    # expression, input such as "(" raises an error and "." matches every row.
    # na=False: rows with a missing name never match.
    name_matches = resume_df['name_of_applicant'].str.contains(
        applicant_name, case=False, regex=False, na=False
    )
    matching_resume = resume_df[name_matches]
    if matching_resume.empty:
        messagebox.showinfo("No Results", f"No resume found for applicant: {applicant_name}")
        return

    # Several resumes may contain the typed text; the first one is used.
    resume = matching_resume.iloc[0]
    matched_internships = match_internships(resume)
    show_results(matched_internships, resume['name_of_applicant'])
# Creating the main application window
# root is a module-level global: show_results() uses it as the parent of the
# results window it opens.
root = tk.Tk()
root.title("SkillSync - Resume Based Internship Matcher")
root.geometry("800x600")
root.configure(bg='#e0f7fa')
# UI Styling: font and padding settings for the ttk button, label and entry styles
style = ttk.Style()
style.configure('TButton', font=('Helvetica', 12), padding=10)
style.configure('TLabel', font=('Helvetica', 12), padding=10)
style.configure('TEntry', font=('Helvetica', 12))
# UI Elements, packed into the window in the order they are created
title_label = ttk.Label(root, text="SkillSync - Resume Based Internship Matcher", font=('Helvetica', 24), background='#e0f7fa')
title_label.pack(pady=20)
name_label = ttk.Label(root, text="Enter Applicant Name:", background='#e0f7fa')
name_label.pack(pady=10)
# entry_name is read by find_applicant_and_match_internships() to get the
# applicant name to search for
entry_name = ttk.Entry(root, width=40)
entry_name.pack(pady=10)
# Pressing the button runs the applicant lookup and shows the matches
search_button = ttk.Button(root, text="Find Matching Internships", command=find_applicant_and_match_internships)
search_button.pack(pady=20)
# Run Tkinter main loop (hands control to Tk's event loop)
root.mainloop()