Spaces:

Chaitanya895
/

SkillSync

Sleeping

File size: 8,376 Bytes

7a65254

# Importing necessary libraries
import mysql.connector
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import tkinter as tk
from tkinter import ttk, messagebox

# Download the NLTK resources this script needs at import time:
#   * 'punkt' / 'punkt_tab' - tokenizer models used by word_tokenize. Recent
#     NLTK releases (3.9+) load 'punkt_tab', older ones load 'punkt', so both
#     are requested to keep the script working on either.
#   * 'stopwords'           - the English stop-word list used when filtering.
# nltk.download() reports a package id it does not know instead of raising
# (its raise_on_error flag defaults to False), so the extra request is
# harmless on installations that do not offer 'punkt_tab'.
for _nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_nltk_resource)

# MySQL Database Connection
def connect_db():
    """Open a new connection to the resume/internship MySQL database.

    Every setting falls back to the value this script has always used, but can
    be overridden with an environment variable so that real credentials do not
    have to be committed with the source:

    * ``SKILLSYNC_DB_HOST``     (default ``"localhost"``)
    * ``SKILLSYNC_DB_USER``     (default ``"root"``)
    * ``SKILLSYNC_DB_PASSWORD`` (default: the legacy hard-coded password)
    * ``SKILLSYNC_DB_NAME``     (default ``"resume_internship_matching"``)

    Returns:
        The connection object produced by ``mysql.connector.connect``. The
        caller owns it and is responsible for closing it.
    """
    import os  # local import so this function does not depend on new file-level imports

    return mysql.connector.connect(
        host=os.environ.get("SKILLSYNC_DB_HOST", "localhost"),
        user=os.environ.get("SKILLSYNC_DB_USER", "root"),
        # SECURITY: the fallback below is kept only for backward compatibility.
        # Prefer setting SKILLSYNC_DB_PASSWORD and removing the literal.
        password=os.environ.get("SKILLSYNC_DB_PASSWORD", "Chaitu895@"),
        database=os.environ.get("SKILLSYNC_DB_NAME", "resume_internship_matching"),
    )

# Fetching data from MySQL
def _read_table(cursor, table_name):
    """Return every row of *table_name* as a DataFrame labelled with its column names.

    *table_name* is interpolated directly into the SQL text, so it must be a
    trusted identifier (only constants are passed from fetch_data).
    """
    cursor.execute(f"SELECT * FROM {table_name}")
    # DB-API cursors expose the result-set columns via .description; the first
    # item of each entry is the column name. This avoids a separate
    # SHOW COLUMNS round trip and always matches the rows just selected.
    column_names = [column[0] for column in cursor.description]
    rows = cursor.fetchall()
    return pd.DataFrame(rows, columns=column_names)


def fetch_data():
    """Load the resume and internship tables from MySQL.

    Opens a connection with connect_db(), reads all rows of ``resume_info``
    and then ``internship_info``, prints the first rows of each frame, and
    returns both frames.

    The cursor and the connection are closed even if a query or the DataFrame
    construction raises; the exception itself is propagated unchanged.

    Returns:
        tuple: ``(resume_df, internship_df)`` as pandas DataFrames.
    """
    conn = connect_db()
    try:
        cursor = conn.cursor()
        try:
            resume_df = _read_table(cursor, "resume_info")
            internship_df = _read_table(cursor, "internship_info")
        finally:
            cursor.close()
    finally:
        conn.close()

    print(resume_df.head())
    print(internship_df.head())
    return resume_df, internship_df

# Preprocessing function for skills
def preprocess_skills(skills):
    """Turn a free-text skills string into a list of cleaned tokens.

    The text is lower-cased and tokenized with NLTK's ``word_tokenize``;
    English stop words and punctuation tokens are then dropped.

    Args:
        skills: The raw skills text. Anything that is not a ``str``, or a
            string that is empty or whitespace-only, yields an empty list.

    Returns:
        list: The remaining tokens, in their original order (duplicates kept).
    """
    if not isinstance(skills, str) or not skills.strip():
        return []

    # Build the stop-word lookup once per call rather than once per token:
    # stopwords.words() constructs a fresh list every time it is called.
    stop_words = set(stopwords.words('english'))

    # `token not in string.punctuation` is a substring test against the
    # punctuation string, exactly as before, so behaviour is unchanged.
    return [
        token
        for token in word_tokenize(skills.lower())
        if token not in stop_words and token not in string.punctuation
    ]

# Load data from database
resume_df, internship_df = fetch_data()

# Preprocessing: replace missing values with '' in place
# (preprocess_skills returns [] for an empty string).
resume_df.fillna('', inplace=True)
internship_df.fillna('', inplace=True)

resume_df['processed_Skills'] = resume_df['skills'].apply(preprocess_skills)
print("Processed Resume Skills:")
print(resume_df[['processed_Skills']].head())
internship_df['processed_Required_Skills'] = internship_df['skills_required'].apply(preprocess_skills)
print("\nProcessed Internship Skills:")
print(internship_df[['processed_Required_Skills']].head())

# Creating a set of unique skills.
# The token lists are flattened explicitly instead of with Series.sum():
# summing lists re-copies the accumulated list for every row, and the sum of
# an empty column is not a list, which made the script fail on an empty table.
all_Skills = (
    [skill for tokens in resume_df['processed_Skills'] for skill in tokens]
    + [skill for tokens in internship_df['processed_Required_Skills'] for skill in tokens]
)
unique_Skills = set(all_Skills)
# Sort before numbering so each skill gets the same index on every run;
# set iteration order for strings is not stable between interpreter runs.
Skill_to_index = {skill: idx for idx, skill in enumerate(sorted(unique_Skills))}

# Converting skills to numerical vectors
def skills_to_vector(skills):
    """Build a count vector for *skills* over the global skill vocabulary.

    Args:
        skills: Iterable of skill tokens.

    Returns:
        list: One integer per entry of ``Skill_to_index``; position ``i``
        holds how many times the skill mapped to index ``i`` occurs in
        *skills*. Tokens missing from ``Skill_to_index`` are ignored.
    """
    counts = [0 for _ in Skill_to_index]
    for token in skills:
        position = Skill_to_index.get(token)
        if position is not None:
            counts[position] += 1
    return counts

# Store a per-row skill count vector (built by skills_to_vector) on both frames.
resume_df['Skill_vector'] = resume_df['processed_Skills'].map(skills_to_vector)
internship_df['Required_Skill_vector'] = internship_df['processed_Required_Skills'].map(skills_to_vector)

# Function to calculate Jaccard similarity
def calculate_similarity(resume_skills, internship_skills):
    """Return the Jaccard similarity of two skill collections.

    Both arguments are converted to sets, so duplicates do not matter.

    Args:
        resume_skills: Iterable of hashable skill tokens from a resume.
        internship_skills: Iterable of hashable skill tokens from an internship.

    Returns:
        ``|A & B| / |A | B|`` as a float, or the integer ``0`` when both
        collections are empty.
    """
    applicant = set(resume_skills)
    required = set(internship_skills)
    combined = applicant | required
    if not combined:
        # Nothing to compare: avoid dividing by zero.
        return 0
    return len(applicant & required) / len(combined)

# Function to match internships
def match_internships(resume):
    """Rank the internships in ``internship_df`` against one resume.

    Each internship is scored with calculate_similarity() on the resume's
    ``processed_Skills`` and the internship's ``processed_Required_Skills``.
    Internships whose score is not greater than 0 are skipped.

    Args:
        resume: A mapping/row exposing ``processed_Skills``.

    Returns:
        list: At most five dicts, highest ``similarity_score`` first, each
        holding the internship details plus its ``similarity_score``.
    """
    # (result key, internship_df column) pairs, in the order the keys are
    # inserted into every result dict.
    field_sources = (
        ('internship_title', 'role'),
        ('company', 'company_name'),
        ('type_of_internship', 'type_of_internship'),
        ('duration', 'duration'),
        ('location', 'location'),
        ('description', 'description_of_internship'),
        ('skills_required', 'skills_required'),
        ('salary', 'expected_salary'),
        ('start_date', 'start_date'),
        ('end_date', 'end_date'),
        ('posted_date', 'posted_date'),
    )

    matches = []
    for _, posting in internship_df.iterrows():
        score = calculate_similarity(resume['processed_Skills'], posting['processed_Required_Skills'])
        if not score > 0:
            continue
        entry = {key: posting[column] for key, column in field_sources}
        entry['similarity_score'] = score
        matches.append(entry)

    # Best matches first; keep only the top five.
    matches.sort(key=lambda match: match['similarity_score'], reverse=True)
    return matches[:5]

# Function to display matched internships
def show_results(results, resume_name):
    """Show the matched internships for an applicant in a new window.

    If *results* is empty, an information dialog is shown instead and no
    window is opened. Otherwise a ``Toplevel`` child of ``root`` is created
    with a heading, one group of labels per result, and a Close button.

    The result labels live inside a vertically scrollable area: the window
    has a fixed 800x600 size, and several results with eleven labels each do
    not fit in it, which previously left the lower results and the Close
    button out of view.

    Args:
        results: List of result dicts as produced by match_internships().
        resume_name: Applicant name used in the window title and heading.
    """
    if not results:
        messagebox.showinfo("No Matches", f"No internships matched for {resume_name}.")
        return

    background = '#fafafa'
    body_font = ('Helvetica', 12)

    results_window = tk.Toplevel(root)
    results_window.title(f"Top 5 Matched Internships for {resume_name}")
    results_window.geometry("800x600")
    results_window.configure(bg=background)

    tk.Label(results_window, text=f"Top 5 Matched Internships for {resume_name}", font=('Helvetica', 16, 'bold'), bg=background).pack(pady=10)

    # Pack the Close button before the scroll area so it keeps a fixed slot at
    # the bottom of the window and stays visible whatever the result count.
    tk.Button(results_window, text="Close", command=results_window.destroy, bg='#00796b', fg='white', font=body_font).pack(side='bottom', pady=20)

    # Scrollable area: a canvas hosting a frame, driven by a vertical scrollbar.
    canvas = tk.Canvas(results_window, bg=background, highlightthickness=0)
    scrollbar = ttk.Scrollbar(results_window, orient='vertical', command=canvas.yview)
    canvas.configure(yscrollcommand=scrollbar.set)
    scrollbar.pack(side='right', fill='y')
    canvas.pack(side='left', fill='both', expand=True)

    content = tk.Frame(canvas, bg=background)
    content_id = canvas.create_window((0, 0), window=content, anchor='nw')
    # Keep the scrollable region in sync with the frame's size ...
    content.bind('<Configure>', lambda event: canvas.configure(scrollregion=canvas.bbox('all')))
    # ... and stretch the frame to the canvas width so labels stay centred.
    canvas.bind('<Configure>', lambda event: canvas.itemconfigure(content_id, width=event.width))

    for idx, result in enumerate(results, start=1):
        tk.Label(content, text=f"#{idx}: {result['internship_title']} at {result['company']}", font=('Helvetica', 14), bg=background).pack(pady=5)

        # (label text, wraplength or None) for the plain detail lines, in display order.
        details = (
            (f"Location: {result['location']}", None),
            (f"Type: {result['type_of_internship']}", None),
            (f"Skills Required: {result['skills_required']}", 700),
            (f"Duration: {result['duration']}", None),
            (f"Salary: {result['salary']}", None),
            (f"Start Date: {result['start_date']}", None),
            (f"End Date: {result['end_date']}", None),
            (f"Posted Date: {result['posted_date']}", None),
        )
        for text, wraplength in details:
            extra = {} if wraplength is None else {'wraplength': wraplength}
            tk.Label(content, text=text, font=body_font, bg=background, **extra).pack(pady=2)

        # NOTE(review): the value shown is the similarity score as a
        # percentage; the "Probability" wording is kept as-is for the UI.
        tk.Label(content, text=f"Probability of Getting the Job: {result['similarity_score'] * 100:.2f}%", font=('Helvetica', 12, 'bold'), bg=background).pack(pady=5)
        tk.Label(content, text="-" * 80, font=body_font, bg=background).pack(pady=5)

# Function to find and match internships
def find_applicant_and_match_internships():
    """Look up the applicant typed into the name entry and show their matches.

    Reads the text of ``entry_name``, finds resumes whose
    ``name_of_applicant`` contains it (case-insensitive), and passes the first
    matching resume to match_internships() and show_results(). A warning is
    shown for empty input and an information dialog when no resume matches.
    """
    applicant_name = entry_name.get().strip()
    if not applicant_name:
        messagebox.showwarning("Input Error", "Please enter a valid applicant name.")
        return

    # regex=False: the typed text is matched literally. With the default
    # (regex=True) a name containing characters such as "(" or "[" raised
    # re.error, and "." matched any character.
    # na=False: a missing name counts as "no match" instead of yielding NaN
    # in the boolean mask.
    name_mask = resume_df['name_of_applicant'].str.contains(
        applicant_name, case=False, regex=False, na=False
    )
    matching_resume = resume_df[name_mask]
    if matching_resume.empty:
        messagebox.showinfo("No Results", f"No resume found for applicant: {applicant_name}")
        return

    # Several resumes may contain the text; the first one is used.
    resume = matching_resume.iloc[0]
    matched_internships = match_internships(resume)
    show_results(matched_internships, resume['name_of_applicant'])

# Creating the main application window
root = tk.Tk()
root.title("SkillSync - Resume Based Internship Matcher")
root.geometry("800x600")
root.configure(bg='#e0f7fa')

# UI Styling
style = ttk.Style()
style.configure('TButton', font=('Helvetica', 12), padding=10)
style.configure('TLabel', font=('Helvetica', 12), padding=10)
style.configure('TEntry', font=('Helvetica', 12))

# UI Elements
title_label = ttk.Label(root, text="SkillSync - Resume Based Internship Matcher", font=('Helvetica', 24), background='#e0f7fa')
title_label.pack(pady=20)

name_label = ttk.Label(root, text="Enter Applicant Name:", background='#e0f7fa')
name_label.pack(pady=10)

entry_name = ttk.Entry(root, width=40)
entry_name.pack(pady=10)

search_button = ttk.Button(root, text="Find Matching Internships", command=find_applicant_and_match_internships)
search_button.pack(pady=20)

# Run Tkinter main loop
root.mainloop()