Spaces:

Chaitanya895
/

SkillSync

Sleeping

App Files Files Community

SkillSync / complete_code.py

Chaitanya895

Add SkillSync project files to Hugging Face Space

7a65254 9 months ago

raw

history blame contribute delete

8.38 kB

	# Importing necessary libraries
	import mysql.connector
	import pandas as pd
	import nltk
	from nltk.tokenize import word_tokenize
	from nltk.corpus import stopwords
	import string
	import tkinter as tk
	from tkinter import ttk, messagebox

	# Download NLTK dependencies.
	# 'punkt_tab' is the tokenizer resource that word_tokenize loads on recent
	# NLTK releases (it superseded the pickled 'punkt' models); 'punkt' is
	# still fetched so that older NLTK installations keep working.
	nltk.download('punkt')
	nltk.download('punkt_tab')
	nltk.download('stopwords')

	# MySQL Database Connection
	def connect_db():
	    """Open a new MySQL connection for the resume/internship database.

	    Each setting can be overridden through an environment variable
	    (``SKILLSYNC_DB_HOST``, ``SKILLSYNC_DB_USER``, ``SKILLSYNC_DB_PASSWORD``,
	    ``SKILLSYNC_DB_NAME``). When a variable is unset, the value that was
	    previously hard-coded here is used, so existing setups keep working.

	    Returns:
	        The connection object returned by ``mysql.connector.connect``.
	        The caller is responsible for closing it.
	    """
	    # Imported locally so this function does not depend on edits to the
	    # file-level import block.
	    import os

	    return mysql.connector.connect(
	        host=os.environ.get("SKILLSYNC_DB_HOST", "localhost"),
	        user=os.environ.get("SKILLSYNC_DB_USER", "root"),
	        # Change to your actual MySQL password, or (preferably) export
	        # SKILLSYNC_DB_PASSWORD instead of editing the source.
	        password=os.environ.get("SKILLSYNC_DB_PASSWORD", "Chaitu895@"),
	        database=os.environ.get("SKILLSYNC_DB_NAME", "resume_internship_matching"),
	    )

	# Fetching data from MySQL
	def _read_query(cursor, query):
	    """Run *query* on *cursor* and return all rows as a DataFrame."""
	    cursor.execute(query)
	    rows = cursor.fetchall()
	    # DB-API cursors describe the result set of the last query; the first
	    # item of each description entry is the column name.
	    column_names = [column[0] for column in cursor.description]
	    return pd.DataFrame(rows, columns=column_names)


	def fetch_data():
	    """Load the resume and internship tables from MySQL.

	    Opens a connection with ``connect_db``, reads every row of
	    ``resume_info`` and ``internship_info``, prints the head of each
	    resulting frame, and returns both frames.

	    Returns:
	        tuple: ``(resume_df, internship_df)`` as pandas DataFrames whose
	        columns are named after the selected table columns.
	    """
	    conn = connect_db()
	    try:
	        cursor = conn.cursor()
	        try:
	            # Fetch resumes
	            resume_df = _read_query(cursor, "SELECT * FROM resume_info")
	            # Fetch internships
	            internship_df = _read_query(cursor, "SELECT * FROM internship_info")
	        finally:
	            # Release the cursor even when one of the queries fails.
	            cursor.close()
	    finally:
	        # Likewise, never leave the connection open on an error path.
	        conn.close()
	    print(resume_df.head())
	    print(internship_df.head())
	    return resume_df, internship_df

	# Preprocessing function for skills
	def preprocess_skills(skills):
	    """Turn a free-text skills string into a list of cleaned tokens.

	    The text is lower-cased and tokenized with ``word_tokenize``; English
	    stop words and punctuation tokens are then dropped.

	    Args:
	        skills: The raw skills text. Anything that is not a string, or a
	            string that is empty or whitespace-only, yields ``[]``.

	    Returns:
	        list: The remaining tokens, in their original order (duplicates
	        are kept).
	    """
	    if not isinstance(skills, str) or not skills.strip():
	        return []
	    # Build the stop-word set once per call; evaluating
	    # stopwords.words('english') inside the comprehension repeated that
	    # work, plus a linear list scan, for every single token.
	    english_stopwords = set(stopwords.words('english'))
	    return [
	        token
	        for token in word_tokenize(skills.lower())
	        # The punctuation check is a substring test against
	        # string.punctuation, exactly as before.
	        if token not in english_stopwords and token not in string.punctuation
	    ]

	# Load data from database
	resume_df, internship_df = fetch_data()

	# Preprocessing: replace missing values with empty strings in both frames.
	resume_df.fillna('', inplace=True)
	internship_df.fillna('', inplace=True)

	# Tokenize the skills columns of both tables with preprocess_skills.
	resume_df['processed_Skills'] = resume_df['skills'].apply(preprocess_skills)
	print("Processed Resume Skills:")
	print(resume_df[['processed_Skills']].head())
	internship_df['processed_Required_Skills'] = internship_df['skills_required'].apply(preprocess_skills)
	print("\nProcessed Internship Skills:")
	print(internship_df[['processed_Required_Skills']].head())

	# Creating a set of unique skills.
	# The token lists are flattened explicitly instead of with Series.sum():
	# summing lists concatenates them pairwise (quadratic in the row count),
	# and on an empty table sum() yields 0 rather than a list, which would
	# break the concatenation and the set() call.
	all_Skills = [
	    skill
	    for column in (resume_df['processed_Skills'], internship_df['processed_Required_Skills'])
	    for tokens in column
	    for skill in tokens
	]
	unique_Skills = set(all_Skills)
	# Sort before numbering so every skill gets the same index on every run;
	# plain set iteration order for strings can differ between interpreter runs.
	Skill_to_index = {skill: idx for idx, skill in enumerate(sorted(unique_Skills))}

	# Converting skills to numerical vectors
	def skills_to_vector(skills):
	    """Count how often each known skill occurs in *skills*.

	    Args:
	        skills: Iterable of skill tokens.

	    Returns:
	        list: One integer per entry of the module-level ``Skill_to_index``
	        mapping; position ``Skill_to_index[s]`` holds the number of times
	        ``s`` appears in *skills*. Tokens missing from the mapping are
	        ignored.
	    """
	    counts = [0] * len(Skill_to_index)
	    for token in skills:
	        position = Skill_to_index.get(token)
	        if position is not None:
	            counts[position] += 1
	    return counts

	# Attach a skill-count vector (see skills_to_vector) to every resume row
	# and to every internship row.
	resume_df['Skill_vector'] = resume_df['processed_Skills'].map(skills_to_vector)
	internship_df['Required_Skill_vector'] = internship_df['processed_Required_Skills'].map(skills_to_vector)

	# Function to calculate Jaccard similarity
	def calculate_similarity(resume_skills, internship_skills):
	    """Return the Jaccard similarity of two skill collections.

	    Both arguments are converted to sets, so duplicates and ordering do
	    not matter.

	    Args:
	        resume_skills: Iterable of hashable skill tokens from a resume.
	        internship_skills: Iterable of hashable skill tokens required by
	            an internship.

	    Returns:
	        ``len(intersection) / len(union)`` of the two sets, or ``0`` when
	        both are empty.
	    """
	    applicant, required = set(resume_skills), set(internship_skills)
	    combined = applicant | required
	    if not combined:
	        return 0
	    return len(applicant & required) / len(combined)

	# Function to match internships
	def match_internships(resume):
	    """Rank the internships in ``internship_df`` against one resume.

	    Args:
	        resume: A row-like object supporting ``resume['processed_Skills']``.

	    Returns:
	        list: Up to five dicts, ordered by descending ``similarity_score``.
	        Only internships with a score above 0 are included. Each dict
	        carries the internship's display fields plus its score.
	    """
	    # (result key, internship_df column) pairs, in the order the keys
	    # appear in each result dict.
	    field_map = (
	        ('internship_title', 'role'),
	        ('company', 'company_name'),
	        ('type_of_internship', 'type_of_internship'),
	        ('duration', 'duration'),
	        ('location', 'location'),
	        ('description', 'description_of_internship'),
	        ('skills_required', 'skills_required'),
	        ('salary', 'expected_salary'),
	        ('start_date', 'start_date'),
	        ('end_date', 'end_date'),
	        ('posted_date', 'posted_date'),
	    )

	    matches = []
	    for _, posting in internship_df.iterrows():
	        score = calculate_similarity(resume['processed_Skills'], posting['processed_Required_Skills'])
	        if score <= 0:
	            # Nothing in common: not a match.
	            continue
	        entry = {key: posting[column] for key, column in field_map}
	        entry['similarity_score'] = score
	        matches.append(entry)

	    # Best score first; keep only the top 5.
	    matches.sort(key=lambda item: item['similarity_score'], reverse=True)
	    return matches[:5]

	# Function to display matched internships
	def show_results(results, resume_name):
	    """Show the matched internships for an applicant in a new window.

	    Args:
	        results: List of result dicts as produced by ``match_internships``.
	            When empty, an info dialog is shown instead of a window.
	        resume_name: Applicant name used in the window title and heading.
	    """
	    if not results:
	        messagebox.showinfo("No Matches", f"No internships matched for {resume_name}.")
	        return

	    background = '#fafafa'
	    heading = f"Top 5 Matched Internships for {resume_name}"

	    results_window = tk.Toplevel(root)
	    results_window.title(heading)
	    results_window.geometry("800x600")
	    results_window.configure(bg=background)

	    def add_label(text, font=('Helvetica', 12), pady=2, **options):
	        # Every line of the report is a packed Label on the same background.
	        tk.Label(results_window, text=text, font=font, bg=background, **options).pack(pady=pady)

	    add_label(heading, font=('Helvetica', 16, 'bold'), pady=10)

	    for rank, match in enumerate(results, start=1):
	        add_label(f"#{rank}: {match['internship_title']} at {match['company']}", font=('Helvetica', 14), pady=5)
	        add_label(f"Location: {match['location']}")
	        add_label(f"Type: {match['type_of_internship']}")
	        add_label(f"Skills Required: {match['skills_required']}", wraplength=700)
	        add_label(f"Duration: {match['duration']}")
	        add_label(f"Salary: {match['salary']}")
	        add_label(f"Start Date: {match['start_date']}")
	        add_label(f"End Date: {match['end_date']}")
	        add_label(f"Posted Date: {match['posted_date']}")
	        # The percentage shown is the similarity score scaled by 100.
	        add_label(f"Probability of Getting the Job: {match['similarity_score'] * 100:.2f}%", font=('Helvetica', 12, 'bold'), pady=5)
	        add_label("-" * 80, pady=5)

	    close_button = tk.Button(
	        results_window,
	        text="Close",
	        command=results_window.destroy,
	        bg='#00796b',
	        fg='white',
	        font=('Helvetica', 12),
	    )
	    close_button.pack(pady=20)

	# Function to find and match internships
	def find_applicant_and_match_internships():
	    """Look up the applicant typed into ``entry_name`` and show matches.

	    Button callback. Reads the name from the entry widget, finds the
	    first resume whose ``name_of_applicant`` contains it
	    (case-insensitive), and passes that resume's matches to
	    ``show_results``. A dialog is shown when the input is blank or no
	    resume matches.
	    """
	    applicant_name = entry_name.get().strip()
	    if not applicant_name:
	        messagebox.showwarning("Input Error", "Please enter a valid applicant name.")
	        return

	    # regex=False: the typed name is matched as literal text. With the
	    # default regex matching, input such as "(" raised an error and "."
	    # matched every applicant.
	    # na=False: rows with a missing name count as non-matches, keeping the
	    # mask purely boolean.
	    name_matches = resume_df['name_of_applicant'].str.contains(
	        applicant_name, case=False, regex=False, na=False
	    )
	    matching_resume = resume_df[name_matches]
	    if matching_resume.empty:
	        messagebox.showinfo("No Results", f"No resume found for applicant: {applicant_name}")
	        return

	    # Several resumes may contain the text; the first one is used.
	    resume = matching_resume.iloc[0]
	    matched_internships = match_internships(resume)
	    show_results(matched_internships, resume['name_of_applicant'])

	# Creating the main application window
	_APP_TITLE = "SkillSync - Resume Based Internship Matcher"
	_WINDOW_BG = '#e0f7fa'

	root = tk.Tk()
	root.title(_APP_TITLE)
	root.geometry("800x600")
	root.configure(bg=_WINDOW_BG)

	# UI Styling: shared font/padding for the themed widgets
	style = ttk.Style()
	for _style_name, _style_options in (
	    ('TButton', {'font': ('Helvetica', 12), 'padding': 10}),
	    ('TLabel', {'font': ('Helvetica', 12), 'padding': 10}),
	    ('TEntry', {'font': ('Helvetica', 12)}),
	):
	    style.configure(_style_name, **_style_options)

	# UI Elements, packed top to bottom: title, prompt, name entry, search button
	title_label = ttk.Label(root, text=_APP_TITLE, font=('Helvetica', 24), background=_WINDOW_BG)
	title_label.pack(pady=20)

	name_label = ttk.Label(root, text="Enter Applicant Name:", background=_WINDOW_BG)
	name_label.pack(pady=10)

	# Read by find_applicant_and_match_internships when the button is pressed
	entry_name = ttk.Entry(root, width=40)
	entry_name.pack(pady=10)

	search_button = ttk.Button(
	    root,
	    text="Find Matching Internships",
	    command=find_applicant_and_match_internships,
	)
	search_button.pack(pady=20)

	# Run Tkinter main loop
	root.mainloop()