Spaces:

Deepakkori45
/

Resume-Analyzer

Sleeping

App Files Files Community

Resume-Analyzer / database.py

Deepakkori45

Update database.py

4cee71c verified 11 months ago

raw

history blame contribute delete

13.5 kB

	import sqlite3
	import json
	import pandas as pd
	from datetime import datetime

	class ResumeDatabase:
	def __init__(self, db_path='resumes.db'):
	self.db_path = db_path
	self.create_tables()

	def create_tables(self):
	conn = sqlite3.connect(self.db_path)
	c = conn.cursor()

	c.execute('''CREATE TABLE IF NOT EXISTS resumes
	(id INTEGER PRIMARY KEY AUTOINCREMENT,
	name TEXT,
	email TEXT,
	phone TEXT,
	raw_text TEXT,
	analysis_json TEXT,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')

	conn.commit()
	conn.close()

	def save_analysis(self, analysis_result, raw_text):
	conn = sqlite3.connect(self.db_path)
	c = conn.cursor()

	c.execute('''INSERT INTO resumes (name, email, phone, raw_text, analysis_json)
	VALUES (?, ?, ?, ?, ?)''',
	(analysis_result.get('name', 'Not found'),
	analysis_result.get('email', 'Not found'),
	analysis_result.get('phone', 'Not found'),
	raw_text,
	json.dumps(analysis_result)))

	conn.commit()
	conn.close()

	def calculate_score(self, analysis):
	"""Calculate a comprehensive score based on resume analysis"""
	try:
	# Initialize scores
	education_score = 0
	experience_score = 0
	technical_score = 0
	project_score = 0
	impact_score = 0
	role_specific_score = 0

	# Education Score (max 20 points)
	edu_level = str(analysis.get('education_level', '')).lower()
	if edu_level:
	if 'phd' in edu_level or 'doctorate' in edu_level:
	education_score += 20
	elif 'master' in edu_level or 'ms' in edu_level or 'mtech' in edu_level:
	education_score += 18
	elif 'bachelor' in edu_level or 'bs' in edu_level or 'btech' in edu_level:
	education_score += 15
	else:
	education_score += 10

	# Add points for CGPA if available
	cgpa = analysis.get('cgpa', 'Not found')
	if isinstance(cgpa, (int, float)):
	if cgpa >= 3.5: # Assuming 4.0 scale
	education_score = min(20, education_score + 2)

	# Experience Score (max 20 points)
	years_exp = analysis.get('years_experience', 0)
	if isinstance(years_exp, (int, float)):
	experience_score = min(20, years_exp * 4) # 5 years for max score
	elif isinstance(years_exp, str) and years_exp.replace('.', '').isdigit():
	experience_score = min(20, float(years_exp) * 4)

	# Technical Score (max 20 points)
	tech_skills = {
	'programming_languages': analysis.get('programming_languages', []),
	'technical_skills': analysis.get('technical_skills', []),
	'ml_frameworks': analysis.get('ml_frameworks', []),
	'databases': analysis.get('databases', []),
	'cloud_platforms': analysis.get('cloud_platforms', [])
	}

	total_skills = sum(len(skills) for skills in tech_skills.values())
	technical_score = min(20, total_skills * 2)

	# Project Score (max 15 points)
	projects = len(analysis.get('projects', []))
	research_exp = 1 if analysis.get('research_experience') else 0
	publications = len(analysis.get('publications', []))

	project_score = min(15, projects * 2 + research_exp * 3 + publications * 2)

	# Impact Score (max 15 points)
	leadership = 1 if analysis.get('leadership_experience') else 0
	team_size = analysis.get('team_size', 0)
	if isinstance(team_size, str):
	try:
	team_size = int(''.join(filter(str.isdigit, team_size)))
	except:
	team_size = 0

	certifications = len(analysis.get('certifications', []))
	awards = len(analysis.get('awards', []))

	impact_score = min(15, leadership * 5 + min(5, team_size/2) + min(5, certifications * 2 + awards))

	# Role Specific Score (max 10 points)
	ds_skills = len(analysis.get('ml_frameworks', [])) + len(analysis.get('deep_learning', [])) + \
	len(analysis.get('nlp_skills', [])) + len(analysis.get('computer_vision', []))

	de_skills = len(analysis.get('etl_tools', [])) + len(analysis.get('data_warehousing', [])) + \
	len(analysis.get('orchestration_tools', [])) + len(analysis.get('streaming_tech', []))

	role_specific_score = min(10, max(ds_skills, de_skills))

	# Calculate total score
	total_score = education_score + experience_score + technical_score + \
	project_score + impact_score + role_specific_score

	return {
	'total_score': total_score,
	'education_score': education_score,
	'experience_score': experience_score,
	'technical_score': technical_score,
	'project_score': project_score,
	'impact_score': impact_score,
	'role_specific_score': role_specific_score
	}
	except Exception as e:
	print(f"Error calculating score: {str(e)}")
	return {
	'total_score': 0,
	'education_score': 0,
	'experience_score': 0,
	'technical_score': 0,
	'project_score': 0,
	'impact_score': 0,
	'role_specific_score': 0
	}

	def get_statistics(self):
	"""Get statistics of analyzed resumes"""
	conn = sqlite3.connect(self.db_path)
	df = pd.read_sql_query("SELECT analysis_json FROM resumes", conn)
	conn.close()

	if df.empty:
	return {
	'total_resumes': 0,
	'avg_work_experience': 0,
	'education_levels': {},
	'major_distribution': {},
	'top_programming_languages': {},
	'top_technical_skills': {},
	'top_ml_frameworks': {},
	'top_visualization_tools': {},
	'top_databases': {},
	'top_etl_tools': {},
	'top_streaming_tech': {},
	'top_cloud_platforms': {},
	'top_certifications': {},
	'university_distribution': {}
	}

	analyses = [json.loads(x) for x in df['analysis_json']]

	# Calculate statistics
	stats = {
	'total_resumes': len(analyses),
	'avg_work_experience': 0,
	'education_levels': {},
	'major_distribution': {},
	'top_programming_languages': {},
	'top_technical_skills': {},
	'top_ml_frameworks': {},
	'top_visualization_tools': {},
	'top_databases': {},
	'top_etl_tools': {},
	'top_streaming_tech': {},
	'top_cloud_platforms': {},
	'top_certifications': {},
	'university_distribution': {}
	}

	# Calculate averages and distributions
	total_exp = 0
	valid_exp = 0

	for analysis in analyses:
	# Work experience
	exp = analysis.get('years_experience', 0)
	if isinstance(exp, (int, float)) or (isinstance(exp, str) and exp.replace('.', '').isdigit()):
	try:
	exp = float(exp)
	total_exp += exp
	valid_exp += 1
	except:
	pass

	# Education level
	edu = analysis.get('education_level', 'Not specified')
	stats['education_levels'][edu] = stats['education_levels'].get(edu, 0) + 1

	# Major
	major = analysis.get('major', 'Not specified')
	stats['major_distribution'][major] = stats['major_distribution'].get(major, 0) + 1

	# University
	uni = analysis.get('university', 'Not specified')
	stats['university_distribution'][uni] = stats['university_distribution'].get(uni, 0) + 1

	# Technical skills distributions
	for lang in analysis.get('programming_languages', []):
	stats['top_programming_languages'][lang] = stats['top_programming_languages'].get(lang, 0) + 1

	for skill in analysis.get('technical_skills', []):
	stats['top_technical_skills'][skill] = stats['top_technical_skills'].get(skill, 0) + 1

	for framework in analysis.get('ml_frameworks', []):
	stats['top_ml_frameworks'][framework] = stats['top_ml_frameworks'].get(framework, 0) + 1

	for tool in analysis.get('visualization_tools', []):
	stats['top_visualization_tools'][tool] = stats['top_visualization_tools'].get(tool, 0) + 1

	for db in analysis.get('databases', []):
	stats['top_databases'][db] = stats['top_databases'].get(db, 0) + 1

	for tool in analysis.get('etl_tools', []):
	stats['top_etl_tools'][tool] = stats['top_etl_tools'].get(tool, 0) + 1

	for tech in analysis.get('streaming_tech', []):
	stats['top_streaming_tech'][tech] = stats['top_streaming_tech'].get(tech, 0) + 1

	for platform in analysis.get('cloud_platforms', []):
	stats['top_cloud_platforms'][platform] = stats['top_cloud_platforms'].get(platform, 0) + 1

	for cert in analysis.get('certifications', []):
	stats['top_certifications'][cert] = stats['top_certifications'].get(cert, 0) + 1

	# Calculate average work experience
	stats['avg_work_experience'] = total_exp / valid_exp if valid_exp > 0 else 0

	# Sort and limit distributions
	for key in stats:
	if isinstance(stats[key], dict):
	stats[key] = dict(sorted(stats[key].items(), key=lambda x: x[1], reverse=True)[:10])

	return stats

	def get_candidate_rankings(self, role_type='both', min_score=50):
	"""Get ranked list of candidates based on their scores"""
	conn = sqlite3.connect(self.db_path)
	df = pd.read_sql_query("SELECT analysis_json FROM resumes", conn)
	conn.close()

	if df.empty:
	return []

	rankings = []
	for analysis_json in df['analysis_json']:
	analysis = json.loads(analysis_json)
	scores = self.calculate_score(analysis)

	if scores['total_score'] >= min_score:
	candidate = {
	'name': analysis.get('name', 'Not found'),
	'email': analysis.get('email', 'Not found'),
	'years_experience': analysis.get('years_experience', 'Not found'),
	'education_level': analysis.get('education_level', 'Not found'),
	'key_skills': (
	analysis.get('programming_languages', []) +
	analysis.get('technical_skills', [])
	)[:5], # Top 5 skills
	**scores
	}

	# Filter based on role type
	if role_type == 'data_science':
	ds_score = len(analysis.get('ml_frameworks', [])) + \
	len(analysis.get('deep_learning', [])) + \
	len(analysis.get('nlp_skills', [])) + \
	len(analysis.get('computer_vision', []))
	if ds_score > 0:
	rankings.append(candidate)
	elif role_type == 'data_engineering':
	de_score = len(analysis.get('etl_tools', [])) + \
	len(analysis.get('data_warehousing', [])) + \
	len(analysis.get('orchestration_tools', [])) + \
	len(analysis.get('streaming_tech', []))
	if de_score > 0:
	rankings.append(candidate)
	else: # both
	rankings.append(candidate)

	# Sort by total score
	rankings.sort(key=lambda x: x['total_score'], reverse=True)
	return rankings

	def export_to_csv(self):
	"""Export analyses to CSV"""
	conn = sqlite3.connect(self.db_path)
	df = pd.read_sql_query("SELECT * FROM resumes", conn)
	conn.close()

	csv_path = f"resume_analyses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
	df.to_csv(csv_path, index=False)
	return csv_path

	def export_to_json(self):
	"""Export analyses to JSON"""
	conn = sqlite3.connect(self.db_path)
	df = pd.read_sql_query("SELECT * FROM resumes", conn)
	conn.close()

	json_path = f"resume_analyses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
	df.to_json(json_path, orient='records')
	return json_path