Spaces:
Build error
Build error
| from PyPDF2 import PdfReader | |
| from agents.agents import get_agent_groq | |
| import json | |
| import re | |
| import time | |
| from agents import prompts | |
def parse_resume(path):
    """Extract the full text of a PDF resume.

    Args:
        path: A file path or file-like object accepted by PyPDF2.PdfReader.

    Returns:
        The concatenated text of every page in the PDF.
    """
    reader = PdfReader(path)
    # join() avoids quadratic repeated string concatenation; `or ""` guards
    # against extract_text() returning None for image-only pages, which would
    # have raised TypeError in the original `text += ...` loop.
    return "".join(page.extract_text() or "" for page in reader.pages)
def parse_resumes(resumes_list):
    """Extract text from a batch of PDF resumes.

    Args:
        resumes_list: Iterable of paths or file-like objects readable by
            PyPDF2.PdfReader.

    Returns:
        A list with one extracted-text string per resume, in input order.
    """
    resumes_text = []
    for resume in resumes_list:
        reader = PdfReader(resume)
        # `or ""` guards against extract_text() returning None on
        # image-only pages (TypeError in the original `+=` loop).
        text = "".join(page.extract_text() or "" for page in reader.pages)
        resumes_text.append(text)
    return resumes_text
def parse_(resumes_list):
    """Extract the text of each resume in *resumes_list*.

    Thin batching wrapper: delegates per-file extraction to parse_resume()
    and preserves input order.
    """
    return [parse_resume(resume) for resume in resumes_list]
| from typing_extensions import Annotated, TypedDict, Optional | |
# Define TypedDict for structured output
class ResumeAnalysis(TypedDict):
    """Structured-output schema for one resume-vs-job-listing evaluation.

    The description string in each Annotated field is supplied to the LLM as
    a field-level instruction, so its exact wording is behavior-significant —
    do not edit it casually. The sub-score bands sum to a maximum of 100:
    skills 40 + experience 30 + education 20 + preferred 10.
    """
    # Candidate identification
    candidate_name: Annotated[str, ..., "Name of the candidate with the highest score"]
    # NOTE: generate_analysis_new overwrites this field with the locally
    # computed sum of the four sub-scores, so the model's value is not trusted.
    overall_match_score: Annotated[int, ..., "sum of scores for skills_keywords_score, experience_score, education_certifications_score, and preferred_qualifications_score (Whole Number)"]
    # Sub-scores, each paired with a free-text explanation
    skills_keywords_score: Annotated[int, ..., "Score for Skills and Keywords (0-40)"]
    skills_keywords_explanation: Annotated[str, ..., "Explanation for Skills and Keywords"]
    experience_score: Annotated[int, ..., "Score for Experience (0-30)"]
    experience_explanation: Annotated[str, ..., "Explanation for Experience"]
    education_certifications_score: Annotated[int, ..., "Score for Education & Certifications (0-20)"]
    education_certifications_explanation: Annotated[str, ..., "Explanation for Education & Certifications"]
    preferred_qualifications_score: Annotated[int, ..., "Score for Preferred Qualifications (0-10)"]
    preferred_qualifications_explanation: Annotated[str, ..., "Explanation for Preferred Qualifications"]
    # Prose summary only — the prompt explicitly forbids numbers here
    score_interpretation: Annotated[str, ..., "donot mention any numbers here, just Interpretation in words of the overall_match_score"]
# Use structured output with the LLM
def generate_analysis_new(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score a single resume against a job listing via a structured LLM call.

    Returns a ResumeAnalysis-shaped dict. overall_match_score is recomputed
    locally from the four sub-scores rather than trusted from the model.
    """
    structured_agent = get_agent_groq().with_structured_output(ResumeAnalysis)
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    response = structured_agent.invoke(prompt)
    # Overwrite the model-reported total with the verified sum of sub-scores.
    subtotal = (
        response['skills_keywords_score']
        + response['education_certifications_score']
        + response['experience_score']
        + response['preferred_qualifications_score']
    )
    response['overall_match_score'] = subtotal
    print(response)
    return response
def generate_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Compare one resume with a job listing and parse the model's JSON reply.

    Args:
        resume_text: Extracted text of the candidate's resume.
        job_listing_text: Full text of the job listing.
        job_title_text: The job title being hired for.
        must_have: Must-have requirements to weigh in the comparison.
        prompt_template: Template with resume/job_listing/job_title_text/
            must_have placeholders.

    Returns:
        The dict extracted from the fenced JSON block in the model output
        (see extract()).
    """
    agent = get_agent_groq()
    resp = agent.invoke(
        prompt_template.format(
            resume=resume_text,
            job_listing=job_listing_text,
            job_title_text=job_title_text,
            must_have=must_have,
        )
    )
    return extract(resp.content)
def generate_sel_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_template):
    """Run the selection analysis: score resumes individually, then ask the
    model for a comparative answer and parse per-candidate JSON out of it.

    Returns the list of candidate dicts produced by extract_sel().
    """
    prompt_templates = prompts.prompt_template_modern
    # NOTE(review): the return value of this call is discarded, yet the call
    # itself performs per-resume LLM requests and sleeps between them (see
    # generate_individual_analysis). It also passes `resume_text` where that
    # function iterates a `resumes` collection — presumably resume_text is a
    # list here; confirm against callers whether this call is a debugging
    # leftover or intentional.
    generate_individual_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_templates)
    #chain = prompt | agent
    agent = get_agent_groq()
    response = agent.invoke(prompt_template.format(resume=resume_text, job_listing=job_listing_text,job_title_text=job_title_text,must_have=must_have))
    #print(response.content)
    # Parse the "**Name** {json}" sections out of the comparative reply.
    text_res=extract_sel(response.content)
    #print(text_res)
    return text_res
# Analyzing each resume individually and handling delays to avoid token limits
def generate_individual_analysis(resumes, job_listing_text, job_title_text, must_have, prompt_template, delay=20):
    """Score each resume individually, pausing between calls for rate limits.

    Args:
        resumes: Iterable of resume text strings.
        job_listing_text: Full text of the job listing.
        job_title_text: The job title being hired for.
        must_have: Must-have requirements for the role.
        prompt_template: Prompt template forwarded to generate_analysis_new.
        delay: Seconds to sleep after each LLM call (default 20) to stay
            under the provider's tokens-per-minute limit.

    Returns:
        A list of structured analysis dicts, one per scored resume.
    """
    all_results = []
    for resume_text in resumes:
        structured_response = generate_analysis_new(
            resume_text, job_listing_text, job_title_text, must_have, prompt_template
        )
        if structured_response:
            all_results.append(structured_response)
        # Throttle to avoid the ~6000 tokens-per-minute limit.
        time.sleep(delay)
    # Guard the max(): the original raised ValueError when no resume
    # produced a result (empty input or all calls returned falsy).
    if all_results:
        best_match = max(all_results, key=lambda x: x.get("overall_match_score", 0))
        print('best_match', best_match)
    print('all_results', all_results)
    return all_results
def extract(content):
    """Parse the fenced JSON block out of an LLM reply.

    Args:
        content: Raw model output containing a ```-fenced JSON object
            (a ```json language tag on the fence is also accepted).

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: If no fenced block is found (the original crashed with
            an opaque AttributeError on `.group` in this case).
        json.JSONDecodeError: If the fenced content is not valid JSON.
    """
    match = re.search(r'```(?:json)?\n(.*?)\n```', content, re.DOTALL)
    if match is None:
        raise ValueError("no fenced JSON block found in model output")
    data = json.loads(match.group(1))
    # Echo the parsed fields, preserving the original debug output.
    for key, value in data.items():
        print(f"{key}: {value}")
    # Return a fresh dict (the original copied item by item).
    return dict(data)
def extract_mist(json_string):
    """Decode a plain (unfenced) JSON string into a dict.

    Args:
        json_string: A string containing exactly one JSON object.

    Returns:
        The decoded object as a dict.

    Raises:
        json.JSONDecodeError: If json_string is not valid JSON.
    """
    data = json.loads(json_string)
    # Echo the parsed fields, preserving the original debug output.
    for key, value in data.items():
        print(f"{key}: {value}")
    # Return a fresh dict directly instead of the original key-by-key copy.
    return dict(data)
def extract_sel(content):
    """Split a multi-candidate LLM reply into per-candidate JSON dicts.

    The reply is expected to contain sections of the form
    ``**Candidate Name**`` followed by a JSON object.

    Args:
        content: Raw model output.

    Returns:
        A list of decoded per-candidate dicts (the bolded names themselves
        are not retained), or [] if any JSON section fails to parse.
    """
    try:
        # re.split with a capturing group interleaves captures with the
        # surrounding text: [preamble, name1, body1, name2, body2, ...],
        # so the odd indices are names and each i+1 is that name's body.
        parts = re.split(r'\*\*(.*?)\*\*', content)
        candidate_json_list = []
        for i in range(1, len(parts), 2):
            json_string = parts[i + 1].strip()
            candidate_json_list.append(json.loads(json_string))
        return candidate_json_list
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return []
def generate_adv(job_listing_text, job_title_text, prompt_template):
    """Generate advisory text for a job listing via the Groq agent.

    Formats *prompt_template* with the listing and title, invokes the agent,
    prints the reply text, and returns it.
    """
    formatted_prompt = prompt_template.format(
        job_listing=job_listing_text,
        job_title_text=job_title_text,
    )
    result = get_agent_groq().invoke(formatted_prompt)
    output = result.content
    print(output)
    return output