Spaces:
Build error
Build error
| from PyPDF2 import PdfReader | |
| from agents.agents import get_agent_groq | |
| import json | |
| import re | |
| import time | |
| from agents import prompts | |
def parse_resume(path):
    """Extract the full text of a PDF resume.

    Args:
        path: A file path or file-like object accepted by PyPDF2.PdfReader.

    Returns:
        The concatenated text of every page in the PDF.
    """
    reader = PdfReader(path)
    # join() avoids quadratic repeated string concatenation; `or ""` guards
    # against extract_text() returning None for image-only pages, which would
    # have raised TypeError in the original `text += ...` loop.
    return "".join(page.extract_text() or "" for page in reader.pages)
def parse_resumes(resumes_list):
    """Extract text from a batch of PDF resumes.

    Args:
        resumes_list: Iterable of paths or file-like objects readable by
            PyPDF2.PdfReader.

    Returns:
        A list with one extracted-text string per resume, in input order.
    """
    resumes_text = []
    for resume in resumes_list:
        reader = PdfReader(resume)
        # `or ""` guards against extract_text() returning None on
        # image-only pages (TypeError in the original `+=` loop).
        text = "".join(page.extract_text() or "" for page in reader.pages)
        resumes_text.append(text)
    return resumes_text
def parse_(resumes_list):
    """Extract the text of each resume in *resumes_list*.

    Thin batching wrapper: delegates per-file extraction to parse_resume()
    and preserves input order.
    """
    return [parse_resume(resume) for resume in resumes_list]
| from typing_extensions import Annotated, TypedDict, Optional | |
# Define TypedDict for structured output
class ResumeAnalysis(TypedDict):
    """Structured-output schema for one resume-vs-job-listing evaluation.

    The description string in each Annotated field is supplied to the LLM as
    a field-level instruction, so its exact wording is behavior-significant —
    do not edit it casually. The sub-score bands sum to a maximum of 100:
    skills 40 + experience 30 + education 20 + preferred 10.
    """
    # Candidate identification
    candidate_name: Annotated[str, ..., "Name of the candidate with the highest score"]
    # NOTE: generate_analysis_new overwrites this field with the locally
    # computed sum of the four sub-scores, so the model's value is not trusted.
    overall_match_score: Annotated[int, ..., "sum of scores for skills_keywords_score, experience_score, education_certifications_score, and preferred_qualifications_score (Whole Number)"]
    # Sub-scores, each paired with a free-text explanation
    skills_keywords_score: Annotated[int, ..., "Score for Skills and Keywords (0-40)"]
    skills_keywords_explanation: Annotated[str, ..., "Explanation for Skills and Keywords"]
    experience_score: Annotated[int, ..., "Score for Experience (0-30)"]
    experience_explanation: Annotated[str, ..., "Explanation for Experience"]
    education_certifications_score: Annotated[int, ..., "Score for Education & Certifications (0-20)"]
    education_certifications_explanation: Annotated[str, ..., "Explanation for Education & Certifications"]
    preferred_qualifications_score: Annotated[int, ..., "Score for Preferred Qualifications (0-10)"]
    preferred_qualifications_explanation: Annotated[str, ..., "Explanation for Preferred Qualifications"]
    # Prose summary only — the prompt explicitly forbids numbers here
    score_interpretation: Annotated[str, ..., "donot mention any numbers here, just Interpretation in words of the overall_match_score"]
# Use structured output with the LLM
def generate_analysis_new(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score a single resume against a job listing via a structured LLM call.

    Returns a ResumeAnalysis-shaped dict. overall_match_score is recomputed
    locally from the four sub-scores rather than trusted from the model.
    """
    structured_agent = get_agent_groq().with_structured_output(ResumeAnalysis)
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    response = structured_agent.invoke(prompt)
    # Overwrite the model-reported total with the verified sum of sub-scores.
    subtotal = (
        response['skills_keywords_score']
        + response['education_certifications_score']
        + response['experience_score']
        + response['preferred_qualifications_score']
    )
    response['overall_match_score'] = subtotal
    print(response)
    return response
def generate_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Compare one resume with a job listing and parse the model's JSON reply.

    Args:
        resume_text: Extracted text of the candidate's resume.
        job_listing_text: Full text of the job listing.
        job_title_text: The job title being hired for.
        must_have: Must-have requirements to weigh in the comparison.
        prompt_template: Template with resume/job_listing/job_title_text/
            must_have placeholders.

    Returns:
        The dict extracted from the fenced JSON block in the model output
        (see extract()).
    """
    agent = get_agent_groq()
    resp = agent.invoke(
        prompt_template.format(
            resume=resume_text,
            job_listing=job_listing_text,
            job_title_text=job_title_text,
            must_have=must_have,
        )
    )
    return extract(resp.content)
def generate_sel_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_template):
    """Run the selection analysis: score resumes individually, then ask the
    model for a comparative answer and parse per-candidate JSON out of it.

    Returns the list of candidate dicts produced by extract_sel().
    """
    prompt_templates = prompts.prompt_template_modern
    # NOTE(review): the return value of this call is discarded, yet the call
    # itself performs per-resume LLM requests and sleeps between them (see
    # generate_individual_analysis). It also passes `resume_text` where that
    # function iterates a `resumes` collection — presumably resume_text is a
    # list here; confirm against callers whether this call is a debugging
    # leftover or intentional.
    generate_individual_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_templates)
    #chain = prompt | agent
    agent = get_agent_groq()
    response = agent.invoke(prompt_template.format(resume=resume_text, job_listing=job_listing_text,job_title_text=job_title_text,must_have=must_have))
    #print(response.content)
    # Parse the "**Name** {json}" sections out of the comparative reply.
    text_res=extract_sel(response.content)
    #print(text_res)
    return text_res
# Analyzing each resume individually and handling delays to avoid token limits
def generate_individual_analysis(resumes, job_listing_text, job_title_text, must_have, prompt_template, delay=20):
    """Score each resume individually, pausing between calls for rate limits.

    Args:
        resumes: Iterable of resume text strings.
        job_listing_text: Full text of the job listing.
        job_title_text: The job title being hired for.
        must_have: Must-have requirements for the role.
        prompt_template: Prompt template forwarded to generate_analysis_new.
        delay: Seconds to sleep after each LLM call (default 20) to stay
            under the provider's tokens-per-minute limit.

    Returns:
        A list of structured analysis dicts, one per scored resume.
    """
    all_results = []
    for resume_text in resumes:
        structured_response = generate_analysis_new(
            resume_text, job_listing_text, job_title_text, must_have, prompt_template
        )
        if structured_response:
            all_results.append(structured_response)
        # Throttle to avoid the ~6000 tokens-per-minute limit.
        time.sleep(delay)
    # Guard the max(): the original raised ValueError when no resume
    # produced a result (empty input or all calls returned falsy).
    if all_results:
        best_match = max(all_results, key=lambda x: x.get("overall_match_score", 0))
        print('best_match', best_match)
    print('all_results', all_results)
    return all_results
def extract(content):
    """Parse the fenced JSON block out of an LLM reply.

    Args:
        content: Raw model output containing a ```-fenced JSON object
            (a ```json language tag on the fence is also accepted).

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: If no fenced block is found (the original crashed with
            an opaque AttributeError on `.group` in this case).
        json.JSONDecodeError: If the fenced content is not valid JSON.
    """
    match = re.search(r'```(?:json)?\n(.*?)\n```', content, re.DOTALL)
    if match is None:
        raise ValueError("no fenced JSON block found in model output")
    data = json.loads(match.group(1))
    # Echo the parsed fields, preserving the original debug output.
    for key, value in data.items():
        print(f"{key}: {value}")
    # Return a fresh dict (the original copied item by item).
    return dict(data)
def extract_mist(json_string):
    """Decode a plain (unfenced) JSON string into a dict.

    Args:
        json_string: A string containing exactly one JSON object.

    Returns:
        The decoded object as a dict.

    Raises:
        json.JSONDecodeError: If json_string is not valid JSON.
    """
    data = json.loads(json_string)
    # Echo the parsed fields, preserving the original debug output.
    for key, value in data.items():
        print(f"{key}: {value}")
    # Return a fresh dict directly instead of the original key-by-key copy.
    return dict(data)
def extract_sel(content):
    """Split a multi-candidate LLM reply into per-candidate JSON dicts.

    The reply is expected to contain sections of the form
    ``**Candidate Name**`` followed by a JSON object.

    Args:
        content: Raw model output.

    Returns:
        A list of decoded per-candidate dicts (the bolded names themselves
        are not retained), or [] if any JSON section fails to parse.
    """
    try:
        # re.split with a capturing group interleaves captures with the
        # surrounding text: [preamble, name1, body1, name2, body2, ...],
        # so the odd indices are names and each i+1 is that name's body.
        parts = re.split(r'\*\*(.*?)\*\*', content)
        candidate_json_list = []
        for i in range(1, len(parts), 2):
            json_string = parts[i + 1].strip()
            candidate_json_list.append(json.loads(json_string))
        return candidate_json_list
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return []
def generate_adv(job_listing_text, job_title_text, prompt_template):
    """Generate advisory text for a job listing via the Groq agent.

    Formats *prompt_template* with the listing and title, invokes the agent,
    prints the reply text, and returns it.
    """
    formatted_prompt = prompt_template.format(
        job_listing=job_listing_text,
        job_title_text=job_title_text,
    )
    result = get_agent_groq().invoke(formatted_prompt)
    output = result.content
    print(output)
    return output