Spaces:
Sleeping
Sleeping
| import json | |
| from typing import List | |
| from langchain_core.pydantic_v1 import BaseModel, Field | |
| from langchain.output_parsers import PydanticOutputParser | |
| from langchain_core.prompts import PromptTemplate | |
def load_json_file(filename):
    """Load and parse a UTF-8 encoded JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or ``None`` when the file cannot be opened,
        cannot be decoded as UTF-8, or does not contain valid JSON. A
        message describing the problem is printed in each failure case.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in {filename}: {e}")
    except (OSError, UnicodeDecodeError) as e:
        # A missing or unreadable file is handled like malformed JSON
        # (report and return None) so a call made at import time does not
        # abort the import.
        print(f"Error reading {filename}: {e}")
    return None
# Reference data, loaded once when this module is imported. Each name is
# None when load_json_file could not decode the corresponding file.
cv_structure = load_json_file('cv_structure.json')
# cv_sections is read elsewhere in this module through its 'sections'
# mapping: the keys are listed in the section-detection prompt and the values
# are added up by calculate_section_detection_score.
cv_sections = load_json_file('cv_sections.json')
# Evaluation of a single education entry: three presence flags (degree, year,
# institution) and a score constrained to the range 0-10 via ge/le.
# NOTE(review): documented with comments rather than a docstring because a
# class docstring would presumably surface in the schema that
# get_content_quality_prompt embeds in its prompt — confirm before adding one.
class EducationElement(BaseModel):
    degree_present: bool = Field(description="Whether the degree is present")
    year_present: bool = Field(description="Whether the year is present")
    institution_present: bool = Field(description="Whether the institution is present")
    score: float = Field(description="Score for this education element", ge=0, le=10)
# Evaluation of the whole education section: an overall 0-10 score plus one
# EducationElement per evaluated entry.
# NOTE(review): kept as comments, not a docstring — a docstring would
# presumably appear in the schema sent to the model; confirm before adding one.
class Education(BaseModel):
    overall_score: float = Field(description="Overall score for the education section", ge=0, le=10)
    elements: List[EducationElement] = Field(description="List of education elements")
# Evaluation of a single work-experience entry: six presence flags, two 0-10
# quality ratings (responsibilities, achievements) and a 0-10 entry score.
# The 0-10 bounds are enforced through the ge/le arguments of Field.
# NOTE(review): kept as comments, not a docstring — a docstring would
# presumably appear in the schema sent to the model; confirm before adding one.
class WorkExperienceElement(BaseModel):
    job_title_present: bool = Field(description="Whether the job title is present")
    company_present: bool = Field(description="Whether the company name is present")
    dates_present: bool = Field(description="Whether the start and end dates are present")
    technologies_present: bool = Field(description="Whether the used technologies are present")
    responsibilities_present: bool = Field(description="Whether responsibilities are present")
    achievements_present: bool = Field(description="Whether achievements are present")
    responsibilities_quality: float = Field(description="Quality of responsibilities description", ge=0, le=10)
    achievements_quality: float = Field(description="Quality of achievements description", ge=0, le=10)
    score: float = Field(description="Score for this work experience element", ge=0, le=10)
# Evaluation of the whole work-experience section: an overall 0-10 score plus
# one WorkExperienceElement per evaluated entry.
# NOTE(review): kept as comments, not a docstring — a docstring would
# presumably appear in the schema sent to the model; confirm before adding one.
class WorkExperience(BaseModel):
    overall_score: float = Field(description="Overall score for the work experience section", ge=0, le=10)
    elements: List[WorkExperienceElement] = Field(description="List of work experience elements")
# Evaluation of the profile section: an overall 0-10 score and presence flags
# for a brief overview, career goals and an objective.
# NOTE(review): kept as comments, not a docstring — a docstring would
# presumably appear in the schema sent to the model; confirm before adding one.
class Profile(BaseModel):
    overall_score: float = Field(description="Overall score for the profile section", ge=0, le=10)
    brief_overview_present: bool = Field(description="Whether a brief overview is present")
    career_goals_present: bool = Field(description="Whether career goals are present")
    objective_present: bool = Field(description="Whether an objective is present")
# Top-level evaluation result combining the education, work-experience and
# profile evaluations. This is the model handed to PydanticOutputParser in
# get_content_quality_prompt and the input of calculate_overall_score.
# NOTE(review): kept as comments, not a docstring — a docstring would
# presumably appear in the schema sent to the model; confirm before adding one.
class ResumeQualityEvaluation(BaseModel):
    education: Education = Field(description="Evaluation of the education section")
    work_experience: WorkExperience = Field(description="Evaluation of the work experience section")
    profile: Profile = Field(description="Evaluation of the profile section")
def get_personal_info_prompt(text):
    """Build the prompt that asks for the city and country found in a CV.

    Args:
        text: CV text to insert into the prompt.

    Returns:
        A string wrapped in ``<s>[INST]`` ... ``[/INST]`` that contains the
        extraction instruction, the expected JSON shape and the CV text.
    """
    # Literal JSON shape shown to the model; the braces are part of the prompt.
    response_shape = (
        '{"city": {"extracted city name": true/false}, '
        '"country": {"extracted country name": true/false}}'
    )
    instruction = (
        "Extract the personal information from the following CV text. "
        "The text may be in any language. "
        f"Respond with a JSON object in the format {response_shape}. "
        "If you can't find the information, set the value to false."
    )
    return f"<s>[INST]{instruction}\nText:\n{text}[/INST]"
def get_spelling_grammar_prompt(text):
    """Build the prompt that asks for a spelling/grammar error percentage.

    Args:
        text: Text to insert into the prompt for analysis.

    Returns:
        A string wrapped in ``<s>[INST]`` ... ``[/INST]`` that asks for a JSON
        object with the key ``error_percentage``.
    """
    prompt_lines = [
        "<s>[INST]Analyze the following text for spelling and grammar errors. "
        "The text may be in any language. "
        "Do not correct the errors, just count them. "
        "Calculate the percentage of errors.",
        "Text to analyze:",
        f"{text}",
        "Respond with a JSON object containing the key 'error_percentage' "
        "with the calculated percentage (0-100) of errors.[/INST]",
    ]
    return "\n".join(prompt_lines)
def get_section_detection_prompt(text):
    """Build the prompt that asks which configured CV sections are present.

    The candidate section names are the keys of ``cv_sections['sections']``,
    joined with ", " and listed twice in the prompt.

    Args:
        text: CV text to insert into the prompt.

    Returns:
        The prompt string, or ``None`` when ``cv_sections`` is ``None``.
    """
    # Without the section configuration there is nothing to ask about.
    if cv_sections is None:
        return None

    section_names = ", ".join(cv_sections['sections'].keys())
    prompt_lines = (
        "<s>[INST] Analyze this CV text and identify which of the following "
        f"sections are present: {section_names}.",
        "A section is considered present if its content is identifiable, "
        "even without an explicit title.",
        "Consider synonyms and alternative phrasings for section titles.",
        "Sections to look for:",
        f"{section_names}",
        "CV text:",
        f"{text}",
        'Respond with a JSON object with a key "present_sections" containing '
        "an array of the identified sections.",
        "Only include sections that are actually present in the CV. [/INST]",
    )
    return "\n".join(prompt_lines)
def get_content_quality_prompt(text):
    """Build the prompt that asks for a structured resume quality evaluation.

    The ``{format_instructions}`` placeholder is pre-filled with the output of
    a ``PydanticOutputParser`` built over ``ResumeQualityEvaluation``; the
    ``{resume}`` placeholder is filled with ``text``.

    Args:
        text: Resume text to insert into the prompt.

    Returns:
        The formatted prompt string.
    """
    template_text = """<s>[INST]Evaluate the quality of the following resume sections:
{resume}
Provide a detailed evaluation following this format:
{format_instructions}
For each section, evaluate the presence and quality of required elements:
1. Education:
- Check for the presence of Degree, Year, and Institution for each education entry
- Provide a score (0-10) for each education entry based on completeness and clarity
2. Work Experience:
- Check for the presence of Job title, Company, dates, used technologies, Responsibilities, and Achievements for each work experience entry
- Evaluate the quality of Responsibilities description (0-10)
- Evaluate the quality of Achievements description (0-10)
- Provide a score (0-10) for each work experience entry based on completeness, clarity, and the quality of descriptions
3. Profile:
- Check for the presence of a brief overview, career goals, and objective
- Provide an overall score (0-10) based on the completeness and clarity of the profile
Provide an overall score for each section on a scale of 0-10 based on the presence of elements and their quality where applicable.[/INST]"""

    output_parser = PydanticOutputParser(pydantic_object=ResumeQualityEvaluation)
    format_instructions = output_parser.get_format_instructions()
    quality_prompt = PromptTemplate(
        template=template_text,
        input_variables=["resume"],
        partial_variables={"format_instructions": format_instructions},
    )
    return quality_prompt.format(resume=text)
def calculate_section_detection_score(detected_sections):
    """Add up the configured values of the detected CV sections.

    Args:
        detected_sections: Iterable of section names reported as present.
            Names that are not keys of ``cv_sections['sections']`` are
            ignored. ``None`` or an empty collection yields 0.

    Returns:
        The sum of the values stored under each recognised name, or 0 when
        nothing was detected or ``cv_sections`` is ``None``.
    """
    if not detected_sections:
        return 0
    if cv_sections is None:
        # The section configuration is unavailable. Score nothing rather
        # than raising TypeError on the subscript below; this mirrors the
        # None guard in get_section_detection_prompt.
        return 0

    section_values = cv_sections['sections']
    total_score = 0
    # Plain accumulation (not sum()) keeps float results bit-for-bit stable.
    for section in detected_sections:
        if section in section_values:
            total_score += section_values[section]
    return total_score
def calculate_overall_score(evaluation: ResumeQualityEvaluation) -> float:
    """Combine the three section scores into one weighted score.

    The weights are 0.3 for education, 0.5 for work experience and 0.2 for
    the profile.

    Args:
        evaluation: Object exposing ``education``, ``work_experience`` and
            ``profile``, each with an ``overall_score`` attribute.

    Returns:
        The weighted sum, rounded to two decimal places.
    """
    education_part = evaluation.education.overall_score * 0.3
    work_part = evaluation.work_experience.overall_score * 0.5
    profile_part = evaluation.profile.overall_score * 0.2
    # Added in the same left-to-right order as the weights are listed above.
    return round(education_part + work_part + profile_part, 2)
# Names exported by a star-import of this module.
__all__ = [
    'ResumeQualityEvaluation',
    'get_personal_info_prompt',
    'get_spelling_grammar_prompt',
    'get_section_detection_prompt',
    'get_content_quality_prompt',
    'calculate_section_detection_score',
    'calculate_overall_score',
]