# Resume-ATS / Process /ats_parser.py
# HARISH20205's picture
# format
# 4209761
import re
import logging
from .response import get_response
from pydantic import BaseModel, TypeAdapter
import json
import traceback
# Module-level logger, named after this module via __name__; every function
# in this section logs through it.
logger = logging.getLogger(__name__)
class Section:
    """Declare the names and types of a set of basic resume fields.

    The class body consists of annotations only: no values are assigned and
    no base class is given, so the names below exist solely in
    ``Section.__annotations__`` until an attribute is set explicitly.
    """

    # NOTE(review): BaseModel is imported by this module but is not used as a
    # base class here -- confirm whether Section was meant to be a pydantic
    # model.
    # NOTE(review): every field is annotated as str, while the prompt in
    # extract_resume_details asks for "experience" as a list -- confirm the
    # intended types.
    name: str
    email: str
    phone: str
    skills: str
    experience: str
    education: str
    certifications: str
    areas_of_interest: str
def deep_get(dictionary, keys, default=None):
    """Follow a sequence of keys through nested dictionaries.

    Parameters:
        dictionary: The outermost mapping to start from.
        keys: Iterable of keys, applied one after another.
        default: Value returned when the lookup cannot be completed.

    Returns:
        The value found at the end of the key path. ``default`` is returned
        instead when a key is missing, when a non-dict value is met while
        keys remain, when the final value equals ``{}``, or when any
        exception is raised during the walk.
    """
    logger.debug(f"Accessing deep keys {keys} in dictionary")
    try:
        current = dictionary
        for key in keys:
            # Guard clause: only dicts can be descended into.
            if not isinstance(current, dict):
                logger.warning(f"Could not access key {key}, returning default value")
                return default
            # A missing key yields {}, which keeps the walk going and is
            # mapped to `default` once the loop is done.
            current = current.get(key, {})

        # An empty dict is treated as "nothing found"; this also covers a
        # stored value that is itself an empty dict.
        if current != {}:
            return current
        return default
    except Exception as e:
        logger.error(f"Error in deep_get function: {e}")
        return default
# Task instructions passed to get_response together with the resume text.
# Kept as a module-level constant so the function body stays readable; the
# text is sent verbatim.
_RESUME_EXTRACTION_INSTRUCTIONS = """Analyze the provided resume and extract structured information as follows:
1. Extract the resume's content into a structured JSON format with these fields:
{
"structured_data": {
"name": null,
"email": null,
"github": null,
"phone": null,
"skills": null,
"experience": (give each point in list),
"education": null,
"certifications": null,
"areas_of_interest": null,
"projects": (give each point in list),
"languages": null,
"awards_and_achievements": null,
"volunteer_experience": null,
"hobbies_and_interests": null,
"publications": null,
"conferences_and_presentations": null,
"patents": null,
"professional_affiliations": null,
"portfolio_links": null,
"summary_or_objective": null
}
}
Give in this format for experience, education, project
Experience {
title: string;
company: string;
start_date: string;
end_date: string;
description: list(each point);
}
Education {
institution: string;
degree: string;
gpa: string;
start_date: string;
end_date: string;
}
Project {
project: string;
name: string;
description: list(each points);
link: string;
}
Instructions:
- Return the exact JSON structure shown above with the key "structured_data"
- Preserve bullet points and formatting in descriptions where present, if the data is mixed content make it as separate points
- For experience, education, and projects, maintain chronological order (most recent first)
- Use null for missing or unidentifiable fields (not empty strings or empty arrays)
- Format phone numbers consistently if found (e.g., +X-XXX-XXX-XXXX format if possible)
- Keep the content exactly as in the original resume - don't shorten or add anything extra
- Include all relevant text content, preserving the original meaning, details, and dates
- Extract GitHub URLs completely including the repository path if available
"""


def extract_resume_details(resume: str):
    """Extract structured fields from raw resume text.

    The resume is passed to ``get_response`` together with a fixed set of
    extraction instructions. The reply is parsed as JSON and the value stored
    under its ``"structured_data"`` key is returned.

    Parameters:
        resume (str): The raw text of the resume.

    Returns:
        The value found under ``"structured_data"`` in the parsed reply on
        success. On failure (invalid JSON, missing key, or any other
        ``Exception``) a dict with the single key ``"structured_data_error"``
        holding a description of the problem; the exception is logged, not
        re-raised.
    """
    logger.info("Starting resume details extraction")

    # Bound before the try block so the JSONDecodeError handler can always log
    # it, even if that exception is raised before get_response has returned.
    combined_output = None
    try:
        logger.info("Sending resume to get_response function")
        combined_output = get_response(
            prompt=resume, task=_RESUME_EXTRACTION_INSTRUCTIONS
        )
        logger.debug("Raw response received from get_response")

        logger.info("Attempting to parse response to JSON")
        result = json.loads(combined_output)
        logger.debug("Successfully parsed response to JSON")

        logger.info("Extracting structured data from result")
        structured_data = result["structured_data"]
        logger.info("Resume structured data extraction completed successfully")
        return structured_data
    except json.JSONDecodeError as e:
        error_msg = f"JSON parsing error: {e}"
        logger.error(error_msg)
        logger.debug(f"Failed JSON content: {combined_output}")
        return {"structured_data_error": error_msg}
    except KeyError as e:
        error_msg = f"Missing key in response: {e}"
        logger.error(error_msg)
        return {"structured_data_error": error_msg}
    except Exception as e:
        # Last-resort boundary: callers receive an error dict rather than an
        # exception; the traceback is kept at debug level.
        error_msg = f"Unexpected error in extract_resume_details: {e}"
        logger.error(error_msg)
        logger.debug(traceback.format_exc())
        return {"structured_data_error": error_msg}