Spaces:
Sleeping
Sleeping
File size: 4,568 Bytes
ebed33b 655b0dc 9ac3eaa ebed33b 655b0dc ebed33b 9ac3eaa ebed33b 9ac3eaa ebed33b 655b0dc ebed33b 9ac3eaa ebed33b 655b0dc ebed33b ee816fa ebed33b ee816fa ebed33b ee816fa a2802bc ee816fa a2802bc ee816fa ebed33b 4209761 ee816fa ebed33b 655b0dc ebed33b 655b0dc 9ac3eaa 655b0dc ebed33b 655b0dc ebed33b 655b0dc ebed33b 655b0dc ebed33b 655b0dc 08384df 655b0dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import re
import logging
from .response import get_response
from pydantic import BaseModel, TypeAdapter
import json
import traceback
# Set up logging
logger = logging.getLogger(__name__)
class Section:
    """Field layout for a parsed resume section.

    NOTE(review): these are bare class-level annotations on a plain class —
    they create no instance attributes and no validation. Given the
    ``pydantic.BaseModel`` import at the top of the file, this was likely
    intended to inherit from ``BaseModel`` (or be a ``@dataclass``) —
    confirm with callers; as written it serves only as a field-name
    reference and appears unused in this module.
    """
    # Candidate contact fields
    name: str
    email: str
    phone: str
    # Resume content fields (raw extracted text)
    skills: str
    experience: str
    education: str
    certifications: str
    areas_of_interest: str
def deep_get(dictionary, keys, default=None):
    """Safely retrieve a nested value from a dictionary.

    Walks ``dictionary`` following ``keys`` in order and returns
    ``default`` when any key along the path is missing or an intermediate
    value is not a dict. Never raises: unexpected errors (e.g. a
    non-iterable ``keys`` or an unhashable key) are logged and ``default``
    is returned.

    Parameters:
        dictionary (dict): The mapping to traverse.
        keys (Iterable): Keys to follow, outermost first.
        default: Value returned when the path cannot be resolved.

    Returns:
        The value stored at the nested path, or ``default``.
    """
    logger.debug(f"Accessing deep keys {keys} in dictionary")
    # Sentinel distinguishes "key absent" from a stored value such as {} or
    # None. The previous implementation used .get(key, {}) and a final
    # `!= {}` check, which silently turned a legitimately stored {} into
    # `default` and kept walking after a missing key.
    _missing = object()
    try:
        current = dictionary
        for key in keys:
            if not isinstance(current, dict):
                logger.warning(f"Could not access key {key}, returning default value")
                return default
            current = current.get(key, _missing)
            if current is _missing:
                logger.warning(f"Could not access key {key}, returning default value")
                return default
        return current
    except Exception as e:
        # Deliberate best-effort: this helper must never raise to callers.
        logger.error(f"Error in deep_get function: {e}")
        return default
def extract_resume_details(resume: str):
    """Extract structured fields from raw resume text via the LLM backend.

    Sends ``resume`` together with a fixed extraction instruction to
    ``get_response`` and parses the JSON reply into a dict of structured
    resume fields.

    Parameters:
        resume (str): The raw text of the resume.

    Returns:
        dict: The ``structured_data`` mapping on success, or a dict with a
        single ``structured_data_error`` key describing the failure
        (JSON parse error, missing key, or any unexpected exception).
    """
    # NOTE: in the original, this docstring appeared AFTER the first
    # statement, so it was a discarded string expression rather than a
    # docstring; it has been moved to the top of the function body.
    logger.info("Starting resume details extraction")
    # Runtime prompt — kept byte-for-byte; the model is asked to reply with
    # a JSON object whose single top-level key is "structured_data".
    system_ins = """Analyze the provided resume and extract structured information as follows:
1. Extract the resume's content into a structured JSON format with these fields:
{
"structured_data": {
"name": null,
"email": null,
"github": null,
"phone": null,
"skills": null,
"experience": (give each point in list),
"education": null,
"certifications": null,
"areas_of_interest": null,
"projects": (give each point in list),
"languages": null,
"awards_and_achievements": null,
"volunteer_experience": null,
"hobbies_and_interests": null,
"publications": null,
"conferences_and_presentations": null,
"patents": null,
"professional_affiliations": null,
"portfolio_links": null,
"summary_or_objective": null
}
}
Give in this format for experience, education, project
Experience {
title: string;
company: string;
start_date: string;
end_date: string;
description: list(each point);
}
Education {
institution: string;
degree: string;
gpa: string;
start_date: string;
end_date: string;
}
Project {
project: string;
name: string;
description: list(each points);
link: string;
}
Instructions:
- Return the exact JSON structure shown above with the key "structured_data"
- Preserve bullet points and formatting in descriptions where present, if the data is mixed content make it as separate points
- For experience, education, and projects, maintain chronological order (most recent first)
- Use null for missing or unidentifiable fields (not empty strings or empty arrays)
- Format phone numbers consistently if found (e.g., +X-XXX-XXX-XXXX format if possible)
- Keep the content exactly as in the original resume - don't shorten or add anything extra
- Include all relevant text content, preserving the original meaning, details, and dates
- Extract GitHub URLs completely including the repository path if available
"""
    try:
        logger.info("Sending resume to get_response function")
        combined_output = get_response(prompt=resume, task=system_ins)
        logger.debug("Raw response received from get_response")
        logger.info("Attempting to parse response to JSON")
        result = json.loads(combined_output)
        logger.debug("Successfully parsed response to JSON")
        logger.info("Extracting structured data from result")
        # KeyError here (model omitted the wrapper key) is handled below.
        structured_data = result["structured_data"]
        logger.info("Resume structured data extraction completed successfully")
        return structured_data
    except json.JSONDecodeError as e:
        error_msg = f"JSON parsing error: {e}"
        logger.error(error_msg)
        # combined_output is always bound here: json.loads runs only after
        # the get_response assignment succeeds.
        logger.debug(f"Failed JSON content: {combined_output}")
        return {"structured_data_error": error_msg}
    except KeyError as e:
        error_msg = f"Missing key in response: {e}"
        logger.error(error_msg)
        return {"structured_data_error": error_msg}
    except Exception as e:
        # Catch-all boundary: callers receive an error dict, never an exception.
        error_msg = f"Unexpected error in extract_resume_details: {e}"
        logger.error(error_msg)
        logger.debug(traceback.format_exc())
        return {"structured_data_error": error_msg}
|