File size: 4,568 Bytes
ebed33b
655b0dc
9ac3eaa
ebed33b
 
655b0dc
 
 
 
ebed33b
9ac3eaa
ebed33b
 
 
 
 
 
 
 
 
 
9ac3eaa
ebed33b
655b0dc
 
 
 
 
 
 
 
 
 
 
 
ebed33b
9ac3eaa
ebed33b
655b0dc
ebed33b
 
 
 
 
 
 
 
 
 
 
 
ee816fa
ebed33b
ee816fa
ebed33b
 
ee816fa
 
 
 
 
 
a2802bc
ee816fa
 
 
a2802bc
ee816fa
 
 
 
 
 
 
 
 
 
 
ebed33b
 
4209761
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee816fa
 
 
 
 
 
 
 
 
ebed33b
 
655b0dc
ebed33b
655b0dc
9ac3eaa
655b0dc
ebed33b
655b0dc
ebed33b
655b0dc
ebed33b
655b0dc
ebed33b
 
655b0dc
 
 
 
 
 
 
 
 
08384df
655b0dc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import re
import logging
from .response import get_response
from pydantic import BaseModel, TypeAdapter
import json
import traceback

# Module-level logger named after this module, per the stdlib logging convention;
# handlers/levels are expected to be configured by the application entry point.
logger = logging.getLogger(__name__)


class Section:
    """Schema sketch of the sections expected from a parsed resume.

    NOTE(review): these are bare annotations on a plain class, so no attributes
    actually exist at runtime (``Section.name`` raises AttributeError).  Given
    the ``pydantic`` imports at the top of the file, this was presumably meant
    to subclass ``BaseModel`` (or be validated via ``TypeAdapter``) -- confirm
    with the author.  The class is not referenced elsewhere in this chunk.
    """

    name: str
    email: str
    phone: str
    skills: str
    experience: str
    education: str
    certifications: str
    areas_of_interest: str


def deep_get(dictionary, keys, default=None):
    """Safely walk nested dicts following *keys*, returning *default* on any miss.

    Parameters:
        dictionary: The (possibly nested) dict to traverse.
        keys: Iterable of keys to follow, outermost first.
        default: Value returned when the path is missing, a non-dict value is
            hit mid-path, or the final value is an empty dict.

    Returns:
        The value found at the nested path, or *default*.

    Note:
        An empty-dict result is deliberately treated as "not found": a missing
        key yields ``{}`` so traversal can continue, and the final ``!= {}``
        check maps that sentinel (and any genuinely-empty dict) to *default*.
    """
    # Lazy %-style args avoid formatting cost when the log level is disabled.
    logger.debug("Accessing deep keys %s in dictionary", keys)
    try:
        for key in keys:
            if isinstance(dictionary, dict):
                # Missing key -> {} so the loop keeps going; resolved to
                # *default* by the final check below.
                dictionary = dictionary.get(key, {})
            else:
                logger.warning("Could not access key %s, returning default value", key)
                return default
        return dictionary if dictionary != {} else default
    except Exception as e:
        # Defensive boundary: e.g. a non-iterable *keys* raises TypeError here.
        logger.error("Error in deep_get function: %s", e)
        return default


def extract_resume_details(resume: str):
    """Extract structured fields from raw resume text via the LLM backend.

    Sends *resume* to ``get_response`` with a system instruction requesting a
    fixed JSON schema, parses the reply, and returns the inner
    ``structured_data`` object.  (The original docstring sat after the first
    statement, so it was never the function's ``__doc__``; it now is.)

    Parameters:
        resume (str): The raw text of the resume.

    Returns:
        dict: The ``structured_data`` mapping on success, or a dict with a
        single ``structured_data_error`` key describing the failure.
    """
    logger.info("Starting resume details extraction")

    system_ins = """Analyze the provided resume and extract structured information as follows:

1. Extract the resume's content into a structured JSON format with these fields:

{
    "structured_data": {
        "name": null,
        "email": null,
        "github": null,
        "phone": null,
        "skills": null,
        "experience": (give each point in list),
        "education": null,
        "certifications": null,
        "areas_of_interest": null,
        "projects": (give each point in list),
        "languages": null,
        "awards_and_achievements": null,
        "volunteer_experience": null,
        "hobbies_and_interests": null,
        "publications": null,
        "conferences_and_presentations": null,
        "patents": null,
        "professional_affiliations": null,
        "portfolio_links": null,
        "summary_or_objective": null
    }
}

    Give in this format for experience, education, project

    Experience {
    title: string;
    company: string;
    start_date: string;
    end_date: string;
    description: list(each point);
    }

    Education {
    institution: string;
    degree: string;
    gpa: string;
    start_date: string;
    end_date: string;
    }

    Project {
    project: string;
    name: string;
    description: list(each points);
    link: string;
    }
Instructions:
- Return the exact JSON structure shown above with the key "structured_data"
- Preserve bullet points and formatting in descriptions where present, if the data is mixed content make it as separate points
- For experience, education, and projects, maintain chronological order (most recent first)
- Use null for missing or unidentifiable fields (not empty strings or empty arrays)
- Format phone numbers consistently if found (e.g., +X-XXX-XXX-XXXX format if possible)
- Keep the content exactly as in the original resume - don't shorten or add anything extra
- Include all relevant text content, preserving the original meaning, details, and dates
- Extract GitHub URLs completely including the repository path if available
"""
    try:
        logger.info("Sending resume to get_response function")
        combined_output = get_response(prompt=resume, task=system_ins)
        logger.debug("Raw response received from get_response")

        # LLMs frequently wrap JSON in a markdown fence (```json ... ```);
        # strip it so json.loads sees the bare object.  Plain JSON is
        # unaffected by the substitution.
        cleaned = re.sub(r"^\s*```(?:json)?\s*|\s*```\s*$", "", combined_output)

        logger.info("Attempting to parse response to JSON")
        result = json.loads(cleaned)
        logger.debug("Successfully parsed response to JSON")

        logger.info("Extracting structured data from result")
        structured_data = result["structured_data"]
        logger.info("Resume structured data extraction completed successfully")

        return structured_data
    except json.JSONDecodeError as e:
        error_msg = f"JSON parsing error: {e}"
        logger.error(error_msg)
        # Lazy %-arg: the (possibly large) payload is only formatted at DEBUG.
        logger.debug("Failed JSON content: %s", combined_output)
        return {"structured_data_error": error_msg}
    except KeyError as e:
        error_msg = f"Missing key in response: {e}"
        logger.error(error_msg)
        return {"structured_data_error": error_msg}
    except Exception as e:
        error_msg = f"Unexpected error in extract_resume_details: {e}"
        logger.error(error_msg)
        logger.debug(traceback.format_exc())
        return {"structured_data_error": error_msg}