File size: 7,676 Bytes
123646a
 
 
 
 
 
b5f8089
 
123646a
b5f8089
 
 
 
 
 
 
 
123646a
b5f8089
 
 
 
 
 
 
123646a
b5f8089
 
 
 
 
 
 
123646a
b5f8089
123646a
b5f8089
123646a
b5f8089
 
123646a
b5f8089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3027706
b5f8089
 
 
3027706
b5f8089
 
123646a
b5f8089
3027706
b5f8089
3027706
b5f8089
 
 
 
 
 
3027706
b5f8089
 
3027706
b5f8089
 
 
 
 
3027706
123646a
3027706
 
b5f8089
6ff26ad
b5f8089
 
3027706
b5f8089
3027706
b5f8089
3027706
 
 
b5f8089
 
6ff26ad
 
b5f8089
3027706
6ff26ad
3027706
b5f8089
 
 
 
6ff26ad
 
 
 
b5f8089
 
 
6ff26ad
 
b5f8089
123646a
6ff26ad
 
 
 
 
 
 
 
123646a
6ff26ad
 
 
 
 
b5f8089
 
 
123646a
b5f8089
123646a
b5f8089
 
6ff26ad
123646a
b5f8089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ff26ad
 
 
 
 
 
 
b5f8089
 
 
 
 
 
6ff26ad
 
 
 
 
 
 
 
b5f8089
6ff26ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5f8089
6ff26ad
b5f8089
3027706
b5f8089
3027706
6ff26ad
123646a
 
b5f8089
6ff26ad
b5f8089
6ff26ad
b5f8089
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import os
import gradio as gr
import PyPDF2
import docx
import requests
import json
import re
from typing import Union, Dict, Any

# Text extraction functions
def extract_text_from_pdf(file) -> str:
    """Extract the text of every page of a PDF.

    Args:
        file: The PDF source, handed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages joined with single spaces. A page whose
        ``extract_text()`` result is falsy contributes an empty string.

    Raises:
        ValueError: If opening or reading the PDF fails for any reason. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        return " ".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        # Chain explicitly so tracebacks report the parser error as the
        # direct cause instead of an unrelated error "during handling".
        raise ValueError(f"PDF extraction failed: {e}") from e

def extract_text_from_docx(file) -> str:
    """Extract the paragraph text of a Word (.docx) document.

    Args:
        file: The document source, handed unchanged to ``docx.Document``.

    Returns:
        The non-empty paragraphs joined with newlines.

    Raises:
        ValueError: If opening or reading the document fails for any reason.
            The underlying exception is attached as ``__cause__``.
    """
    try:
        doc = docx.Document(file)
        return "\n".join(para.text for para in doc.paragraphs if para.text)
    except Exception as e:
        # Chain explicitly so tracebacks report the parser error as the
        # direct cause instead of an unrelated error "during handling".
        raise ValueError(f"DOCX extraction failed: {e}") from e

def process_uploaded_file(file) -> str:
    """Extract text from an uploaded resume, dispatching on its extension.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object that exposes its path through a ``name`` attribute. The
            upload widget in this file is declared with ``type="filepath"``,
            so a plain path string is the expected input.

    Returns:
        The text extracted by ``extract_text_from_pdf`` or
        ``extract_text_from_docx``.

    Raises:
        ValueError: If no file or filename is supplied, if the extension is
            neither ``.pdf`` nor ``.docx``, or if the extractor fails.
    """
    if file is None:
        raise ValueError("No file provided")

    if isinstance(file, (str, bytes, os.PathLike)):
        # A bare path: use it both for extension detection and as the source
        # handed to the extractor.
        source = os.fsdecode(file)
        filename = source
    else:
        # A file-like wrapper: keep passing the object itself to the extractor.
        source = file
        filename = getattr(file, "name", None)

    if not filename:
        raise ValueError("No filename provided")

    filename = str(filename).lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(source)
    if filename.endswith(".docx"):
        return extract_text_from_docx(source)
    raise ValueError("Unsupported file format. Please upload PDF or Word document.")

def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Return the first JSON object found in *text*, or ``None``.

    The whole string is tried first. If that does not yield an object, every
    ``{`` is tried in order as the start of an object and the first one that
    decodes wins. Unlike a greedy first-brace-to-last-brace match, this
    tolerates invalid brace-delimited fragments before the real payload and
    trailing text (or further objects) after it.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The decoded object as a ``dict``, or ``None`` when *text* is not a
        string or contains no decodable JSON object. JSON values that are not
        objects (lists, numbers, strings) are never returned.
    """
    if not isinstance(text, str):
        return None

    try:
        whole = json.loads(text)
    except json.JSONDecodeError:
        whole = None
    if isinstance(whole, dict):
        return whole

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one complete value beginning at `start` and
            # ignores whatever follows it.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return candidate
    return None

# Fixed instruction block sent ahead of the resume and job description.
# It is deliberately never passed through str.format(): the JSON template
# contains literal braces, which format() would try to parse as fields.
_ATS_PROMPT_HEADER = """<s>[INST] <<SYS>>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{
  "analysis": {
    "keywords": {"score": 0-100, "recommendations": []},
    "formatting": {"score": 0-100, "recommendations": []},
    "skills": {"score": 0-100, "recommendations": []},
    "experience": {"score": 0-100, "recommendations": []},
    "education": {"score": 0-100, "recommendations": []}
  },
  "overall_score": 0-100,
  "summary": "Brief overall assessment"
}
Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<</SYS>>

"""


def _build_ats_prompt(resume_text: str, job_description: str) -> str:
    """Assemble the model prompt from the fixed instructions and user text."""
    return (
        f"{_ATS_PROMPT_HEADER}"
        f"Resume: {resume_text[:3000]}\n"
        f"Job Description: {job_description[:1000]}\n"
        "[/INST]"
    )


def _api_error_message(response) -> str:
    """Pull the ``error`` field out of a failed API response, if it has one."""
    try:
        payload = response.json()
    except ValueError:
        # Error bodies are not guaranteed to be JSON (e.g. gateway HTML pages).
        return "Unknown API error"
    if isinstance(payload, dict):
        return payload.get("error", "Unknown API error")
    return "Unknown API error"


def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Score a resume against a job description via the Hugging Face API.

    The resume is converted to text with ``process_uploaded_file``, truncated
    to 3000 characters (job description: 1000), embedded in an instruction
    prompt and posted to the hosted Mistral-7B-Instruct endpoint. The API key
    is read from the ``HUGGINGFACE_API_KEY`` environment variable.

    Args:
        file: The uploaded resume, as accepted by ``process_uploaded_file``.
        job_description: Free-text job description; ``None`` is treated as
            empty.

    Returns:
        A dict in one of these shapes (this function never raises):

        * ``{"structured_result": dict, "raw_response": str}`` when the model
          returned JSON containing ``analysis`` and ``overall_score``.
        * ``{"raw_response": str, "error": str}`` when the model answered but
          no valid JSON could be extracted.
        * ``{"error": str, "api_response": str}`` when the API returned a
          non-200 status or an unexpected payload.
        * ``{"error": str}`` for input, configuration, network or other
          failures.
    """
    try:
        # Process file and validate inputs
        if file is None:
            return {"error": "No resume file provided"}

        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        api_key = os.getenv("HUGGINGFACE_API_KEY")
        if not api_key:
            return {"error": "Hugging Face API key not configured"}

        prompt = _build_ats_prompt(resume_text, job_description or "")

        # Make API request
        response = requests.post(
            "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.3,
                    "do_sample": False,
                    # Return only the completion. Otherwise the echoed prompt,
                    # with its brace-delimited template, precedes the answer
                    # and gets in the way of JSON extraction.
                    "return_full_text": False,
                },
            },
            timeout=45,
        )

        # Handle API errors
        if response.status_code != 200:
            return {
                "error": f"API request failed: {_api_error_message(response)}",
                "api_response": response.text,
            }

        # Process response; the expected payload is [{"generated_text": ...}].
        try:
            raw_output = response.json()[0]["generated_text"]
        except (ValueError, LookupError, TypeError):
            raw_output = None
        if not isinstance(raw_output, str):
            return {
                "error": "Unexpected API response format",
                "api_response": response.text,
            }

        # Try to extract JSON and validate its top-level structure
        result = extract_json_from_text(raw_output)
        required_keys = {"analysis", "overall_score"}
        if isinstance(result, dict) and required_keys <= result.keys():
            return {
                "structured_result": result,
                "raw_response": raw_output,
            }

        # If JSON extraction failed, return raw output
        return {
            "raw_response": raw_output,
            "error": "Could not extract valid JSON",
        }

    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {e}"}
    except Exception as e:
        # Top-level boundary for the UI: report instead of crashing the app.
        return {"error": f"Analysis failed: {e}"}

# Enhanced Gradio Interface
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ATS Resume Analyzer")

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath",
            )
            jd_input = gr.Textbox(
                label="Job Description",
                lines=8,
                placeholder="Paste the job description here...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")

        # Right column: tabbed results plus a one-line status field.
        with gr.Column():
            with gr.Tabs() as output_tabs:
                with gr.Tab("Analysis Results"):
                    json_output = gr.JSON(label="Structured Analysis")
                    summary_output = gr.Textbox(label="Summary", interactive=False)
                with gr.Tab("API Response"):
                    raw_output = gr.Textbox(label="Raw API Response", lines=10)
                with gr.Tab("Debug Info"):
                    status_output = gr.Textbox(label="Status Info", lines=5)
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Run the analysis and map its outcome onto the output components.

        Returns a dict keyed by output component. Every component starts
        blank (``None``, or ``""`` for the status line) and is then filled in
        according to which of three outcomes the analysis produced: an error,
        a parsed structured result, or neither.
        """
        outcome = analyze_with_huggingface(file, job_description)

        view = dict.fromkeys(
            (json_output, summary_output, raw_output, status_output)
        )
        view[status] = ""

        # Failure: surface the message and whatever raw payload is available.
        if "error" in outcome:
            view[status] = f"Error: {outcome['error']}"
            view[status_output] = str(outcome)
            view[raw_output] = (
                outcome.get("api_response") or outcome.get("raw_response") or ""
            )
            return view

        # Neither an error nor a structured result: show the raw text only.
        if "structured_result" not in outcome:
            view[raw_output] = outcome.get("raw_response", "No response received")
            view[status] = "Received non-JSON response"
            view[status_output] = (
                "The API returned a response but it couldn't be parsed as JSON"
            )
            return view

        # Success: populate the analysis tab and the summary.
        parsed = outcome["structured_result"]
        view[json_output] = parsed["analysis"]
        view[summary_output] = parsed.get("summary", "")
        view[raw_output] = outcome["raw_response"]
        view[status] = "Analysis complete!"
        view[status_output] = "Successfully parsed JSON response"
        return view

    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[
            json_output,
            summary_output,
            raw_output,
            status_output,
            status,
        ],
    )

if __name__ == "__main__":
    # Start the app with request queuing enabled.
    # No allowed_paths is passed: the outputs are JSON and text only, so
    # Gradio needs no permission to serve extra files from disk. Entries in
    # allowed_paths are literal file/directory paths rather than glob
    # patterns, so a "*" entry would not act as a wildcard anyway.
    demo.queue().launch()