Mangesh223 committed on
Commit
b5f8089
·
verified ·
1 Parent(s): 3027706

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -45
app.py CHANGED
@@ -4,82 +4,192 @@ import PyPDF2
4
  import docx
5
  import requests
6
  import json
 
 
7
 
8
- # Text extraction functions (unchanged)
9
- def extract_text_from_pdf(file):
10
- pdf_reader = PyPDF2.PdfReader(file)
11
- return " ".join(page.extract_text() for page in pdf_reader.pages)
 
 
 
 
12
 
13
- def extract_text_from_docx(file):
14
- doc = docx.Document(file)
15
- return "\n".join(para.text for para in doc.paragraphs)
 
 
 
 
16
 
17
- def process_uploaded_file(file):
18
- if file.name.lower().endswith(".pdf"):
 
 
 
 
 
19
  return extract_text_from_pdf(file)
20
- elif file.name.lower().endswith(".docx"):
21
  return extract_text_from_docx(file)
22
- raise ValueError("Unsupported file format")
23
 
24
- # Updated Hugging Face API function
25
- def analyze_with_huggingface(file, job_description):
26
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  resume_text = process_uploaded_file(file)
 
 
 
28
  HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
29
- API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
 
30
 
 
31
  prompt = """<s>[INST] <<SYS>>
32
- You are an ATS resume analyzer. Return ONLY this JSON format:
33
  {
34
- "ATS Parameters": {
35
- "Keywords": {"Match": 0-100, "Recommendation": []},
36
- "Formatting": {"Match": 0-100, "Recommendation": []},
37
- "Skills": {"Match": 0-100, "Recommendation": []},
38
- "Experience": {"Match": 0-100, "Recommendation": []},
39
- "Education": {"Match": 0-100, "Recommendation": []}
40
  },
41
- "Score": {
42
- "Overall": 0-100,
43
- "Details": {"Keywords": 0-100, "Formatting": 0-100, "Skills": 0-100, "Experience": 0-100, "Education": 0-100}
44
- }
45
  }
 
 
 
 
 
46
  <</SYS>>
47
 
48
  Resume: {resume}
49
  Job Description: {jd}
50
- [/INST]""".format(resume=resume_text[:3000], jd=job_description[:1000])
 
 
 
51
 
 
52
  response = requests.post(
53
- API_URL,
54
  headers={"Authorization": f"Bearer {HF_API_KEY}"},
55
  json={
56
  "inputs": prompt,
57
- "parameters": {"max_new_tokens": 800, "temperature": 0.7}
 
 
 
 
58
  },
59
- timeout=30
60
  )
 
 
 
 
 
 
 
 
 
61
 
62
- # Extract JSON from response
63
- result = response.json()[0]['generated_text']
64
- json_str = result[result.find('{'):result.rfind('}')+1]
65
- return json.loads(json_str)
 
66
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  except Exception as e:
68
- return {"error": str(e)}
69
 
70
- # Gradio Interface
71
- with gr.Blocks() as demo:
72
- gr.Markdown("## Resume Analyzer")
73
- with gr.Row():
74
- file_input = gr.File(label="Upload Resume", file_types=[".pdf", ".docx"])
75
- jd_input = gr.Textbox(label="Job Description", lines=10)
76
- analyze_btn = gr.Button("Analyze")
77
- output = gr.JSON()
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  analyze_btn.click(
80
- fn=analyze_with_huggingface,
81
  inputs=[file_input, jd_input],
82
- outputs=output
83
  )
84
 
85
- demo.queue().launch(allowed_paths=["*"])
 
 
 
 
 
 
4
  import docx
5
  import requests
6
  import json
7
+ import re
8
+ from typing import Union, Dict, Any
9
 
10
+ # Text extraction functions
11
def extract_text_from_pdf(file) -> str:
    """Extract the text of every page of a PDF, joined by single spaces.

    Args:
        file: Passed straight to ``PyPDF2.PdfReader``.

    Returns:
        The page texts joined with ``" "``. A page whose ``extract_text()``
        result is falsy contributes an empty string.

    Raises:
        ValueError: If opening the file or extracting text fails for any
            reason. The underlying exception is attached as ``__cause__``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        return " ".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        # Chain the cause so the real failure stays visible in tracebacks.
        raise ValueError(f"PDF extraction failed: {e}") from e
18
 
19
def extract_text_from_docx(file) -> str:
    """Extract the non-empty paragraphs of a Word document, one per line.

    Args:
        file: Passed straight to ``docx.Document``.

    Returns:
        The text of every paragraph whose ``text`` is non-empty, joined
        with newlines.

    Raises:
        ValueError: If opening or reading the document fails for any
            reason. The underlying exception is attached as ``__cause__``.
    """
    try:
        doc = docx.Document(file)
        return "\n".join(para.text for para in doc.paragraphs if para.text)
    except Exception as e:
        # Chain the cause so the real failure stays visible in tracebacks.
        raise ValueError(f"DOCX extraction failed: {e}") from e
26
 
27
def process_uploaded_file(file) -> str:
    """Extract text from an uploaded resume, dispatching on its extension.

    Args:
        file: Either a filesystem path string (what ``gr.File`` delivers when
            created with ``type="filepath"``, as the UI in this file does) or
            an object exposing its filename as ``.name``.

    Returns:
        The text produced by ``extract_text_from_pdf`` for ``.pdf`` files or
        ``extract_text_from_docx`` for ``.docx`` files.

    Raises:
        ValueError: If no filename can be determined, or the extension is
            neither ``.pdf`` nor ``.docx`` (case-insensitive).
    """
    # A path string has no ``.name`` attribute, so resolve the name for both
    # input shapes before looking at the extension.
    name = file if isinstance(file, str) else getattr(file, "name", None)
    if not name:
        raise ValueError("No filename provided")

    filename = name.lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(file)
    raise ValueError("Unsupported file format. Please upload PDF or Word document.")
38
 
39
def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Return the first JSON object found in ``text``, or ``None``.

    The whole text is tried first. If it is not a JSON object, every ``{``
    is tried in order as the start of an object and the first one that
    decodes wins. Malformed brace groups earlier in the text (for example a
    schema template containing ``0-100``) therefore do not hide a valid
    object that follows them.

    Args:
        text: Raw model output that may wrap a JSON object in other text.

    Returns:
        The decoded object as a dict, or ``None`` when ``text`` is empty,
        not a string, or contains no decodable JSON object.
    """
    if not isinstance(text, str) or not text.strip():
        return None

    # Fast path: the entire response is the JSON object.
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Slow path: raw_decode parses one value starting at the given index and
    # ignores whatever trails it, so each "{" can be tried independently.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return candidate
    return None
53
+
54
_HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"

# Fixed part of the prompt. It contains literal JSON braces, so it must never
# be passed through str.format(); the variable parts are appended instead.
_PROMPT_HEADER = """<s>[INST] <<SYS>>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{
    "analysis": {
        "keywords": {"score": 0-100, "recommendations": []},
        "formatting": {"score": 0-100, "recommendations": []},
        "skills": {"score": 0-100, "recommendations": []},
        "experience": {"score": 0-100, "recommendations": []},
        "education": {"score": 0-100, "recommendations": []}
    },
    "overall_score": 0-100,
    "summary": "Brief overall assessment"
}
Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<</SYS>>

"""


def _build_prompt(resume_text: str, job_description: str) -> str:
    """Assemble the model prompt from the fixed header and truncated inputs."""
    return (
        _PROMPT_HEADER
        + "Resume: " + resume_text[:3000] + "\n"  # Truncate to prevent token limit issues
        + "Job Description: " + job_description[:1000] + "\n"
        + "[/INST]"
    )


def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Score a resume against a job description via the Hugging Face API.

    Never raises: every failure is reported as a dict with an ``"error"``
    key so the UI can display it.

    Args:
        file: The uploaded resume, in any form ``process_uploaded_file``
            accepts. ``None`` means nothing was uploaded.
        job_description: Free-text job description. ``None`` is treated as
            an empty string.

    Returns:
        On success, the decoded model answer containing at least the
        ``"analysis"`` and ``"overall_score"`` keys. On failure, a dict with
        an ``"error"`` message and, where available, ``"raw_response"`` or
        ``"partial_response"`` for debugging.
    """
    if file is None:
        return {"error": "No resume file provided"}

    try:
        # Process file and validate inputs
        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        api_key = os.getenv("HUGGINGFACE_API_KEY")
        if not api_key:
            return {"error": "Hugging Face API key not configured"}

        prompt = _build_prompt(resume_text, job_description or "")

        # Make API request
        response = requests.post(
            _HF_API_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.3,  # Lower for more consistent JSON
                    "do_sample": False,  # Disable randomness
                    # Without this the prompt (including the non-JSON schema
                    # template) is echoed back in front of the answer.
                    "return_full_text": False,
                },
            },
            timeout=45,  # Increased timeout
        )

        # Error bodies are not guaranteed to be JSON, so decode defensively.
        try:
            payload = response.json()
        except ValueError:
            payload = None

        # Handle API errors
        if response.status_code != 200:
            if isinstance(payload, dict):
                error_msg = payload.get("error", "Unknown API error")
            else:
                error_msg = f"HTTP {response.status_code}"
            return {"error": f"API request failed: {error_msg}"}

        # Process response: expected shape is [{"generated_text": "..."}]
        if not (
            isinstance(payload, list)
            and payload
            and isinstance(payload[0], dict)
            and "generated_text" in payload[0]
        ):
            return {
                "error": "Unexpected API response format",
                "raw_response": response.text if payload is None else str(payload),
            }
        raw_output = payload[0]["generated_text"]
        result = extract_json_from_text(raw_output)

        if not isinstance(result, dict) or not result:
            return {
                "error": "Could not extract valid JSON",
                "raw_response": raw_output,  # Include raw response for debugging
            }

        # Validate JSON structure
        required_keys = {"analysis", "overall_score"}
        if not all(key in result for key in required_keys):
            return {
                "error": "Incomplete analysis in response",
                "partial_response": result,
            }

        return result

    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {str(e)}"}
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of raising.
        return {"error": f"Analysis failed: {str(e)}"}
138
 
139
+ # Enhanced Gradio Interface
140
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# ATS Resume Analyzer
    Upload your resume and job description to get ATS compatibility analysis""")

    with gr.Row():
        # Left column: the two inputs and the trigger button.
        with gr.Column():
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath"
            )
            jd_input = gr.Textbox(
                label="Job Description",
                lines=8,
                placeholder="Paste the job description here..."
            )
            analyze_btn = gr.Button("Analyze", variant="primary")

        # Right column: parsed result, raw text, and a one-line status.
        with gr.Column():
            output_tabs = gr.Tabs()
            with output_tabs:
                with gr.Tab("Structured Analysis", id="json"):
                    json_output = gr.JSON(label="Analysis Results")
                with gr.Tab("Raw Response", id="raw"):
                    raw_output = gr.Textbox(label="API Response", interactive=False)
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Run the analysis and map its result onto the three outputs.

        Returns a dict keyed by output component. On error the JSON view is
        cleared, the raw view shows ``raw_response`` when present (otherwise
        the whole error dict as a string), and the status carries the error
        message. On success the JSON view gets the result, the raw view its
        pretty-printed form, and the status a completion message.
        """
        result = analyze_with_huggingface(file, job_description)

        if "error" in result:
            return {
                json_output: None,
                raw_output: result.get("raw_response", str(result)),
                status: f"Error: {result['error']}"
            }

        return {
            json_output: result,
            raw_output: json.dumps(result, indent=2),
            status: "Analysis complete!"
        }

    # Wire the button: both inputs in, all three output components out.
    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[json_output, raw_output, status]
    )
189
 
190
# Launch with request queueing enabled
if __name__ == "__main__":
    # queue(concurrency_count=...) is rejected by Gradio 4+, which the
    # gr.File(type="filepath") usage in this file implies; the default
    # per-event limit is set with default_concurrency_limit instead.
    demo.queue(default_concurrency_limit=3).launch(
        # NOTE(review): allowed_paths expects file or directory paths;
        # confirm that "*" has any effect before relying on it.
        allowed_paths=["*"],
        server_port=7860,
    )