Mangesh223 committed on
Commit
aac2ac6
·
verified ·
1 Parent(s): 06b3165

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -135
app.py CHANGED
@@ -1,168 +1,149 @@
1
  import gradio as gr
2
  import PyPDF2
3
  import io
4
- import json
5
  import os
6
- import gc
7
- from huggingface_hub import login
8
  from dotenv import load_dotenv
9
 
10
- # --- Configuration --- #
11
- load_dotenv()
12
- login(token=os.getenv("HF_TOKEN"))
13
-
14
  def extract_text_from_pdf(pdf_file):
15
- """Extract raw text from PDF"""
 
 
 
 
 
 
 
 
16
  if pdf_file is None:
17
  raise ValueError("No PDF file uploaded")
18
 
19
- if isinstance(pdf_file, str):
20
- with open(pdf_file, 'rb') as f:
21
- file_bytes = f.read()
22
- elif isinstance(pdf_file, bytes):
23
- file_bytes = pdf_file
24
- else:
25
- raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")
26
-
27
  try:
 
 
 
 
 
 
 
 
 
 
28
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
29
- text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
30
- if not text.strip():
31
- raise ValueError("No text extracted")
32
- return text[:10000] # Limit to avoid overwhelming AI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  except Exception as e:
34
- raise Exception(f"Extraction error: {str(e)}")
35
- finally:
36
- gc.collect()
37
 
38
- # Placeholder inference function to match your example output
39
- def dummy_inference_fn(prompt):
40
- """Temporary inference function - replace with your actual AI model"""
41
- # Simulate response based on your resume and job description in prompt
42
- resume_text = prompt.split("Analyze this resume: '")[1].split("' against job description")[0]
43
- job_desc = prompt.split("against job description: '")[1].split("'")[0]
44
-
45
- # Default response mimicking your example
46
- response = {
47
- "score": {
48
- "total": 85,
49
- "breakdown": {
50
- "competency": 25,
51
- "experience": 15,
52
- "impact": 20,
53
- "potential": 5,
54
- "leadership": 0,
55
- "adaptability": 20
56
- }
57
- },
58
- "analysis": {
59
- "strengths": [
60
- "Strong frontend skills (React.js, JavaScript, UI components)",
61
- "Proven performance impact (e.g., 30% code redundancy reduction, 20% efficiency boost)",
62
- "Matches experience requirement (3+ years with relevant projects)"
63
- ],
64
- "improvements": [
65
- "Emphasize UI/UX contributions in projects",
66
- "Add leadership or teamwork examples for well-roundedness"
67
- ],
68
- "missing_skills": [],
69
- "flags": []
70
- }
71
- }
72
 
73
- # Adjust slightly if no job description
74
- if job_desc == "None":
75
- response["score"]["adaptability"] = 10
76
- response["score"]["total"] = 75
77
- response["analysis"]["strengths"] = [
78
- "Strong technical skills (MERN stack, blockchain)",
79
- "Proven project impact (e.g., 25% session time increase)",
80
- "Solid experience (3+ years)"
81
- ]
82
- response["analysis"]["improvements"] = [
83
- "Add leadership or teamwork examples",
84
- "Highlight learning initiatives"
85
- ]
86
 
87
- return json.dumps(response) # Return as JSON string
 
 
 
 
 
 
 
 
88
 
89
- # Real inference function example (uncomment and configure if you have a model)
90
- """
91
- from transformers import pipeline
92
- def inference_fn(prompt):
93
- model = pipeline("text-generation", model="gpt2", token=os.getenv("HF_TOKEN")) # Replace with your model
94
- response = model(prompt, max_length=2000, num_return_sequences=1)[0]["generated_text"]
95
- start = response.find("[Return valid JSON]:") + len("[Return valid JSON]:")
96
- return response[start:].strip()
97
- """
 
98
 
99
- def analyze_resume(pdf_file, job_desc=None, inference_fn=dummy_inference_fn):
100
- """Smart ATS relying fully on AI for analysis"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  try:
 
102
  resume_text = extract_text_from_pdf(pdf_file)
103
- except Exception as e:
104
- return f"Extraction failed: {str(e)}", {"error": str(e)}
105
-
106
- # Fallback if no inference function
107
- if not inference_fn:
108
- print("No inference function provided - using fallback")
109
- basic_analysis = {
110
- "score": {"total": 10, "breakdown": {"competency": 10}},
111
- "analysis": {
112
- "strengths": ["Resume text extracted"],
113
- "improvements": ["Provide a job description for detailed analysis" if not job_desc else "Add more details"],
114
- "missing_skills": [],
115
- "flags": []
116
- },
117
- "raw_text_sample": resume_text[:200]
118
  }
119
- return resume_text[:5000], basic_analysis
120
-
121
- # AI-driven analysis
122
- prompt = f"""[Return valid JSON]: You are a smart ATS designed to evaluate resumes without rejecting worthy candidates. Analyze this resume: '{resume_text[:2000]}' against job description: '{job_desc or "None"}'.
123
- Provide:
124
- - "score": {{total: X (0-100), breakdown: {{competency: X (technical/non-technical skills), experience: X (duration and depth), impact: X (achievements), potential: X (learning ability), leadership: X (influence), adaptability: X (fit to role or general)}}}}
125
- - "analysis": {{"strengths": [2-3 items, e.g., "Strong React skills"], "improvements": [2-3 items, e.g., "Add teamwork examples"], "missing_skills": [0-3 items, only if job_desc provided], "flags": [0-2 items, e.g., "High potential candidate"]}}
126
- Rules:
127
- - Detect skills, experience, achievements, learning signals, and leadership dynamically from the resume text.
128
- - If no job description, assess general potential across technical and non-technical domains.
129
- - If job description exists, prioritize role-relevant traits but don’t penalize unrelated strengths.
130
- - Infer skills (e.g., 'MERN' → 'MongoDB'), normalize variations (e.g., 'React.js' = 'React'), and weigh recent tech (e.g., 'Next.js') over outdated (e.g., 'jQuery').
131
- - Focus on potential: Highlight capability even if formatting or keywords don’t perfectly match.
132
- - Avoid rejection: Low scores should still come with positive feedback or flags for human review.
133
- Return valid JSON only."""
134
 
135
- try:
136
- print("Calling inference_fn with prompt:", prompt[:200]) # Debug
137
- result = inference_fn(prompt)
138
- print("Inference result:", result) # Debug
139
- if result and result.strip():
140
- analysis = json.loads(result)
141
- analysis["raw_text_sample"] = resume_text[:200]
142
- return resume_text[:5000], analysis
143
- else:
144
- raise ValueError("Empty AI response")
145
  except Exception as e:
146
- print(f"AI analysis error: {str(e)}")
147
- return resume_text[:5000], {
148
- "score": {"total": 10, "breakdown": {"competency": 10}},
149
- "analysis": {"strengths": ["Text processed"], "improvements": [f"Analysis failed: {str(e)}"], "missing_skills": [], "flags": []},
150
- "raw_text_sample": resume_text[:200]
151
  }
152
 
153
- # --- Gradio Interface --- #
154
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
155
  with gr.Row():
156
  with gr.Column(scale=1):
157
- pdf_input = gr.File(label="PDF Resume", type="binary")
158
  job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
159
- submit_btn = gr.Button("Analyze")
 
160
  with gr.Column(scale=2):
161
- extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
162
- analysis_output = gr.JSON(label="Analysis Results")
163
 
164
- submit_btn.click(
165
- fn=analyze_resume,
166
  inputs=[pdf_input, job_desc_input],
167
  outputs=[extracted_text, analysis_output]
168
  )
 
1
  import gradio as gr
2
  import PyPDF2
3
  import io
 
4
  import os
 
 
5
  from dotenv import load_dotenv
6
 
 
 
 
 
7
def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF supplied as a file path or raw bytes.

    Args:
        pdf_file (str | bytes): Path to a PDF file, or the PDF's raw bytes
            (Gradio's ``gr.File(type="binary")`` delivers bytes).

    Returns:
        str: Extracted text, truncated to 15,000 characters to keep the
        downstream AI prompt bounded.

    Raises:
        ValueError: If no file was supplied, the input type is unsupported,
            no text could be extracted, or PDF parsing fails.
    """
    if pdf_file is None:
        raise ValueError("No PDF file uploaded")

    try:
        # Normalize the input to raw bytes before parsing.
        if isinstance(pdf_file, str):
            with open(pdf_file, 'rb') as f:
                file_bytes = f.read()
        elif isinstance(pdf_file, bytes):
            file_bytes = pdf_file
        else:
            # Wrapped below into ValueError with the same message the
            # original produced ("PDF Extraction Error: Unsupported file type: …").
            raise TypeError(f"Unsupported file type: {type(pdf_file)}")

        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))

        # Extract page by page so a single unreadable page does not abort
        # the whole document; failures are logged and skipped (best-effort).
        pages_text = []
        for page in pdf_reader.pages:
            try:
                pages_text.append((page.extract_text() or "").strip())
            except Exception as page_error:
                print(f"Error extracting page text: {page_error}")

        full_text = "\n".join(pages_text)

        if not full_text.strip():
            raise ValueError("No text could be extracted from the PDF")

        # Limit text to prevent overwhelming the AI model.
        return full_text[:15000]

    except ValueError:
        # Deliberately raised above with a user-facing message — re-raise
        # as-is instead of double-wrapping it in "PDF Extraction Error: …".
        raise
    except Exception as e:
        # Wrap parser/I-O/type failures uniformly, preserving the cause chain.
        raise ValueError(f"PDF Extraction Error: {str(e)}") from e
 
 
53
 
54
def prepare_resume_prompt(resume_text, job_description=None):
    """Build the analysis prompt that will be sent to the AI model.

    Args:
        resume_text (str): Text extracted from the candidate's resume.
        job_description (str, optional): Target job description, if any.

    Returns:
        str: A fully formatted prompt requesting a structured JSON reply.
    """
    # Only the first 10,000 characters of the resume are included so the
    # prompt stays within typical model context limits.
    resume_section = resume_text[:10000]

    # Context line differs depending on whether a job description exists.
    if job_description:
        context_line = 'Job Description: ' + job_description
    else:
        context_line = 'No specific job description provided'

    return f"""Professional Resume Analysis:

Resume Content:
{resume_section}

{context_line}

Instructions for Analysis:
1. Perform a comprehensive assessment of the resume
2. Evaluate professional skills, experience, and potential
3. Provide a structured JSON response with:
- Overall Score (0-100)
- Skill Match Percentage
- Key Strengths
- Areas for Improvement
- Potential Red Flags
- Recommended Next Steps

Output Format (JSON):
{{
    "total_score": int,
    "skill_match_percentage": int,
    "strengths": [str],
    "improvements": [str],
    "red_flags": [str],
    "recommended_actions": [str]
}}"""
94
+
95
def analyze_resume(pdf_file, job_description=None):
    """Run the end-to-end resume analysis pipeline.

    Args:
        pdf_file (bytes): Uploaded PDF file contents.
        job_description (str, optional): Job description for context.

    Returns:
        tuple: ``(extracted_text, analysis_dict)``. On failure the first
        element is the error message and the dict carries an ``error`` key.
    """
    try:
        # Extract text from PDF, then build the AI prompt from it.
        resume_text = extract_text_from_pdf(pdf_file)
        ai_prompt = prepare_resume_prompt(resume_text, job_description)

        # Placeholder: swap this print + canned result for a real
        # Mistral-7B (or other model) inference call on ``ai_prompt``.
        print("AI Prompt Prepared. Replace this with actual model inference.")

        placeholder_analysis = {
            "total_score": 75,
            "skill_match_percentage": 80,
            "strengths": ["Robust text extraction", "Structured prompt generation"],
            "improvements": ["Integrate actual AI model inference"],
            "red_flags": [],
            "recommended_actions": ["Connect Mistral-7B model"],
        }
        return resume_text, placeholder_analysis
    except Exception as e:
        # Surface the failure in both outputs so the UI shows something useful.
        return str(e), {
            "error": str(e),
            "total_score": 0,
            "skill_match_percentage": 0,
        }
132
 
133
# Gradio Interface: two-column layout — inputs on the left, results on the
# right. ``demo`` is the top-level Blocks app (the name HF Spaces launches).
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: resume upload, optional job description, trigger button.
        with gr.Column(scale=1):
            # type="binary" makes Gradio pass the file's raw bytes to the handler.
            pdf_input = gr.File(label="Upload Resume PDF", type="binary")
            job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
            analyze_btn = gr.Button("Analyze Resume")

        # Right column: extracted resume text plus the structured analysis.
        with gr.Column(scale=2):
            extracted_text = gr.Textbox(label="Extracted Text", lines=10)
            analysis_output = gr.JSON(label="AI Analysis")

    # Wire the button to analyze_resume: (pdf bytes, job description) in,
    # (text, analysis dict) out — matching that function's return tuple.
    analyze_btn.click(
        fn=analyze_resume,
        inputs=[pdf_input, job_desc_input],
        outputs=[extracted_text, analysis_output]
    )