DreamStream-1 committed on
Commit
5019102
·
verified ·
1 Parent(s): 40f08cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -90
app.py CHANGED
@@ -1,97 +1,89 @@
1
  import os
2
  import gradio as gr
3
- from transformers import pipeline
4
- import torch
5
  import PyPDF2
6
- import io
7
- import re
8
- from datetime import datetime
9
 
10
- # Initialize sentiment analysis pipeline
11
- sentiment_analyzer = pipeline("sentiment-analysis")
 
 
 
 
12
 
13
  def extract_text_from_pdf(file):
14
- """Extract text from uploaded PDF file"""
15
  if file is None:
16
  return ""
17
  try:
18
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(file))
19
  text = ""
20
  for page in pdf_reader.pages:
21
- text += page.extract_text()
 
22
  return text
23
  except Exception as e:
24
  return f"Error extracting PDF text: {str(e)}"
25
 
26
  def extract_text_from_file(file):
27
- """Extract text from uploaded file (PDF or TXT)"""
28
  if file is None:
29
  return ""
30
 
31
- file_content = file.read()
32
-
33
  if file.name.endswith('.pdf'):
34
- return extract_text_from_pdf(file_content)
35
  elif file.name.endswith('.txt'):
36
- return file_content.decode('utf-8')
37
  else:
38
  return "Unsupported file format. Please upload PDF or TXT files only."
39
 
40
  def extract_skills(text):
41
- """Extract skills from text using keyword matching"""
42
- # Common programming languages and technologies
43
- skills_keywords = [
44
- 'python', 'java', 'javascript', 'react', 'angular', 'vue', 'node.js',
45
- 'sql', 'mongodb', 'aws', 'docker', 'kubernetes', 'machine learning',
46
- 'artificial intelligence', 'data science', 'html', 'css', 'git'
47
- ]
48
-
49
- found_skills = []
50
- for skill in skills_keywords:
51
- if re.search(r'\b' + re.escape(skill) + r'\b', text.lower()):
52
- found_skills.append(skill)
53
-
54
- return found_skills
55
-
56
- def extract_education(text):
57
- """Extract education information from text"""
58
- education_patterns = [
59
- r'\b(B\.?S\.?|B\.?A\.?|M\.?S\.?|M\.?A\.?|Ph\.?D\.?|Bachelor\'?s?|Master\'?s?|Doctorate)\b',
60
- r'\b(Computer Science|Information Technology|Software Engineering|Information Systems)\b'
61
- ]
62
-
63
- education = []
64
- for pattern in education_patterns:
65
- matches = re.finditer(pattern, text, re.IGNORECASE)
66
- education.extend(match.group() for match in matches)
67
-
68
- return list(set(education))
69
 
70
- def extract_experience(text):
71
- """Extract years of experience and job titles"""
72
- experience_pattern = r'(\d+)\+?\s*(?:years?|yrs?)(?:\s+of)?\s+experience'
73
- job_titles_pattern = r'\b(Software Engineer|Developer|Architect|Manager|Lead|Director)\b'
74
-
75
- experience_matches = re.findall(experience_pattern, text, re.IGNORECASE)
76
- years = [int(year) for year in experience_matches]
77
-
78
- job_titles = re.findall(job_titles_pattern, text)
79
 
80
  return {
81
- 'years': max(years) if years else 0,
82
- 'titles': list(set(job_titles))
83
  }
84
 
85
  def calculate_match_percentage(resume_skills, job_skills):
86
- """Calculate the match percentage between resume skills and job requirements"""
87
  if not job_skills:
88
  return 0
89
 
90
  matching_skills = set(resume_skills).intersection(set(job_skills))
91
  return (len(matching_skills) / len(job_skills)) * 100
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def analyze_resume_and_job(resume_file, job_desc_file):
94
- """Main function to analyze resume and job description"""
95
  try:
96
  # Extract text from files
97
  resume_text = extract_text_from_file(resume_file)
@@ -104,26 +96,27 @@ def analyze_resume_and_job(resume_file, job_desc_file):
104
 
105
  # Extract information from resume
106
  resume_skills = extract_skills(resume_text)
107
- resume_education = extract_education(resume_text)
108
- resume_experience = extract_experience(resume_text)
109
 
110
  # Extract information from job description
111
  job_skills = extract_skills(job_desc_text)
112
- job_education = extract_education(job_desc_text)
113
- job_experience = extract_experience(job_desc_text)
114
 
115
  # Calculate match percentages
116
  skills_match = calculate_match_percentage(resume_skills, job_skills)
117
 
118
- # Analyze sentiment of resume
119
- sentiment_result = sentiment_analyzer(resume_text[:512])[0]
 
 
 
120
 
121
  # Prepare analysis results
122
  summary = f"""
123
  ### Summary Analysis
124
  - Overall Skills Match: {skills_match:.1f}%
125
- - Experience: {resume_experience['years']} years
126
- - Sentiment: {sentiment_result['label']} ({sentiment_result['score']:.2f})
127
  """
128
 
129
  skills = f"""
@@ -141,42 +134,32 @@ Missing Skills:
141
  qualifications = f"""
142
  ### Qualifications
143
  Education Found:
144
- {', '.join(resume_education)}
145
 
146
  Required Education:
147
- {', '.join(job_education)}
148
  """
149
 
150
- experience = f"""
151
- ### Experience Analysis
152
- - Years of Experience: {resume_experience['years']}
153
- - Recent Positions: {', '.join(resume_experience['titles'])}
154
- - Required Experience: {job_experience['years']} years
155
- """
156
-
157
- # Generate recommendation
158
- if skills_match >= 70 and resume_experience['years'] >= job_experience['years']:
159
- recommendation = "Strong Match - Recommended for interview"
160
  elif skills_match >= 50:
161
- recommendation = "Moderate Match - Consider for interview with focus on missing skills"
162
  else:
163
- recommendation = "Low Match - May not meet core requirements"
164
 
165
  recommendation = f"""
166
  ### Recommendation
167
  {recommendation}
168
-
169
- Key Strengths:
170
- - {'High' if skills_match >= 70 else 'Moderate' if skills_match >= 50 else 'Low'} skill match
171
- - {'Sufficient' if resume_experience['years'] >= job_experience['years'] else 'Insufficient'} experience
172
  """
173
 
174
  return {
175
  "summary": summary.strip(),
176
  "skills": skills.strip(),
177
  "qualifications": qualifications.strip(),
178
- "experience": experience.strip(),
179
- "recommendation": recommendation.strip()
180
  }
181
 
182
  except Exception as e:
@@ -203,10 +186,10 @@ def create_interface():
203
  skills_output = gr.Markdown()
204
  with gr.TabItem("Qualifications"):
205
  qualifications_output = gr.Markdown()
206
- with gr.TabItem("Experience"):
207
- experience_output = gr.Markdown()
208
  with gr.TabItem("Recommendation"):
209
  recommendation_output = gr.Markdown()
 
 
210
 
211
  def analyze(resume_file, job_desc_file):
212
  if not resume_file or not job_desc_file:
@@ -221,15 +204,14 @@ def create_interface():
221
  result["summary"],
222
  result["skills"],
223
  result["qualifications"],
224
- result["experience"],
225
- result["recommendation"]
226
  )
227
 
228
  analyze_button.click(
229
  analyze,
230
  inputs=[resume_input, job_desc_input],
231
- outputs=[summary_output, skills_output, qualifications_output,
232
- experience_output, recommendation_output]
233
  )
234
 
235
  return demo
 
1
  import os
2
  import gradio as gr
3
+ import requests
 
4
  import PyPDF2
5
+ import spacy
 
 
6
 
7
+ # Load spaCy for NER tasks
8
+ nlp = spacy.load("en_core_web_sm")
9
+
10
# Groq API configuration.
# NOTE(review): the endpoint is kept exactly as committed; confirm it against
# the current Groq API reference before relying on it.
GROQ_API_URL = "https://api.groq.com/v1/llama"
# Read the key from the environment so a real secret never has to be committed.
# The original placeholder stays as the fallback, so behaviour is unchanged
# when the GROQ_API_KEY variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "YOUR_API_KEY")
13
 
14
  def extract_text_from_pdf(file):
15
+ """Extract text from uploaded PDF file."""
16
  if file is None:
17
  return ""
18
  try:
19
+ pdf_reader = PyPDF2.PdfReader(file)
20
  text = ""
21
  for page in pdf_reader.pages:
22
+ page_text = page.extract_text() or ""
23
+ text += page_text
24
  return text
25
  except Exception as e:
26
  return f"Error extracting PDF text: {str(e)}"
27
 
28
  def extract_text_from_file(file):
29
+ """Extract text from uploaded file (PDF or TXT)."""
30
  if file is None:
31
  return ""
32
 
 
 
33
  if file.name.endswith('.pdf'):
34
+ return extract_text_from_pdf(file)
35
  elif file.name.endswith('.txt'):
36
+ return file.read().decode('utf-8')
37
  else:
38
  return "Unsupported file format. Please upload PDF or TXT files only."
39
 
40
  def extract_skills(text):
41
+ """Extract skills from text using a pre-trained NER model."""
42
+ doc = nlp(text)
43
+ skills = [ent.text for ent in doc.ents if ent.label_ == "SKILL"]
44
+ return list(set(skills))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
def extract_education_and_experience(text):
    """Extract education and experience mentions from free text.

    Entities labelled ``EDUCATION``/``DEGREE``/``EXPERIENCE`` by the loaded
    spaCy pipeline are collected, and a regex scan is added on top. The scan
    is needed because spaCy's stock English pipelines do not define those
    labels, so entity filtering alone yields nothing unless a custom
    component has been added.

    Args:
        text: The resume or job-description text.

    Returns:
        A dict with two keys, ``'education'`` and ``'experience'``, each a
        sorted list of unique strings (both empty for empty input).
    """
    import re  # local import: the module's import block does not provide it

    if not text:
        return {'education': [], 'experience': []}

    education = set()
    experience = set()

    # Custom-labelled entities, if the pipeline provides any.
    for ent in nlp(text).ents:
        if ent.label_ in ("EDUCATION", "DEGREE"):
            education.add(ent.text)
        elif ent.label_ == "EXPERIENCE":
            experience.add(ent.text)

    # Degree abbreviations are matched case-sensitively so ordinary words
    # such as "ma" or "ms" are not mistaken for degrees.
    degree_abbreviations = r"\b(?:B\.?S|B\.?A|M\.?S|M\.?A|Ph\.?D)\b"
    degree_words = r"\b(?:Bachelor(?:'s|s)?|Master(?:'s|s)?|Doctorate)\b"
    study_fields = (
        r"\b(?:Computer Science|Information Technology|"
        r"Software Engineering|Information Systems)\b"
    )
    years_of_experience = r"\d+\+?\s*(?:years?|yrs?)(?:\s+of)?\s+experience"

    education.update(re.findall(degree_abbreviations, text))
    education.update(re.findall(degree_words, text, re.IGNORECASE))
    education.update(re.findall(study_fields, text, re.IGNORECASE))
    experience.update(re.findall(years_of_experience, text, re.IGNORECASE))

    return {
        'education': sorted(education),
        'experience': sorted(experience)
    }
56
 
57
  def calculate_match_percentage(resume_skills, job_skills):
58
+ """Calculate the match percentage between resume skills and job requirements."""
59
  if not job_skills:
60
  return 0
61
 
62
  matching_skills = set(resume_skills).intersection(set(job_skills))
63
  return (len(matching_skills) / len(job_skills)) * 100
64
 
65
def call_groq_api(prompt):
    """Send ``prompt`` to the configured Groq endpoint and return its text.

    Args:
        prompt: The text to send to the model.

    Returns:
        The value of the response's ``"output"`` field as a string, or a
        human-readable failure message. Failures are reported as text, never
        raised, because the caller calls ``.strip()`` on the result.
    """
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    # NOTE(review): the request shape ("prompt") and response field ("output")
    # are kept as committed; confirm both against the Groq API reference.
    payload = {
        "model": "llama3-8b-8192",  # Use the specified LLaMA model
        "prompt": prompt,
        "max_tokens": 150  # Adjust as needed
    }

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(
            GROQ_API_URL, headers=headers, json=payload, timeout=30
        )
    except requests.RequestException as exc:
        return f"API call failed: {exc}"

    if response.status_code != 200:
        return f"API call failed with status {response.status_code}: {response.text}"

    try:
        data = response.json()
    except ValueError:
        # A 200 response whose body is not JSON carries no usable output.
        return "No output received."

    if not isinstance(data, dict):
        return "No output received."

    output = data.get("output", "No output received.")
    # The caller strips the result, so always hand back a string.
    return output if isinstance(output, str) else str(output)
84
+
85
  def analyze_resume_and_job(resume_file, job_desc_file):
86
+ """Main function to analyze resume and job description."""
87
  try:
88
  # Extract text from files
89
  resume_text = extract_text_from_file(resume_file)
 
96
 
97
  # Extract information from resume
98
  resume_skills = extract_skills(resume_text)
99
+ resume_info = extract_education_and_experience(resume_text)
 
100
 
101
  # Extract information from job description
102
  job_skills = extract_skills(job_desc_text)
103
+ job_info = extract_education_and_experience(job_desc_text)
 
104
 
105
  # Calculate match percentages
106
  skills_match = calculate_match_percentage(resume_skills, job_skills)
107
 
108
+ # Prepare input for LLaMA via Groq API
109
+ input_prompt = f"Analyze the following resume: {resume_text[:300]} and job description: {job_desc_text[:300]}."
110
+
111
+ # Call Groq API to analyze using LLaMA
112
+ llama_analysis = call_groq_api(input_prompt)
113
 
114
  # Prepare analysis results
115
  summary = f"""
116
  ### Summary Analysis
117
  - Overall Skills Match: {skills_match:.1f}%
118
+ - Experience Found: {', '.join(resume_info['experience'])}
119
+ - Education Found: {', '.join(resume_info['education'])}
120
  """
121
 
122
  skills = f"""
 
134
  qualifications = f"""
135
  ### Qualifications
136
  Education Found:
137
+ {', '.join(resume_info['education'])}
138
 
139
  Required Education:
140
+ {', '.join(job_info['education'])}
141
  """
142
 
143
+ # Generate recommendation based on skills match
144
+ recommendation = "Recommendation based on skills match and experience."
145
+ if skills_match >= 70:
146
+ recommendation = "Strong Match - Recommended for interview."
 
 
 
 
 
 
147
  elif skills_match >= 50:
148
+ recommendation = "Moderate Match - Consider for interview with focus on missing skills."
149
  else:
150
+ recommendation = "Low Match - May not meet core requirements."
151
 
152
  recommendation = f"""
153
  ### Recommendation
154
  {recommendation}
 
 
 
 
155
  """
156
 
157
  return {
158
  "summary": summary.strip(),
159
  "skills": skills.strip(),
160
  "qualifications": qualifications.strip(),
161
+ "recommendation": recommendation.strip(),
162
+ "llama_analysis": llama_analysis.strip()
163
  }
164
 
165
  except Exception as e:
 
186
  skills_output = gr.Markdown()
187
  with gr.TabItem("Qualifications"):
188
  qualifications_output = gr.Markdown()
 
 
189
  with gr.TabItem("Recommendation"):
190
  recommendation_output = gr.Markdown()
191
+ with gr.TabItem("LLaMA Analysis"):
192
+ llama_output = gr.Markdown()
193
 
194
  def analyze(resume_file, job_desc_file):
195
  if not resume_file or not job_desc_file:
 
204
  result["summary"],
205
  result["skills"],
206
  result["qualifications"],
207
+ result["recommendation"],
208
+ result["llama_analysis"]
209
  )
210
 
211
  analyze_button.click(
212
  analyze,
213
  inputs=[resume_input, job_desc_input],
214
+ outputs=[summary_output, skills_output, qualifications_output, recommendation_output, llama_output]
 
215
  )
216
 
217
  return demo