DreamStream-1 committed
Commit 6bbfa30 · verified · 1 Parent(s): 70688b5

Update app.py

Files changed (1):
  app.py  +23 -50
app.py CHANGED
@@ -10,23 +10,20 @@ from fuzzywuzzy import fuzz
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
-import fitz # PyMuPDF
+import fitz
 from typing import List, Dict, Set
 import docx
 import tempfile
-from pathlib import Path # Add the missing import for Path
+from pathlib import Path
 
-# ResumeAnalyzer class that processes resumes, calculates match percentage, and uses AI analysis
 class ResumeAnalyzer:
     def __init__(self):
-        """Initialize the ResumeAnalyzer with required resources."""
         self._initialize_logging()
         self._initialize_nltk()
         self._initialize_spacy()
         self._setup_api_key()
 
     def _initialize_logging(self):
-        """Set up logging for the class."""
         self.logger = logging.getLogger(__name__)
         logging.basicConfig(
             level=logging.INFO,
@@ -34,7 +31,6 @@ class ResumeAnalyzer:
         )
 
     def _initialize_nltk(self) -> None:
-        """Initialize NLTK resources safely."""
         try:
             nltk.data.path.append(os.getcwd())
             for resource in ['punkt', 'stopwords', 'wordnet']:
@@ -49,7 +45,6 @@ class ResumeAnalyzer:
             raise
 
     def _initialize_spacy(self) -> None:
-        """Initialize spaCy model safely."""
         try:
             self.nlp = spacy.load("en_core_web_sm")
         except OSError:
@@ -59,7 +54,6 @@ class ResumeAnalyzer:
             self.nlp = spacy.load("en_core_web_sm")
 
     def _setup_api_key(self) -> None:
-        """Set up Google API key from Hugging Face Spaces secrets."""
         try:
             self.google_api_key = os.environ.get("GOOGLE_API_KEY")
             if not self.google_api_key:
@@ -70,17 +64,15 @@ class ResumeAnalyzer:
             raise
 
     def extract_text_from_pdf(self, file_path: str) -> str:
-        """Extract text from a PDF file."""
         try:
             with fitz.open(file_path) as doc:
-                text = " ".join(page.get_text("text") for page in doc)
+                text = " ".join(page.get_text() for page in doc)
             return text
         except Exception as e:
             self.logger.error(f"Error extracting text from PDF: {str(e)}")
             return ""
 
     def extract_text_from_docx(self, file_path: str) -> str:
-        """Extract text from a DOCX file."""
         try:
             doc = docx.Document(file_path)
             return "\n".join(para.text for para in doc.paragraphs)
@@ -89,7 +81,6 @@ class ResumeAnalyzer:
             return ""
 
     def preprocess_text(self, text: str) -> str:
-        """Preprocess the text."""
         try:
             text = text.lower()
             text = re.sub(r'\s+', ' ', text)
@@ -104,62 +95,46 @@ class ResumeAnalyzer:
         return text
 
     def extract_named_entities(self, text: str) -> Set[str]:
-        """Extract named entities from text."""
         try:
-            # Limit text length to prevent memory issues
             doc = self.nlp(text[:100000])
-            return {ent.text for ent in doc.ents}
+            return {ent.text.lower() for ent in doc.ents}
         except Exception as e:
             self.logger.error(f"Error in named entity extraction: {str(e)}")
             return set()
 
     def calculate_match_percentage(self, resume_text: str, job_desc_text: str) -> float:
-        """Calculate the match percentage between resume and job description."""
         try:
             resume_text = self.preprocess_text(resume_text)
             job_desc_text = self.preprocess_text(job_desc_text)
-            return fuzz.partial_ratio(resume_text, job_desc_text)
+            return fuzz.token_set_ratio(resume_text, job_desc_text)
         except Exception as e:
             self.logger.error(f"Error calculating match percentage: {str(e)}")
             return 0.0
 
     def gemini_analysis(self, text: str) -> str:
-        """Analyze text using Gemini API."""
         try:
+            model = genai.GenerativeModel('gemini-pro')
             prompt = f"""Analyze this resume text and provide a brief summary of key skills and experience:
             {text[:1000]}..."""
-            response = genai.generate_text(prompt=prompt)
+            response = model.generate_content(prompt)
             return response.text
         except Exception as e:
             self.logger.error(f"Error in Gemini analysis: {str(e)}")
             return "AI analysis failed"
 
-    def process_file(self, file: gr.File, job_desc: str) -> dict:
-        """Process a single resume file."""
+    def process_file(self, file_path: str, job_desc: str) -> dict:
         try:
-            # Handle file input correctly using `file.name` and `.read()`
-            file_content = file.read() # This is the correct way to read the file content in Gradio
-
-            # Save the uploaded file content to a temporary file
-            with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.name).suffix) as temp_file:
-                temp_file.write(file_content) # Write content to the temporary file
-                temp_path = temp_file.name
-
             # Extract text based on file type
-            if file.name.lower().endswith('.pdf'):
-                text = self.extract_text_from_pdf(temp_path)
-            elif file.name.lower().endswith('.docx'):
-                text = self.extract_text_from_docx(temp_path)
+            if file_path.lower().endswith('.pdf'):
+                text = self.extract_text_from_pdf(file_path)
+            elif file_path.lower().endswith('.docx'):
+                text = self.extract_text_from_docx(file_path)
             else:
-                return {"Resume": file.name, "Match Percentage": "Invalid File Type"}
-
-            # Clean up the temporary file after processing
-            os.unlink(temp_path)
+                return {"Resume": Path(file_path).name, "Match Percentage": "Invalid File Type"}
 
             if not text.strip():
-                return {"Resume": file.name, "Match Percentage": "No text extracted"}
+                return {"Resume": Path(file_path).name, "Match Percentage": "No text extracted"}
 
-            # Further processing like calculating match percentage and analysis
             entities = self.extract_named_entities(text)
             job_entities = self.extract_named_entities(job_desc)
 
@@ -172,18 +147,17 @@ class ResumeAnalyzer:
             gemini_analysis = self.gemini_analysis(text)
 
             return {
-                "Resume": file.name,
+                "Resume": Path(file_path).name,
                 "Match Percentage": round(match_percentage, 2),
                 "Entity Match (%)": round(entity_match, 2),
                 "AI Analysis": gemini_analysis
             }
 
         except Exception as e:
-            self.logger.error(f"Error processing file {file.name}: {str(e)}")
-            return {"Resume": file.name, "Error": str(e)}
+            self.logger.error(f"Error processing file {file_path}: {str(e)}")
+            return {"Resume": Path(file_path).name, "Error": str(e)}
 
-    def process_uploaded_resumes(self, resume_files: List[gr.File], job_desc: str) -> pd.DataFrame:
-        """Process multiple resume files."""
+    def process_uploaded_resumes(self, resume_files: List[str], job_desc: str) -> pd.DataFrame:
         if not resume_files:
             return pd.DataFrame({"Message": ["Please upload at least one resume."]})
 
@@ -191,8 +165,8 @@ class ResumeAnalyzer:
             return pd.DataFrame({"Message": ["Please provide a job description."]})
 
         results = []
-        for file in resume_files:
-            result = self.process_file(file, job_desc)
+        for file_path in resume_files:
+            result = self.process_file(file_path, job_desc)
             results.append(result)
 
         return pd.DataFrame(results)
@@ -203,10 +177,10 @@ analyzer = ResumeAnalyzer()
 interface = gr.Interface(
     fn=analyzer.process_uploaded_resumes,
     inputs=[
-        gr.Files(
+        gr.File(
             label="Upload Resumes (PDF or DOCX)",
             file_types=[".pdf", ".docx"],
-            type="filepath"
+            multiple=True
         ),
         gr.Textbox(
             label="Job Description",
@@ -227,6 +201,5 @@ interface = gr.Interface(
     theme=gr.themes.Soft()
 )
 
-# Launch the interface
 if __name__ == "__main__":
-    interface.launch()
+    interface.launch()
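
Note on the match score: the scorer changes from fuzz.partial_ratio to fuzz.token_set_ratio. token_set_ratio tokenizes both strings and compares their word sets, so word order and repeated tokens matter far less than with partial_ratio's best-substring alignment, which tends to suit resume-versus-job-description comparison. A quick comparison, with illustrative strings only:

    from fuzzywuzzy import fuzz

    resume = "python developer experienced in nlp, spacy and aws"
    job_desc = "we need an aws and python developer with nlp experience"

    # partial_ratio scores the best-aligned substring; token_set_ratio compares
    # the sets of tokens, ignoring order and duplication.
    print(fuzz.partial_ratio(resume, job_desc))
    print(fuzz.token_set_ratio(resume, job_desc))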
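
Note on the Gemini call: the updated gemini_analysis drops the older genai.generate_text helper in favour of the GenerativeModel interface of the google-generativeai package. A minimal standalone sketch of that call pattern, assuming google-generativeai is installed and the GOOGLE_API_KEY secret the app already reads is set (the sample prompt text is illustrative only):

    # Sketch of the GenerativeModel call pattern used by the new gemini_analysis.
    # Assumes google-generativeai is installed and GOOGLE_API_KEY is set.
    import os
    import google.generativeai as genai

    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
    model = genai.GenerativeModel("gemini-pro")
    response = model.generate_content(
        "Analyze this resume text and provide a brief summary of key skills and experience:\n"
        "Experienced Python developer with NLP and cloud deployment background."
    )
    print(response.text)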
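
Note on the upload input: the component changes from gr.Files(..., type="filepath") to gr.File(..., multiple=True). Recent Gradio releases request multi-file upload on gr.File with file_count="multiple" (gr.Files is the equivalent shortcut) rather than a multiple keyword, so the sketch below uses that form and passes the wrapped function a list of paths, matching the new process_uploaded_resumes(resume_files: List[str], ...) signature; treat it as an assumption about the intended behaviour, not the exact code this Space runs:

    # Sketch: multi-file upload passing a list of file paths to the callback.
    # Assumes a Gradio version where gr.File accepts file_count="multiple";
    # the commit itself passes multiple=True instead.
    import gradio as gr

    def list_uploaded(paths: list, job_desc: str) -> str:
        return "\n".join(paths) + f"\n\nJob description length: {len(job_desc)}"

    demo = gr.Interface(
        fn=list_uploaded,
        inputs=[
            gr.File(label="Upload Resumes (PDF or DOCX)",
                    file_types=[".pdf", ".docx"],
                    file_count="multiple",
                    type="filepath"),
            gr.Textbox(label="Job Description"),
        ],
        outputs="text",
    )

    if __name__ == "__main__":
        demo.launch()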