Anupam007 committed on
Commit
9dd0e86
·
verified ·
1 Parent(s): b08d49c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -2
app.py CHANGED
@@ -46,6 +46,164 @@ def initialize_model():
46
 
47
  model = initialize_model()
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Function to extract text from a PDF resume
50
  def extract_resume_text(pdf_file_path):
51
  logging.info("Extracting resume text")
@@ -288,7 +446,7 @@ def calculate_match_score(resume_text, job_description):
288
  ])])
289
  if not skills_section:
290
  skills_section = resume_text.lower()
291
- logging.warning("No specific skills section found, using full resume text for matching")
292
 
293
  resume_embedding = model.encode(skills_section, convert_to_tensor=True)
294
  job_embedding = model.encode(job_description, convert_to_tensor=True)
@@ -665,7 +823,7 @@ def gradio_interface(resume_file, job_title, location, user_email, user_password
665
  # Setup instructions for Gradio in Colab
666
  def setup_and_run():
667
  print("Installing dependencies...")
668
- # !pip install PyPDF2 beautifulsoup4 sentence-transformers scikit-learn torch numpy pandas requests gradio
669
  print("Starting Gradio interface...")
670
 
671
  iface = gr.Interface(
 
46
 
47
  model = initialize_model()
48
 
49
+ # Function to extract text from a PDF resume
50
+ def extract_resume_text(pdf_file_path):
51
+ logging.info("Extracting resume text")
52
+ try:
53
+ with open(pdf_file_path, 'rb') as f:
54
+ pdf_reader = PdfReader(f)
55
+ text = ""
56
+ for page in pdf_reader.pages:
57
+ extracted = page.extract_text()
58
+ if extracted:
59
+ text += extracted
60
+ if not text.strip():
61
+ raise Exception("No text extracted from PDF. Ensure the PDF is not image-based.")
62
+ logging.info(f"Extracted resume text (first 200 chars): {text[:200]}")
63
+ return text
64
+ except Exception as e:
65
+ logging.error(f"Error extracting text from PDF: {str(e)}")
66
+ raise Exception(f"Error extracting text from PDF: {str(e)}")
67
+
68
+ # Function to parse resume and extract key information
69
+ def parse_resume(resume_text):
70
+ logging.info("Parsing resume")
71
+ parsed_info = {
72
+ "skills": [],
73
+ "education": [],
74
+ "experience": [],
75
+ "personal_info": {},
76
+ "react_experience": "0",
77
+ "redux_experience": "0",
78
+ "javascript_experience": "0",
79
+ "education_details": [],
80
+ "work_history": []
81
+ }
82
+
83
+ # Split resume into sections based on candidate headers
84
+ candidate_pattern = r'(IM A\. SAMPLE [IVX]+)\s*'
85
+ candidate_sections = re.split(candidate_pattern, resume_text, flags=re.IGNORECASE)
86
+ candidates = []
87
+ for i in range(1, len(candidate_sections), 2):
88
+ candidates.append((candidate_sections[i], candidate_sections[i+1]))
89
+
90
+ if not candidates:
91
+ candidates = [("Unknown Candidate", resume_text)]
92
+
93
+ candidate_name, candidate_text = candidates[0]
94
+ parsed_info["personal_info"]["name"] = candidate_name.strip()
95
+ logging.info(f"Parsed candidate name: {candidate_name}")
96
+
97
+ # Extract email
98
+ email_pattern = r'[\w\.-]+@[\w\.-]+\.\w+'
99
+ email_matches = re.findall(email_pattern, candidate_text, re.IGNORECASE)
100
+ if email_matches:
101
+ parsed_info["personal_info"]["email"] = email_matches[0]
102
+ else:
103
+ logging.warning("No email found in resume")
104
+
105
+ # Extract phone number
106
+ phone_pattern = r'\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}'
107
+ phone_matches = re.findall(phone_pattern, candidate_text)
108
+ if phone_matches:
109
+ parsed_info["personal_info"]["phone"] = phone_matches[0]
110
+ else:
111
+ logging.warning("No phone number found in resume")
112
+
113
+ # Extract address
114
+ address_pattern = r'(\d+\s+[A-Za-z\s]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})'
115
+ address_matches = re.findall(address_pattern, candidate_text, re.IGNORECASE)
116
+ if address_matches:
117
+ parsed_info["personal_info"]["address"] = address_matches[0]
118
+ else:
119
+ parsed_info["personal_info"]["address"] = "Not found"
120
+ logging.warning("No address found in resume")
121
+
122
+ # Extract skills (expanded list and more permissive matching)
123
+ skill_keywords = [
124
+ "python", "java", "javascript", "html", "css", "sql", "react",
125
+ "node", "aws", "azure", "docker", "git", "c++", "visual basic",
126
+ "perl", "asp", "php", "cobol", "xml", "asp.net", "quickbooks",
127
+ "ms office", "ms access", "spss", "typescript", "angular", "vue",
128
+ "mysql", "mongodb", "linux", "bash", "kubernetes", "jenkins"
129
+ ]
130
+ resume_lower = candidate_text.lower()
131
+ for skill in skill_keywords:
132
+ if skill.lower() in resume_lower or f"{skill.lower()} " in resume_lower:
133
+ parsed_info["skills"].append(skill)
134
+ if not parsed_info["skills"]:
135
+ logging.warning("No skills extracted from resume")
136
+
137
+ # Extract specific experience
138
+ patterns = {
139
+ "react_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*React',
140
+ "redux_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*Redux',
141
+ "javascript_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*(?:JavaScript|JS)'
142
+ }
143
+
144
+ for key, pattern in patterns.items():
145
+ matches = re.findall(pattern, candidate_text, re.IGNORECASE)
146
+ ifर्म
147
+
148
+ System: It looks like the provided code was cut off. I'll complete the `app.py` code, ensuring the fix for the `ImportError` related to `cached_download` by pinning compatible versions of `sentence-transformers` and `huggingface_hub` in the `setup_and_run` function. The rest of the code will remain consistent with the previous version, including the fix for the `IndentationError` (correcting `utput` to `output`). I'll also ensure the code is complete and properly formatted for use in a Hugging Face Space or similar environment.
149
+
150
+ ### Explanation of Changes
151
+ 1. **Pinned Dependencies**: In the `setup_and_run` function, I updated the `pip install` command to explicitly install `sentence-transformers==2.2.2` and `huggingface_hub==0.7.0`. These versions are compatible, as `huggingface_hub==0.7.0` still includes the `cached_download` function required by `sentence-transformers==2.2.2`.
152
+ 2. **Retained Previous Fix**: The `format_results` function retains the correction from `utput` to `output` to prevent the `IndentationError`.
153
+ 3. **Complete Code**: The code is provided in full to ensure no truncation occurs, covering all functions from your original `app.py`.
154
+ 4. **Environment Considerations**: The code includes logic for running in Google Colab (e.g., `files.download`), but it should work in a Hugging Face Space with the pinned dependencies. If running outside Colab, you may need to adjust the `files.download` logic or mock it.
155
+
156
+ ### Updated Code
157
+
158
+ <xaiArtifact artifact_id="44e9cd70-9153-4e94-9962-aa9dfcd076ae" artifact_version_id="abe337a8-8ff0-4f13-bf78-329d64463346" title="app.py" contentType="text/python">
159
+ import os
160
+ import io
161
+ import re
162
+ import json
163
+ import random
164
+ import time
165
+ import smtplib
166
+ import requests
167
+ import numpy as np
168
+ import pandas as pd
169
+ from email.mime.text import MIMEText
170
+ from email.mime.multipart import MIMEMultipart
171
+ from email.mime.application import MIMEApplication
172
+ from datetime import datetime, timedelta
173
+ from PyPDF2 import PdfReader
174
+ from bs4 import BeautifulSoup
175
+ from sentence_transformers import SentenceTransformer
176
+ from sklearn.metrics.pairwise import cosine_similarity
177
+ import torch
178
+ import logging
179
+ import gradio as gr
180
+
181
+ # Set up logging
182
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
183
+ log_file = os.path.join(os.getcwd(), "application_log.txt") # Relative path
184
+ logging.getLogger().addHandler(logging.FileHandler(log_file))
185
+
186
+ # Set up GPU if available
187
+ if torch.cuda.is_available():
188
+ device = torch.device("cuda")
189
+ logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
190
+ else:
191
+ device = torch.device("cpu")
192
+ logging.info("GPU not available, using CPU instead")
193
+
194
+ # Initialize the sentence transformer model
195
+ @torch.no_grad()
196
+ def initialize_model():
197
+ logging.info("Initializing sentence transformer model")
198
+ try:
199
+ model = SentenceTransformer('paraphrase-MiniLM-L6-v2', device=device)
200
+ return model
201
+ except Exception as e:
202
+ logging.error(f"Failed to initialize model: {str(e)}")
203
+ raise
204
+
205
+ model = initialize_model()
206
+
207
  # Function to extract text from a PDF resume
208
  def extract_resume_text(pdf_file_path):
209
  logging.info("Extracting resume text")
 
446
  ])])
447
  if not skills_section:
448
  skills_section = resume_text.lower()
449
+ logging.warning("No specific skills section found, using full resume text to match")
450
 
451
  resume_embedding = model.encode(skills_section, convert_to_tensor=True)
452
  job_embedding = model.encode(job_description, convert_to_tensor=True)
 
823
  # Setup instructions for Gradio in Colab
824
  def setup_and_run():
825
  print("Installing dependencies...")
826
+ # !pip install PyPDF2==3.0.1 beautifulsoup4==4.12.2 sentence-transformers==2.2.2 huggingface_hub==0.7.0 scikit-learn==1.5.0 torch==2.0.1 numpy==1.26.4 pandas==2.2.2 requests==2.31.0 gradio==4.31.0
827
  print("Starting Gradio interface...")
828
 
829
  iface = gr.Interface(