Spaces:
Runtime error
Runtime error
| import google.generativeai as genai | |
| import fitz # PyMuPDF for PDF text extraction | |
| import streamlit as st | |
| import spacy | |
| from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline | |
| from docx import Document | |
| import re | |
| import dateparser | |
| from datetime import datetime | |
| import os | |
| from typing import List, Dict | |
| import logging | |
# Module-wide logging: INFO and above, one logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# SpaCy pipeline; its entity annotations are used by the college extractor.
nlp_spacy = spacy.load('en_core_web_sm')

# Token-classification checkpoint used for organisation detection. The
# tokenizer and the model are loaded from the same checkpoint and wrapped in
# a single NER pipeline that merges word pieces ("simple" aggregation).
_NER_CHECKPOINT = "Babelscape/wikineural-multilingual-ner"
tokenizer = AutoTokenizer.from_pretrained(_NER_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(_NER_CHECKPOINT)
nlp_ner = pipeline(
    'ner',
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
def authenticate_gemini() -> genai.GenerativeModel:
    """Configure the Gemini client and return a ready-to-use model.

    The API key is read from the environment (``GEMINI_API_KEY`` first, then
    ``GOOGLE_API_KEY``) so it can be supplied through the hosting platform's
    secret store instead of being committed to source control.

    Returns:
        A ``genai.GenerativeModel`` for ``gemini-pro``, or ``None`` when no
        key is configured or the client could not be set up. Failures are
        reported to the user through Streamlit rather than raised.
    """
    # Never hard-code credentials: a key embedded in the file is readable by
    # anyone with access to the source and has to be treated as compromised.
    api_key = (
        os.environ.get("GEMINI_API_KEY")
        or os.environ.get("GOOGLE_API_KEY")
        or ""
    ).strip()
    if not api_key:
        st.error("Google Gemini API key not found. Please set it in the Hugging Face Spaces secrets.")
        return None

    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name="gemini-pro")
    except Exception as e:
        # Boundary handler: log the details, show the user a generic message.
        logger.exception("Error configuring Gemini API: %s", e)
        st.error("Error configuring Gemini API. Please check your API key and try again.")
        return None

    st.success("Gemini API successfully configured.")
    return model
def refine_org_entities(entities: List[str]) -> List[str]:
    """Filter raw organisation strings down to plausible company names.

    A leading article ("The", "A", "An") is removed from each entity. The
    entity is kept when it ends with a known company suffix (a trailing
    period as in "Inc." is tolerated) or when it starts with a capitalised
    word.

    Args:
        entities: Any iterable of candidate organisation names.

    Returns:
        The unique surviving names, sorted so the result is deterministic.
    """
    # A tuple lets str.endswith test every suffix in one call.
    company_suffixes = (
        'Inc', 'LLC', 'Corporation', 'Corp', 'Ltd', 'Co', 'GmbH', 'S.A.',
        'Company', 'Group',
    )
    article_prefix = re.compile(r'^(The|A|An)\s+')
    # Anchored at the start only: one leading capitalised word is enough.
    capitalized_words = re.compile(r'([A-Z][a-z]+\s?)+')

    refined_entities = set()
    for entity in entities:
        # Remove common prefixes that might interfere with company names
        entity = article_prefix.sub('', entity).strip()
        if not entity:
            continue
        has_suffix = (
            entity.endswith(company_suffixes)
            # "eBay Inc." should count as ending with "Inc".
            or entity.rstrip('.').endswith(company_suffixes)
        )
        if has_suffix or capitalized_words.match(entity):
            refined_entities.add(entity)
    return sorted(refined_entities)
def extract_orgs(text: str) -> List[str]:
    """Return the organisation names the NER pipeline finds in *text*.

    Only spans whose ``entity_group`` is ``'ORG'`` are considered; the unique
    surface forms are then passed through ``refine_org_entities``.
    """
    org_names = {
        span['word']
        for span in nlp_ner(text)
        if span['entity_group'] == 'ORG'
    }
    return refine_org_entities(org_names)
def extract_text_from_pdf(pdf_file) -> str:
    """Extract the text of every page of an uploaded PDF.

    Args:
        pdf_file: A binary file-like object; its full content is read into
            memory and handed to PyMuPDF as a stream.

    Returns:
        The concatenated page texts, or ``""`` if the file could not be
        opened or parsed (the error is logged, not raised).
    """
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {e}")
        return ""
def extract_text_from_doc(doc_file) -> str:
    """Return the paragraph text of a Word document, one paragraph per line.

    Any failure while opening or reading the document is logged and an empty
    string is returned instead of raising.
    """
    try:
        paragraphs = Document(doc_file).paragraphs
        return '\n'.join(paragraph.text for paragraph in paragraphs)
    except Exception as err:
        logger.error(f"Error extracting text from DOCX: {err}")
        return ""
def generate_summary(text: str, model: genai.GenerativeModel) -> str:
    """Ask the generative model for a roughly 100-word summary of a resume.

    Args:
        text: The full resume text, embedded verbatim in the prompt.
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.

    Returns:
        The model's summary, or a fixed error message if the call fails (the
        exception is logged).
    """
    prompt = f"Summarize the following resume in 100 words, highlighting key skills and experiences:\n\n{text}"
    try:
        summary = model.generate_content(prompt).text
    except Exception as err:
        logger.error(f"Error generating summary: {err}")
        return "Error generating summary. Please try again."
    return summary
def extract_experience(text: str) -> str:
    """Sum every "<N> years"/"<N> yrs" mention found in *text*.

    Month counts are deliberately ignored; only whole years contribute.
    Each year mention is counted exactly once, and multi-digit values such
    as "10 years" are read as the full number.

    Args:
        text: Free-form resume text.

    Returns:
        ``"<total> years"`` when the total is positive, otherwise
        ``"Experience not found"``.
    """
    # One pattern is enough: "2 years and 6 months" already contains the
    # "2 years" match, so a separate combined pattern would count it twice.
    year_pattern = r'(\d+)\s*(?:years?|yrs?)'  # e.g., 5 years, 2 yrs
    total_years = sum(
        int(value) for value in re.findall(year_pattern, text, re.IGNORECASE)
    )
    if total_years > 0:
        return f"{total_years} years"
    return "Experience not found"
def extract_phone(text: str) -> str:
    """Return the first North-American-style phone number found in *text*.

    Accepted shapes include ``555-123-4567``, ``555.123.4567``,
    ``(555) 123-4567`` and the same with a ``1``/``+1`` country prefix.

    Args:
        text: Free-form resume text.

    Returns:
        The matched number exactly as written, or ``"Not found"``.
    """
    # A leading \b cannot match in front of "(" or "+" (both are non-word
    # characters), so a negative lookbehind is used instead: the number must
    # simply not be glued to a preceding word character.
    phone_pattern = (
        r'(?<!\w)(?:\+?1[-.\s]?)?(?:\(\d{3}\)|\d{3})'
        r'[-.\s]?\d{3}[-.\s]?\d{4}\b'
    )
    match = re.search(phone_pattern, text)
    return match.group() if match else "Not found"
def extract_email(text: str) -> str:
    """Return the first e-mail address found in *text*.

    Args:
        text: Free-form resume text.

    Returns:
        The matched address, or ``"Not found"``.
    """
    # The top-level domain is letters only; inside a character class "|" is
    # a literal pipe, not an alternation, so it must not appear there.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    match = re.search(email_pattern, text)
    return match.group() if match else "Not found"
def extract_colleges(doc) -> List[str]:
    """Collect the educational institutions among a parsed document's entities.

    An entity qualifies when its label is ``"ORG"`` and its lower-cased text
    contains one of: university, college, institute, school.

    Args:
        doc: Object exposing ``ents``, each item having ``label_`` and
            ``text`` attributes.

    Returns:
        The unique matching entity texts, in no particular order.
    """
    edu_keywords = ("university", "college", "institute", "school")

    def looks_educational(name: str) -> bool:
        lowered = name.lower()
        return any(keyword in lowered for keyword in edu_keywords)

    found = {
        ent.text
        for ent in doc.ents
        if ent.label_ == "ORG" and looks_educational(ent.text)
    }
    return list(found)
def extract_linkedin(text: str) -> str:
    """Return the first LinkedIn profile reference found in *text*.

    Three shapes are tried in order: a URL with ``//`` (scheme optional), a
    bare ``linkedin.com/in/<slug>``, and ``@<handle> (LinkedIn)``. Matching
    is case-insensitive.

    Args:
        text: Free-form resume text.

    Returns:
        The matched text, or ``"Not found"``.
    """
    # The slug class is spelled out as A-Za-z: the range A-z also covers
    # the punctuation between "Z" and "a" ("[", "\", "]", "^", "`"), which
    # would drag surrounding brackets into the match.
    linkedin_patterns = [
        r'(?:https?:)?\/\/(?:[\w]+\.)?linkedin\.com\/in\/[A-Za-z0-9_-]+\/?',
        r'linkedin\.com\/in\/[A-Za-z0-9_-]+',
        r'@[A-Za-z0-9_-]+\s+\(LinkedIn\)'
    ]
    for pattern in linkedin_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return match.group()
    return "Not found"
def analyze_resume(text: str, model: genai.GenerativeModel) -> Dict:
    """Run every extractor over a resume and gather the results.

    Args:
        text: The full resume text.
        model: Generative model passed through to ``generate_summary``.

    Returns:
        A dict with the keys ``companies``, ``summary``, ``experience``,
        ``phone``, ``email``, ``colleges`` and ``linkedin``.
    """
    # Parse once up front; only the college extractor consumes the parse.
    parsed = nlp_spacy(text)

    report = {}
    report["companies"] = extract_orgs(text)
    report["summary"] = generate_summary(text, model)
    report["experience"] = extract_experience(text)
    report["phone"] = extract_phone(text)
    report["email"] = extract_email(text)
    report["colleges"] = extract_colleges(parsed)
    report["linkedin"] = extract_linkedin(text)
    return report
def main():
    """Streamlit entry point: upload a resume, analyse it, show the results.

    The page stops early when the Gemini model cannot be configured, when no
    file has been uploaded yet, when the extension is unsupported, or when no
    text could be extracted. Any unexpected error during analysis is logged
    with its traceback and reported to the user with a generic message.
    """
    st.title("Enhanced Resume Analyzer")
    st.write("Upload a resume to extract information, generate a summary, and analyze details.")

    model = authenticate_gemini()
    if model is None:
        return

    uploaded_file = st.file_uploader("Choose a PDF or DOCX file", type=["pdf", "docx", "doc"])
    if uploaded_file is None:
        return

    try:
        file_ext = uploaded_file.name.split('.')[-1].lower()
        if file_ext == 'pdf':
            resume_text = extract_text_from_pdf(uploaded_file)
        elif file_ext in ('docx', 'doc'):
            resume_text = extract_text_from_doc(uploaded_file)
        else:
            st.error("Unsupported file format.")
            return

        if not resume_text.strip():
            if file_ext == 'doc':
                # NOTE(review): the DOCX reader is documented for .docx only,
                # so a legacy binary .doc presumably ends up here; tell the
                # user what to do instead of calling the file "empty".
                st.error("Legacy .doc files could not be read. Please convert the resume to .docx or PDF and try again.")
            else:
                st.error("The resume appears to be empty or couldn't be read.")
            return

        with st.spinner("Analyzing resume..."):
            results = analyze_resume(resume_text, model)

        st.subheader("Extracted Information")
        st.write(f"Experience: {results['experience']}")
        st.write("Companies Worked For:")
        # An empty list would render as a blank line; use the same
        # "Not found" wording the scalar fields use.
        st.write(", ".join(results['companies']) or "Not found")
        st.write(f"Phone Number: {results['phone']}")
        st.write(f"Email ID: {results['email']}")
        st.write("Colleges Attended:")
        st.write(", ".join(results['colleges']) or "Not found")
        st.write(f"LinkedIn: {results['linkedin']}")
        st.subheader("Generated Summary")
        st.write(results['summary'])
    except Exception as e:
        # Top-level boundary: keep the traceback in the log, not in the UI.
        logger.exception("Error during resume analysis: %s", e)
        st.error("An error occurred during resume analysis. Please try again or contact support if the issue persists.")


if __name__ == "__main__":
    main()