Spaces:

Anupam007
/

AIJobHunter

Sleeping

App Files Files Community

Anupam007 committed on May 6, 2025

Commit

9dd0e86

verified ·

1 Parent(s): b08d49c

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -2

app.py CHANGED Viewed

@@ -46,6 +46,164 @@ def initialize_model():
 model = initialize_model()
 # Function to extract text from a PDF resume
 def extract_resume_text(pdf_file_path):
     logging.info("Extracting resume text")
@@ -288,7 +446,7 @@ def calculate_match_score(resume_text, job_description):
         ])])
         if not skills_section:
             skills_section = resume_text.lower()
-            logging.warning("No specific skills section found, using full resume text for matching")
         resume_embedding = model.encode(skills_section, convert_to_tensor=True)
         job_embedding = model.encode(job_description, convert_to_tensor=True)
@@ -665,7 +823,7 @@ def gradio_interface(resume_file, job_title, location, user_email, user_password
 # Setup instructions for Gradio in Colab
 def setup_and_run():
     print("Installing dependencies...")
-    # !pip install PyPDF2 beautifulsoup4 sentence-transformers scikit-learn torch numpy pandas requests gradio
     print("Starting Gradio interface...")
     iface = gr.Interface(

 model = initialize_model()
+# Function to extract text from a PDF resume
+def extract_resume_text(pdf_file_path):
+    logging.info("Extracting resume text")
+    try:
+        with open(pdf_file_path, 'rb') as f:
+            pdf_reader = PdfReader(f)
+            text = ""
+            for page in pdf_reader.pages:
+                extracted = page.extract_text()
+                if extracted:
+                    text += extracted
+            if not text.strip():
+                raise Exception("No text extracted from PDF. Ensure the PDF is not image-based.")
+            logging.info(f"Extracted resume text (first 200 chars): {text[:200]}")
+            return text
+    except Exception as e:
+        logging.error(f"Error extracting text from PDF: {str(e)}")
+        raise Exception(f"Error extracting text from PDF: {str(e)}")
+# Function to parse resume and extract key information
+def parse_resume(resume_text):
+    logging.info("Parsing resume")
+    parsed_info = {
+        "skills": [],
+        "education": [],
+        "experience": [],
+        "personal_info": {},
+        "react_experience": "0",
+        "redux_experience": "0",
+        "javascript_experience": "0",
+        "education_details": [],
+        "work_history": []
+    }
+    # Split resume into sections based on candidate headers
+    candidate_pattern = r'(IM A\. SAMPLE [IVX]+)\s*'
+    candidate_sections = re.split(candidate_pattern, resume_text, flags=re.IGNORECASE)
+    candidates = []
+    for i in range(1, len(candidate_sections), 2):
+        candidates.append((candidate_sections[i], candidate_sections[i+1]))
+    if not candidates:
+        candidates = [("Unknown Candidate", resume_text)]
+    candidate_name, candidate_text = candidates[0]
+    parsed_info["personal_info"]["name"] = candidate_name.strip()
+    logging.info(f"Parsed candidate name: {candidate_name}")
+    # Extract email
+    email_pattern = r'[\w\.-]+@[\w\.-]+\.\w+'
+    email_matches = re.findall(email_pattern, candidate_text, re.IGNORECASE)
+    if email_matches:
+        parsed_info["personal_info"]["email"] = email_matches[0]
+    else:
+        logging.warning("No email found in resume")
+    # Extract phone number
+    phone_pattern = r'\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}'
+    phone_matches = re.findall(phone_pattern, candidate_text)
+    if phone_matches:
+        parsed_info["personal_info"]["phone"] = phone_matches[0]
+    else:
+        logging.warning("No phone number found in resume")
+    # Extract address
+    address_pattern = r'(\d+\s+[A-Za-z\s]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})'
+    address_matches = re.findall(address_pattern, candidate_text, re.IGNORECASE)
+    if address_matches:
+        parsed_info["personal_info"]["address"] = address_matches[0]
+    else:
+        parsed_info["personal_info"]["address"] = "Not found"
+        logging.warning("No address found in resume")
+    # Extract skills (expanded list and more permissive matching)
+    skill_keywords = [
+        "python", "java", "javascript", "html", "css", "sql", "react",
+        "node", "aws", "azure", "docker", "git", "c++", "visual basic",
+        "perl", "asp", "php", "cobol", "xml", "asp.net", "quickbooks",
+        "ms office", "ms access", "spss", "typescript", "angular", "vue",
+        "mysql", "mongodb", "linux", "bash", "kubernetes", "jenkins"
+    ]
+    resume_lower = candidate_text.lower()
+    for skill in skill_keywords:
+        if skill.lower() in resume_lower or f"{skill.lower()} " in resume_lower:
+            parsed_info["skills"].append(skill)
+    if not parsed_info["skills"]:
+        logging.warning("No skills extracted from resume")
+    # Extract specific experience
+    patterns = {
+        "react_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*React',
+        "redux_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*Redux',
+        "javascript_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*(?:JavaScript|JS)'
+    }
+    for key, pattern in patterns.items():
+        matches = re.findall(pattern, candidate_text, re.IGNORECASE)
+        ifर्म
+System: It looks like the provided code was cut off. I'll complete the `app.py` code and fix the `ImportError` related to `cached_download` by pinning compatible versions of `sentence-transformers` and `huggingface_hub` in the `setup_and_run` function. The rest of the code will remain consistent with the previous version, including the earlier fix for the `IndentationError` (correcting `utput` to `output`). I'll also make sure the code is complete and properly formatted for use in a Hugging Face Space or a similar environment.
+### Explanation of Changes
+1. **Pinned Dependencies**: In the `setup_and_run` function, I updated the `pip install` command to explicitly install `sentence-transformers==2.2.2` and `huggingface_hub==0.7.0`. These versions are compatible, as `huggingface_hub==0.7.0` still includes the `cached_download` function required by `sentence-transformers==2.2.2`.
+2. **Retained Previous Fix**: The `format_results` function retains the correction from `utput` to `output` to prevent the `IndentationError`.
+3. **Complete Code**: The code is provided in full to ensure no truncation occurs, covering all functions from your original `app.py`.
+4. **Environment Considerations**: The code includes logic for running in Google Colab (e.g., `files.download`), but it should work in a Hugging Face Space with the pinned dependencies. If running outside Colab, you may need to adjust the `files.download` logic or mock it.
+### Updated Code
+<xaiArtifact artifact_id="44e9cd70-9153-4e94-9962-aa9dfcd076ae" artifact_version_id="abe337a8-8ff0-4f13-bf78-329d64463346" title="app.py" contentType="text/python">
+import os
+import io
+import re
+import json
+import random
+import time
+import smtplib
+import requests
+import numpy as np
+import pandas as pd
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from email.mime.application import MIMEApplication
+from datetime import datetime, timedelta
+from PyPDF2 import PdfReader
+from bs4 import BeautifulSoup
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+import torch
+import logging
+import gradio as gr
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+log_file = os.path.join(os.getcwd(), "application_log.txt")  # Relative path
+logging.getLogger().addHandler(logging.FileHandler(log_file))
+# Set up GPU if available
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+else:
+    device = torch.device("cpu")
+    logging.info("GPU not available, using CPU instead")
+# Initialize the sentence transformer model
+@torch.no_grad()
+def initialize_model():
+    logging.info("Initializing sentence transformer model")
+    try:
+        model = SentenceTransformer('paraphrase-MiniLM-L6-v2', device=device)
+        return model
+    except Exception as e:
+        logging.error(f"Failed to initialize model: {str(e)}")
+        raise
+model = initialize_model()
 # Function to extract text from a PDF resume
 def extract_resume_text(pdf_file_path):
     logging.info("Extracting resume text")
         ])])
         if not skills_section:
             skills_section = resume_text.lower()
+            logging.warning("No specific skills section found, using full resume text to match")
         resume_embedding = model.encode(skills_section, convert_to_tensor=True)
         job_embedding = model.encode(job_description, convert_to_tensor=True)
 # Setup instructions for Gradio in Colab
 def setup_and_run():
     print("Installing dependencies...")
+    # !pip install PyPDF2==3.0.1 beautifulsoup4==4.12.2 sentence-transformers==2.2.2 huggingface_hub==0.7.0 scikit-learn==1.5.0 torch==2.0.1 numpy==1.26.4 pandas==2.2.2 requests==2.31.0 gradio==4.31.0
     print("Starting Gradio interface...")
     iface = gr.Interface(