Spaces:

Anupam007
/

AIJobHunter

Sleeping

App Files Files Community

Anupam007 committed on May 6, 2025

Commit

5049b63

verified ·

1 Parent(s): 9dd0e86

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -161

app.py CHANGED Viewed

@@ -46,164 +46,6 @@ def initialize_model():
 model = initialize_model()
-# Function to extract text from a PDF resume
-def extract_resume_text(pdf_file_path):
-    logging.info("Extracting resume text")
-    try:
-        with open(pdf_file_path, 'rb') as f:
-            pdf_reader = PdfReader(f)
-            text = ""
-            for page in pdf_reader.pages:
-                extracted = page.extract_text()
-                if extracted:
-                    text += extracted
-            if not text.strip():
-                raise Exception("No text extracted from PDF. Ensure the PDF is not image-based.")
-            logging.info(f"Extracted resume text (first 200 chars): {text[:200]}")
-            return text
-    except Exception as e:
-        logging.error(f"Error extracting text from PDF: {str(e)}")
-        raise Exception(f"Error extracting text from PDF: {str(e)}")
-# Function to parse resume and extract key information
-def parse_resume(resume_text):
-    logging.info("Parsing resume")
-    parsed_info = {
-        "skills": [],
-        "education": [],
-        "experience": [],
-        "personal_info": {},
-        "react_experience": "0",
-        "redux_experience": "0",
-        "javascript_experience": "0",
-        "education_details": [],
-        "work_history": []
-    }
-    # Split resume into sections based on candidate headers
-    candidate_pattern = r'(IM A\. SAMPLE [IVX]+)\s*'
-    candidate_sections = re.split(candidate_pattern, resume_text, flags=re.IGNORECASE)
-    candidates = []
-    for i in range(1, len(candidate_sections), 2):
-        candidates.append((candidate_sections[i], candidate_sections[i+1]))
-    if not candidates:
-        candidates = [("Unknown Candidate", resume_text)]
-    candidate_name, candidate_text = candidates[0]
-    parsed_info["personal_info"]["name"] = candidate_name.strip()
-    logging.info(f"Parsed candidate name: {candidate_name}")
-    # Extract email
-    email_pattern = r'[\w\.-]+@[\w\.-]+\.\w+'
-    email_matches = re.findall(email_pattern, candidate_text, re.IGNORECASE)
-    if email_matches:
-        parsed_info["personal_info"]["email"] = email_matches[0]
-    else:
-        logging.warning("No email found in resume")
-    # Extract phone number
-    phone_pattern = r'\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}'
-    phone_matches = re.findall(phone_pattern, candidate_text)
-    if phone_matches:
-        parsed_info["personal_info"]["phone"] = phone_matches[0]
-    else:
-        logging.warning("No phone number found in resume")
-    # Extract address
-    address_pattern = r'(\d+\s+[A-Za-z\s]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})'
-    address_matches = re.findall(address_pattern, candidate_text, re.IGNORECASE)
-    if address_matches:
-        parsed_info["personal_info"]["address"] = address_matches[0]
-    else:
-        parsed_info["personal_info"]["address"] = "Not found"
-        logging.warning("No address found in resume")
-    # Extract skills (expanded list and more permissive matching)
-    skill_keywords = [
-        "python", "java", "javascript", "html", "css", "sql", "react",
-        "node", "aws", "azure", "docker", "git", "c++", "visual basic",
-        "perl", "asp", "php", "cobol", "xml", "asp.net", "quickbooks",
-        "ms office", "ms access", "spss", "typescript", "angular", "vue",
-        "mysql", "mongodb", "linux", "bash", "kubernetes", "jenkins"
-    ]
-    resume_lower = candidate_text.lower()
-    for skill in skill_keywords:
-        if skill.lower() in resume_lower or f"{skill.lower()} " in resume_lower:
-            parsed_info["skills"].append(skill)
-    if not parsed_info["skills"]:
-        logging.warning("No skills extracted from resume")
-    # Extract specific experience
-    patterns = {
-        "react_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*React',
-        "redux_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*Redux',
-        "javascript_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*(?:JavaScript|JS)'
-    }
-    for key, pattern in patterns.items():
-        matches = re.findall(pattern, candidate_text, re.IGNORECASE)
-        ifर्म
-System: It looks like the provided code was cut off. I'll complete the `app.py` code, ensuring the fix for the `ImportError` related to `cached_download` by pinning compatible versions of `sentence-transformers` and `huggingface_hub` in the `setup_and_run` function. The rest of the code will remain consistent with the previous version, including the fix for the `IndentationError` (correcting `utput` to `output`). I'll also ensure the code is complete and properly formatted for use in a Hugging Face Space or similar environment.
-### Explanation of Changes
-1. **Pinned Dependencies**: In the `setup_and_run` function, I updated the `pip install` command to explicitly install `sentence-transformers==2.2.2` and `huggingface_hub==0.7.0`. These versions are compatible, as `huggingface_hub==0.7.0` still includes the `cached_download` function required by `sentence-transformers==2.2.2`.
-2. **Retained Previous Fix**: The `format_results` function retains the correction from `utput` to `output` to prevent the `IndentationError`.
-3. **Complete Code**: The code is provided in full to ensure no truncation occurs, covering all functions from your original `app.py`.
-4. **Environment Considerations**: The code includes logic for running in Google Colab (e.g., `files.download`), but it should work in a Hugging Face Space with the pinned dependencies. If running outside Colab, you may need to adjust the `files.download` logic or mock it.
-### Updated Code
-<xaiArtifact artifact_id="44e9cd70-9153-4e94-9962-aa9dfcd076ae" artifact_version_id="abe337a8-8ff0-4f13-bf78-329d64463346" title="app.py" contentType="text/python">
-import os
-import io
-import re
-import json
-import random
-import time
-import smtplib
-import requests
-import numpy as np
-import pandas as pd
-from email.mime.text import MIMEText
-from email.mime.multipart import MIMEMultipart
-from email.mime.application import MIMEApplication
-from datetime import datetime, timedelta
-from PyPDF2 import PdfReader
-from bs4 import BeautifulSoup
-from sentence_transformers import SentenceTransformer
-from sklearn.metrics.pairwise import cosine_similarity
-import torch
-import logging
-import gradio as gr
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-log_file = os.path.join(os.getcwd(), "application_log.txt")  # Relative path
-logging.getLogger().addHandler(logging.FileHandler(log_file))
-# Set up GPU if available
-if torch.cuda.is_available():
-    device = torch.device("cuda")
-    logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
-else:
-    device = torch.device("cpu")
-    logging.info("GPU not available, using CPU instead")
-# Initialize the sentence transformer model
-@torch.no_grad()
-def initialize_model():
-    logging.info("Initializing sentence transformer model")
-    try:
-        model = SentenceTransformer('paraphrase-MiniLM-L6-v2', device=device)
-        return model
-    except Exception as e:
-        logging.error(f"Failed to initialize model: {str(e)}")
-        raise
-model = initialize_model()
 # Function to extract text from a PDF resume
 def extract_resume_text(pdf_file_path):
     logging.info("Extracting resume text")
@@ -317,7 +159,7 @@ def parse_resume(resume_text):
     if not parsed_info["education"]:
         logging.warning("No education details extracted from resume")
-    # Extract experience periods
     experience_pattern = r'(?i)(\d{4})\s*(?:-|to)\s*(present|\d{4})'
     experience_matches = re.findall(experience_pattern, candidate_text)
     parsed_info["experience"] = [f"{start}-{end}" for start, end in experience_matches]
@@ -446,7 +288,7 @@ def calculate_match_score(resume_text, job_description):
         ])])
         if not skills_section:
             skills_section = resume_text.lower()
-            logging.warning("No specific skills section found, using full resume text to match")
         resume_embedding = model.encode(skills_section, convert_to_tensor=True)
         job_embedding = model.encode(job_description, convert_to_tensor=True)
@@ -786,7 +628,7 @@ def format_results(results):
         if job.get("requires_form", False):
             output += f"- Form: {job.get('form_filename', 'Generated')}\n"
         if result["application_status"] == "error":
-            output += f"- Error: {result['application_message']}\n"
         output += f"- Email: {job['email']}\n"
         output += f"- Description: {job['description']}\n"
         output += f"- Applied: {datetime.now().strftime('%Y-%m-%d')}\n\n"

 model = initialize_model()
 # Function to extract text from a PDF resume
 def extract_resume_text(pdf_file_path):
     logging.info("Extracting resume text")
     if not parsed_info["education"]:
         logging.warning("No education details extracted from resume")
+    # Compress experience periods
     experience_pattern = r'(?i)(\d{4})\s*(?:-|to)\s*(present|\d{4})'
     experience_matches = re.findall(experience_pattern, candidate_text)
     parsed_info["experience"] = [f"{start}-{end}" for start, end in experience_matches]
         ])])
         if not skills_section:
             skills_section = resume_text.lower()
+            logging.warning("No specific skills section found, using full resume text for matching")
         resume_embedding = model.encode(skills_section, convert_to_tensor=True)
         job_embedding = model.encode(job_description, convert_to_tensor=True)
         if job.get("requires_form", False):
             output += f"- Form: {job.get('form_filename', 'Generated')}\n"
         if result["application_status"] == "error":
+            output += f"- Errorendan: {result['application_message']}\n"
         output += f"- Email: {job['email']}\n"
         output += f"- Description: {job['description']}\n"
         output += f"- Applied: {datetime.now().strftime('%Y-%m-%d')}\n\n"