Spaces:

DreamStream-1
/

New-1

Build error

App Files Community

DreamStream-1 committed on Nov 24, 2024

Commit

6bbfa30

verified ·

1 Parent(s): 70688b5

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -50

app.py CHANGED Viewed

@@ -10,23 +10,20 @@ from fuzzywuzzy import fuzz
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
-import fitz  # PyMuPDF
 from typing import List, Dict, Set
 import docx
 import tempfile
-from pathlib import Path  # Add the missing import for Path
-# ResumeAnalyzer class that processes resumes, calculates match percentage, and uses AI analysis
 class ResumeAnalyzer:
     def __init__(self):
-        """Initialize the ResumeAnalyzer with required resources."""
         self._initialize_logging()
         self._initialize_nltk()
         self._initialize_spacy()
         self._setup_api_key()
     def _initialize_logging(self):
-        """Set up logging for the class."""
         self.logger = logging.getLogger(__name__)
         logging.basicConfig(
             level=logging.INFO,
@@ -34,7 +31,6 @@ class ResumeAnalyzer:
         )
     def _initialize_nltk(self) -> None:
-        """Initialize NLTK resources safely."""
         try:
             nltk.data.path.append(os.getcwd())
             for resource in ['punkt', 'stopwords', 'wordnet']:
@@ -49,7 +45,6 @@ class ResumeAnalyzer:
             raise
     def _initialize_spacy(self) -> None:
-        """Initialize spaCy model safely."""
         try:
             self.nlp = spacy.load("en_core_web_sm")
         except OSError:
@@ -59,7 +54,6 @@ class ResumeAnalyzer:
             self.nlp = spacy.load("en_core_web_sm")
     def _setup_api_key(self) -> None:
-        """Set up Google API key from Hugging Face Spaces secrets."""
         try:
             self.google_api_key = os.environ.get("GOOGLE_API_KEY")
             if not self.google_api_key:
@@ -70,17 +64,15 @@ class ResumeAnalyzer:
             raise
     def extract_text_from_pdf(self, file_path: str) -> str:
-        """Extract text from a PDF file."""
         try:
             with fitz.open(file_path) as doc:
-                text = " ".join(page.get_text("text") for page in doc)
             return text
         except Exception as e:
             self.logger.error(f"Error extracting text from PDF: {str(e)}")
             return ""
     def extract_text_from_docx(self, file_path: str) -> str:
-        """Extract text from a DOCX file."""
         try:
             doc = docx.Document(file_path)
             return "\n".join(para.text for para in doc.paragraphs)
@@ -89,7 +81,6 @@ class ResumeAnalyzer:
             return ""
     def preprocess_text(self, text: str) -> str:
-        """Preprocess the text."""
         try:
             text = text.lower()
             text = re.sub(r'\s+', ' ', text)
@@ -104,62 +95,46 @@ class ResumeAnalyzer:
             return text
     def extract_named_entities(self, text: str) -> Set[str]:
-        """Extract named entities from text."""
         try:
-            # Limit text length to prevent memory issues
             doc = self.nlp(text[:100000])
-            return {ent.text for ent in doc.ents}
         except Exception as e:
             self.logger.error(f"Error in named entity extraction: {str(e)}")
             return set()
     def calculate_match_percentage(self, resume_text: str, job_desc_text: str) -> float:
-        """Calculate the match percentage between resume and job description."""
         try:
             resume_text = self.preprocess_text(resume_text)
             job_desc_text = self.preprocess_text(job_desc_text)
-            return fuzz.partial_ratio(resume_text, job_desc_text)
         except Exception as e:
             self.logger.error(f"Error calculating match percentage: {str(e)}")
             return 0.0
     def gemini_analysis(self, text: str) -> str:
-        """Analyze text using Gemini API."""
         try:
             prompt = f"""Analyze this resume text and provide a brief summary of key skills and experience:
                         {text[:1000]}..."""
-            response = genai.generate_text(prompt=prompt)
             return response.text
         except Exception as e:
             self.logger.error(f"Error in Gemini analysis: {str(e)}")
             return "AI analysis failed"
-    def process_file(self, file: gr.File, job_desc: str) -> dict:
-        """Process a single resume file."""
         try:
-            # Handle file input correctly using `file.name` and `.read()`
-            file_content = file.read()  # This is the correct way to read the file content in Gradio
-            # Save the uploaded file content to a temporary file
-            with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.name).suffix) as temp_file:
-                temp_file.write(file_content)  # Write content to the temporary file
-                temp_path = temp_file.name
             # Extract text based on file type
-            if file.name.lower().endswith('.pdf'):
-                text = self.extract_text_from_pdf(temp_path)
-            elif file.name.lower().endswith('.docx'):
-                text = self.extract_text_from_docx(temp_path)
             else:
-                return {"Resume": file.name, "Match Percentage": "Invalid File Type"}
-            # Clean up the temporary file after processing
-            os.unlink(temp_path)
             if not text.strip():
-                return {"Resume": file.name, "Match Percentage": "No text extracted"}
-            # Further processing like calculating match percentage and analysis
             entities = self.extract_named_entities(text)
             job_entities = self.extract_named_entities(job_desc)
@@ -172,18 +147,17 @@ class ResumeAnalyzer:
             gemini_analysis = self.gemini_analysis(text)
             return {
-                "Resume": file.name,
                 "Match Percentage": round(match_percentage, 2),
                 "Entity Match (%)": round(entity_match, 2),
                 "AI Analysis": gemini_analysis
             }
         except Exception as e:
-            self.logger.error(f"Error processing file {file.name}: {str(e)}")
-            return {"Resume": file.name, "Error": str(e)}
-    def process_uploaded_resumes(self, resume_files: List[gr.File], job_desc: str) -> pd.DataFrame:
-        """Process multiple resume files."""
         if not resume_files:
             return pd.DataFrame({"Message": ["Please upload at least one resume."]})
@@ -191,8 +165,8 @@ class ResumeAnalyzer:
             return pd.DataFrame({"Message": ["Please provide a job description."]})
         results = []
-        for file in resume_files:
-            result = self.process_file(file, job_desc)
             results.append(result)
         return pd.DataFrame(results)
@@ -203,10 +177,10 @@ analyzer = ResumeAnalyzer()
 interface = gr.Interface(
     fn=analyzer.process_uploaded_resumes,
     inputs=[
-        gr.Files(
             label="Upload Resumes (PDF or DOCX)",
             file_types=[".pdf", ".docx"],
-            type="filepath"
         ),
         gr.Textbox(
             label="Job Description",
@@ -227,6 +201,5 @@ interface = gr.Interface(
     theme=gr.themes.Soft()
 )
-# Launch the interface
 if __name__ == "__main__":
-    interface.launch()

 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
+import fitz
 from typing import List, Dict, Set
 import docx
 import tempfile
+from pathlib import Path
 class ResumeAnalyzer:
     def __init__(self):
         self._initialize_logging()
         self._initialize_nltk()
         self._initialize_spacy()
         self._setup_api_key()
     def _initialize_logging(self):
         self.logger = logging.getLogger(__name__)
         logging.basicConfig(
             level=logging.INFO,
         )
     def _initialize_nltk(self) -> None:
         try:
             nltk.data.path.append(os.getcwd())
             for resource in ['punkt', 'stopwords', 'wordnet']:
             raise
     def _initialize_spacy(self) -> None:
         try:
             self.nlp = spacy.load("en_core_web_sm")
         except OSError:
             self.nlp = spacy.load("en_core_web_sm")
     def _setup_api_key(self) -> None:
         try:
             self.google_api_key = os.environ.get("GOOGLE_API_KEY")
             if not self.google_api_key:
             raise
     def extract_text_from_pdf(self, file_path: str) -> str:
         try:
             with fitz.open(file_path) as doc:
+                text = " ".join(page.get_text() for page in doc)
             return text
         except Exception as e:
             self.logger.error(f"Error extracting text from PDF: {str(e)}")
             return ""
     def extract_text_from_docx(self, file_path: str) -> str:
         try:
             doc = docx.Document(file_path)
             return "\n".join(para.text for para in doc.paragraphs)
             return ""
     def preprocess_text(self, text: str) -> str:
         try:
             text = text.lower()
             text = re.sub(r'\s+', ' ', text)
             return text
     def extract_named_entities(self, text: str) -> Set[str]:
         try:
             doc = self.nlp(text[:100000])
+            return {ent.text.lower() for ent in doc.ents}
         except Exception as e:
             self.logger.error(f"Error in named entity extraction: {str(e)}")
             return set()
     def calculate_match_percentage(self, resume_text: str, job_desc_text: str) -> float:
         try:
             resume_text = self.preprocess_text(resume_text)
             job_desc_text = self.preprocess_text(job_desc_text)
+            return fuzz.token_set_ratio(resume_text, job_desc_text)
         except Exception as e:
             self.logger.error(f"Error calculating match percentage: {str(e)}")
             return 0.0
     def gemini_analysis(self, text: str) -> str:
         try:
+            model = genai.GenerativeModel('gemini-pro')
             prompt = f"""Analyze this resume text and provide a brief summary of key skills and experience:
                         {text[:1000]}..."""
+            response = model.generate_content(prompt)
             return response.text
         except Exception as e:
             self.logger.error(f"Error in Gemini analysis: {str(e)}")
             return "AI analysis failed"
+    def process_file(self, file_path: str, job_desc: str) -> dict:
         try:
             # Extract text based on file type
+            if file_path.lower().endswith('.pdf'):
+                text = self.extract_text_from_pdf(file_path)
+            elif file_path.lower().endswith('.docx'):
+                text = self.extract_text_from_docx(file_path)
             else:
+                return {"Resume": Path(file_path).name, "Match Percentage": "Invalid File Type"}
             if not text.strip():
+                return {"Resume": Path(file_path).name, "Match Percentage": "No text extracted"}
             entities = self.extract_named_entities(text)
             job_entities = self.extract_named_entities(job_desc)
             gemini_analysis = self.gemini_analysis(text)
             return {
+                "Resume": Path(file_path).name,
                 "Match Percentage": round(match_percentage, 2),
                 "Entity Match (%)": round(entity_match, 2),
                 "AI Analysis": gemini_analysis
             }
         except Exception as e:
+            self.logger.error(f"Error processing file {file_path}: {str(e)}")
+            return {"Resume": Path(file_path).name, "Error": str(e)}
+    def process_uploaded_resumes(self, resume_files: List[str], job_desc: str) -> pd.DataFrame:
         if not resume_files:
             return pd.DataFrame({"Message": ["Please upload at least one resume."]})
             return pd.DataFrame({"Message": ["Please provide a job description."]})
         results = []
+        for file_path in resume_files:
+            result = self.process_file(file_path, job_desc)
             results.append(result)
         return pd.DataFrame(results)
 interface = gr.Interface(
     fn=analyzer.process_uploaded_resumes,
     inputs=[
+        gr.File(
             label="Upload Resumes (PDF or DOCX)",
             file_types=[".pdf", ".docx"],
+            multiple=True
         ),
         gr.Textbox(
             label="Job Description",
     theme=gr.themes.Soft()
 )
 if __name__ == "__main__":
+    interface.launch()