Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,30 +1,17 @@
|
|
| 1 |
-
import re
|
| 2 |
import streamlit as st
|
| 3 |
import requests
|
| 4 |
-
|
| 5 |
-
import pdfplumber # pdfplumber for more accurate text extraction
|
| 6 |
from docx import Document
|
| 7 |
-
import io
|
| 8 |
|
| 9 |
# Set up API key for Google Generative Language
|
| 10 |
API_KEY = st.secrets["GOOGLE_API_KEY"]
|
| 11 |
|
| 12 |
-
def
|
| 13 |
-
"""Extract text from PDF
|
|
|
|
| 14 |
text = ""
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
for page_num in range(len(pdf_document)):
|
| 18 |
-
page = pdf_document.load_page(page_num)
|
| 19 |
-
text += page.get_text("text") # Extract text from page
|
| 20 |
-
return text
|
| 21 |
-
|
| 22 |
def extract_text_from_pdf_pdfplumber(pdf_file):
    """Extract text from a PDF using pdfplumber.

    Args:
        pdf_file: A file path or file-like object accepted by pdfplumber.open().

    Returns:
        The concatenated text of every page. Pages with no extractable text
        (e.g. scanned/image-only pages) contribute nothing instead of raising.
    """
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            # pdfplumber's extract_text() returns None when a page has no
            # text layer; the original `text += page.extract_text()` raised
            # TypeError in that case. Coalesce None to "".
            text += page.extract_text() or ""
    return text
|
| 29 |
|
| 30 |
def extract_text_from_docx(docx_file):
|
|
@@ -57,51 +44,14 @@ def analyze_documents(resume_text, job_description):
|
|
| 57 |
]
|
| 58 |
}
|
| 59 |
response = requests.post(url, headers=headers, json=data)
|
| 60 |
-
|
| 61 |
-
# Return the full API response
|
| 62 |
return response.json()
|
| 63 |
|
| 64 |
-
def extract_full_analysis(response):
    """Extract the full analysis (match percentage, missing keywords, etc.) from the API response.

    Args:
        response: Parsed JSON dict from the analysis API. Expected to carry the
            model text at response["choices"][0]["text"].
            NOTE(review): the rest of this file reads Gemini responses via
            "candidates"/"content"/"parts" — confirm this "choices" schema
            is ever produced by the endpoint being called.

    Returns:
        Dict with keys "match_percentage", "missing_keywords",
        "final_thoughts", "recommendations"; each falls back to a
        human-readable placeholder when its section is absent.
    """
    try:
        # Get the analysis content from the API response.
        analysis_content = response.get("choices", [{}])[0].get("text", "")

        # Regexes to pull each labelled section out of the free-form text.
        match_percentage = re.search(r"Match Percentage:.*?([a-zA-Z0-9\s\-\(\)<>\d]+%)", analysis_content)
        missing_keywords = re.search(r"Missing Keywords:([\s\S]*?)(?=\n\n|Final Thoughts)", analysis_content)
        final_thoughts = re.search(r"Final Thoughts:\n\n([\s\S]*?)(?=\n\n|Recommendations)", analysis_content)
        recommendations = re.search(r"Recommendations:\n\n([\s\S]*?)(?=\n\n|$)", analysis_content)

        # Extracted content. The percentage character class includes \s, so the
        # raw group carries the leading space after the colon (" 85%"); strip()
        # fixes that — previously the value was returned unstripped.
        match_percentage = match_percentage.group(1).strip() if match_percentage else "Match Percentage: N/A"
        missing_keywords = missing_keywords.group(1).strip() if missing_keywords else "No missing keywords identified."
        final_thoughts = final_thoughts.group(1).strip() if final_thoughts else "No final thoughts provided."
        recommendations = recommendations.group(1).strip() if recommendations else "No recommendations provided."

        return {
            "match_percentage": match_percentage,
            "missing_keywords": missing_keywords,
            "final_thoughts": final_thoughts,
            "recommendations": recommendations
        }

    except Exception as e:
        # Best-effort boundary: surface the problem in the UI and return
        # placeholder values so callers always get the same dict shape.
        st.error(f"Error extracting analysis: {str(e)}")
        return {
            "match_percentage": "Match Percentage: N/A",
            "missing_keywords": "Error extracting missing keywords.",
            "final_thoughts": "Error extracting final thoughts.",
            "recommendations": "Error extracting recommendations."
        }
|
| 97 |
-
|
| 98 |
def display_resume(file, index):
|
| 99 |
"""Display uploaded resume content."""
|
| 100 |
file_type = file.name.split('.')[-1].lower()
|
| 101 |
unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
|
| 102 |
if file_type == 'pdf':
|
| 103 |
-
|
| 104 |
-
text = extract_text_from_pdf_pymupdf(file)
|
| 105 |
st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
|
| 106 |
elif file_type == 'docx':
|
| 107 |
text = extract_text_from_docx(file)
|
|
@@ -110,27 +60,55 @@ def display_resume(file, index):
|
|
| 110 |
st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
|
| 111 |
|
| 112 |
def analyze_multiple_resumes(resumes, job_description):
|
| 113 |
-
"""Analyze multiple resumes."""
|
| 114 |
-
|
| 115 |
for index, resume in enumerate(resumes):
|
| 116 |
resume.seek(0) # Reset file pointer
|
| 117 |
file_type = resume.name.split('.')[-1].lower()
|
| 118 |
|
| 119 |
# Extract resume text based on file type
|
| 120 |
if file_type == 'pdf':
|
| 121 |
-
|
| 122 |
elif file_type == 'docx':
|
| 123 |
-
|
| 124 |
|
| 125 |
-
# Analyze the resume
|
| 126 |
-
analysis = analyze_documents(
|
| 127 |
-
full_analysis = extract_full_analysis(analysis)
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
# Streamlit app configuration
|
| 136 |
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
+
from PyPDF2 import PdfReader
|
|
|
|
| 4 |
from docx import Document
|
|
|
|
| 5 |
|
| 6 |
# Set up API key for Google Generative Language
|
| 7 |
API_KEY = st.secrets["GOOGLE_API_KEY"]
|
| 8 |
|
| 9 |
+
def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file.

    Args:
        pdf_file: A file path or binary file-like object readable by
            PyPDF2's PdfReader (e.g. a Streamlit UploadedFile).

    Returns:
        The concatenated text of every page. Pages with no extractable
        text (e.g. scanned/image-only pages) contribute nothing.
    """
    reader = PdfReader(pdf_file)
    text = ""
    for page in reader.pages:
        # extract_text() can yield None/empty for image-only pages; the
        # original `text += page.extract_text()` raised TypeError on None.
        text += page.extract_text() or ""
    return text
|
| 16 |
|
| 17 |
def extract_text_from_docx(docx_file):
|
|
|
|
| 44 |
]
|
| 45 |
}
|
| 46 |
response = requests.post(url, headers=headers, json=data)
|
|
|
|
|
|
|
| 47 |
return response.json()
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
def display_resume(file, index):
|
| 50 |
"""Display uploaded resume content."""
|
| 51 |
file_type = file.name.split('.')[-1].lower()
|
| 52 |
unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
|
| 53 |
if file_type == 'pdf':
|
| 54 |
+
text = extract_text_from_pdf(file)
|
|
|
|
| 55 |
st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
|
| 56 |
elif file_type == 'docx':
|
| 57 |
text = extract_text_from_docx(file)
|
|
|
|
| 60 |
st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
|
| 61 |
|
| 62 |
def analyze_multiple_resumes(resumes, job_description):
    """Analyze multiple resumes against a job description and display results.

    For each uploaded file: extract its text, send it with the job
    description to analyze_documents(), render the model's response, and
    show a per-resume match percentage. Finishes with the average match
    percentage across all successfully analyzed resumes.

    Args:
        resumes: Iterable of uploaded file objects (PDF or DOCX), each with
            .name and .seek().
        job_description: Job description text to compare each resume against.
    """
    match_percentages = []
    for resume in resumes:
        resume.seek(0)  # Reset file pointer
        file_type = resume.name.split('.')[-1].lower()

        # Extract resume text based on file type.
        if file_type == 'pdf':
            resume_text = extract_text_from_pdf(resume)
        elif file_type == 'docx':
            resume_text = extract_text_from_docx(resume)
        else:
            # Previously an unsupported type fell through with resume_text
            # unbound, raising NameError on the analyze_documents call.
            st.error(f"Unsupported file type for {resume.name}. Please upload a PDF or DOCX file.")
            continue

        # Analyze the resume text.
        analysis = analyze_documents(resume_text, job_description)

        if "candidates" in analysis:
            for candidate in analysis["candidates"]:
                if "content" in candidate and "parts" in candidate["content"]:
                    for part in candidate["content"]["parts"]:
                        response_text = part["text"]
                        st.write(response_text)

                        # Extract match percentage safely from the free-form
                        # model output: keep only the digits after the colon.
                        match_percentage = None
                        for line in response_text.split("\n"):
                            if "match percentage" in line.lower():
                                percentage_str = ''.join(filter(str.isdigit, line.split(":")[-1].strip()))
                                if percentage_str:  # valid numeric match percentage
                                    try:
                                        match_percentage = int(percentage_str)
                                        # Cap the match percentage to 100.
                                        if match_percentage > 100:
                                            match_percentage = 100
                                    except ValueError:
                                        st.error(f"Error processing match percentage in resume {resume.name}")
                                        match_percentage = 0  # default to 0 on failure

                        if match_percentage is not None:
                            match_percentages.append(match_percentage)
                            st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
                            st.progress(match_percentage / 100)  # progress wants 0.0-1.0

    # Display overall match percentage across all resumes.
    if match_percentages:
        avg_match_percentage = sum(match_percentages) / len(match_percentages)
        st.write(f"### Average Match Percentage for All Resumes: {avg_match_percentage:.2f}%")
        st.progress(avg_match_percentage / 100)  # progress wants 0.0-1.0
|
| 112 |
|
| 113 |
# Streamlit app configuration
|
| 114 |
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
|