Spaces:

DreamStream-1
/

CDF-HR

Sleeping

App Files Files Community

DreamStream-1 committed on Nov 25, 2024

Commit

14dcd22

verified ·

1 Parent(s): 24e857d

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -46

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from PyPDF2 import PdfReader
 from docx import Document
 import pandas as pd
 import matplotlib.pyplot as plt
 # Set up API key for Google Generative Language
 API_KEY = st.secrets["GOOGLE_API_KEY"]
@@ -24,16 +25,45 @@ def extract_text_from_docx(docx_file):
         text += para.text + "\n"
     return text
 def analyze_documents(resume_text, job_description):
     """Analyze resume text against the job description."""
     custom_prompt = f"""
     Please analyze the following resume in the context of the job description provided.
-    Check every single line in the job description and analyze the resume for an exact match.
-    Focus on hard skills and soft skills, maintaining high ATS standards. Provide:
-    1. The match percentage of the resume to the job description.
-    2. A list of missing keywords.
-    3. Final thoughts on the resume's overall match in 3 lines.
-    4. Recommendations on how to add missing keywords and improve the resume in 3-4 points.
     Job Description: {job_description}
     Resume: {resume_text}
     """
@@ -54,9 +84,11 @@ def display_resume(file, index):
     unique_key = f"{file.name}_{index}"  # Ensure the key is unique by appending an index
     if file_type == 'pdf':
         text = extract_text_from_pdf(file)
         st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
     elif file_type == 'docx':
         text = extract_text_from_docx(file)
         st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
     else:
         st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
@@ -67,53 +99,34 @@ def analyze_multiple_resumes(resumes, job_description):
     for index, resume in enumerate(resumes):
         resume.seek(0)  # Reset file pointer
         file_type = resume.name.split('.')[-1].lower()
         # Extract resume text based on file type
         if file_type == 'pdf':
             resume_text = extract_text_from_pdf(resume)
         elif file_type == 'docx':
             resume_text = extract_text_from_docx(resume)
         # Analyze the resume text
         analysis = analyze_documents(resume_text, job_description)
-        if "candidates" in analysis:
-            for candidate in analysis["candidates"]:
-                if "content" in candidate and "parts" in candidate["content"]:
-                    for part in candidate["content"]["parts"]:
-                        response_text = part["text"]
-                        st.write(response_text)
-                        # Extract match percentage safely
-                        lines = response_text.split("\n")
-                        match_percentage = None
-                        for line in lines:
-                            if "match percentage" in line.lower():
-                                # Try to extract the match percentage
-                                percentage_str = ''.join(filter(str.isdigit, line.split(":")[-1].strip()))
-                                if percentage_str:  # If there's a valid numeric match percentage
-                                    try:
-                                        match_percentage = int(percentage_str)
-                                        # Cap the match percentage to 100
-                                        if match_percentage > 100:
-                                            match_percentage = 100
-                                    except ValueError:
-                                        st.error(f"Error processing match percentage in resume {resume.name}")
-                                        match_percentage = 0  # Default to 0 if there's an issue
-                        if match_percentage is not None:
-                            match_percentages.append(match_percentage)
-                            st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
-                            st.progress(match_percentage / 100)  # Convert to decimal format
-                            # Pie chart for skills
-                            labels = ["Matched", "Missing"]
-                            sizes = [match_percentage, 100 - match_percentage]
-                            fig, ax = plt.subplots()
-                            ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140)
-                            ax.axis('equal')
-                            st.pyplot(fig)
     # Display overall match percentage across all resumes
     if match_percentages:
         avg_match_percentage = sum(match_percentages) / len(match_percentages)

 from docx import Document
 import pandas as pd
 import matplotlib.pyplot as plt
+import re
 # Set up API key for Google Generative Language
 API_KEY = st.secrets["GOOGLE_API_KEY"]
         text += para.text + "\n"
     return text
def normalize_text(text):
    """Return *text* lowercased with every whitespace run collapsed to one space.

    Leading and trailing whitespace disappears as a side effect of splitting
    on whitespace and re-joining the pieces.
    """
    # Splitting without an argument drops all whitespace runs, including
    # newlines and tabs, so re-joining yields single-space separation.
    words = text.split()
    collapsed = " ".join(words)
    # Lowercase last so comparisons are case-insensitive.
    return collapsed.lower()
def clean_resume_text(text):
    """Collapse all whitespace in *text* to single spaces and trim the ends.

    Args:
        text: Raw resume text.

    Returns:
        The text with every run of whitespace (spaces, tabs, carriage
        returns, newlines) replaced by one space, and no leading or trailing
        whitespace.
    """
    # A single `\s+` pass already covers \r, \n and \t, so no separate
    # substitution for those characters is needed afterwards.
    return re.sub(r'\s+', ' ', text).strip()
def extract_match_percentage(response):
    """Extract the match percentage from an analysis API response.

    Reads the text at ``response["choices"][0]["text"]`` and scans it line by
    line for the phrase "match percentage" (case-insensitive). The first
    number following that phrase on the line is taken as the percentage; if
    nothing numeric follows the phrase, the first number anywhere on the line
    is used. When several lines qualify, the last one wins.

    Args:
        response: Mapping returned by the analysis call.
            NOTE(review): only the "choices"/"text" shape is read here;
            confirm it matches what the caller actually receives.

    Returns:
        int: Percentage clamped to 0..100 (decimals are truncated), or 0 when
        no percentage is found or the response cannot be read.
    """
    import re  # local import keeps this block self-contained

    phrase = "match percentage"
    try:
        # `or` guards cover a missing key, an empty list and an explicit None.
        choices = response.get("choices") or [{}]
        analysis_content = choices[0].get("text") or ""

        match_percentage = None
        for line in analysis_content.split("\n"):
            lowered = line.lower()
            phrase_at = lowered.find(phrase)
            if phrase_at == -1:
                continue
            tail = lowered[phrase_at + len(phrase):]
            # Take ONE number only: gluing every digit on the line together
            # turns "75% (3 of 4)" into 7534 and "85.5%" into 855.
            found = (re.search(r"\d+(?:\.\d+)?", tail)
                     or re.search(r"\d+(?:\.\d+)?", lowered))
            if found:
                # Clamp while still a float so absurdly long digit runs
                # cannot overflow the int conversion.
                value = int(min(100.0, float(found.group())))
                match_percentage = max(0, value)
        return match_percentage if match_percentage is not None else 0
    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
        st.error(f"Error extracting match percentage: {str(e)}")
        return 0  # Default to 0 if the response cannot be read
 def analyze_documents(resume_text, job_description):
     """Analyze resume text against the job description."""
     custom_prompt = f"""
     Please analyze the following resume in the context of the job description provided.
+    Consider exact matches for hard skills, soft skills, and experience keywords.
+    1. Report the exact match percentage of the resume to the job description.
+    2. List the missing keywords from the resume.
+    3. Provide final feedback on the resume’s overall relevance to the job description.
     Job Description: {job_description}
     Resume: {resume_text}
     """
     unique_key = f"{file.name}_{index}"  # Ensure the key is unique by appending an index
     if file_type == 'pdf':
         text = extract_text_from_pdf(file)
+        text = normalize_text(text)  # Normalize text
         st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
     elif file_type == 'docx':
         text = extract_text_from_docx(file)
+        text = normalize_text(text)  # Normalize text
         st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
     else:
         st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
     for index, resume in enumerate(resumes):
         resume.seek(0)  # Reset file pointer
         file_type = resume.name.split('.')[-1].lower()
         # Extract resume text based on file type
         if file_type == 'pdf':
             resume_text = extract_text_from_pdf(resume)
         elif file_type == 'docx':
             resume_text = extract_text_from_docx(resume)
+        # Normalize and clean extracted text
+        resume_text = clean_resume_text(normalize_text(resume_text))
         # Analyze the resume text
         analysis = analyze_documents(resume_text, job_description)
+        if "choices" in analysis:
+            match_percentage = extract_match_percentage(analysis)
+            match_percentages.append(match_percentage)
+            st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
+            st.progress(match_percentage / 100)  # Convert to decimal format
+            # Pie chart for skills
+            labels = ["Matched", "Missing"]
+            sizes = [match_percentage, 100 - match_percentage]
+            fig, ax = plt.subplots()
+            ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140)
+            ax.axis('equal')
+            st.pyplot(fig)
     # Display overall match percentage across all resumes
     if match_percentages:
         avg_match_percentage = sum(match_percentages) / len(match_percentages)