DreamStream-1 committed on
Commit
3cd34b3
·
verified ·
1 Parent(s): cca8029

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -69
app.py CHANGED
@@ -1,30 +1,17 @@
1
- import re
2
  import streamlit as st
3
  import requests
4
- import fitz # PyMuPDF for text extraction
5
- import pdfplumber # pdfplumber for more accurate text extraction
6
  from docx import Document
7
- import io
8
 
9
  # Set up API key for Google Generative Language
10
  API_KEY = st.secrets["GOOGLE_API_KEY"]
11
 
12
- def extract_text_from_pdf_pymupdf(pdf_file):
13
- """Extract text from PDF using PyMuPDF (fitz)."""
 
14
  text = ""
15
- # Open the PDF file from the byte stream
16
- pdf_document = fitz.open(stream=pdf_file.read(), filetype="pdf")
17
- for page_num in range(len(pdf_document)):
18
- page = pdf_document.load_page(page_num)
19
- text += page.get_text("text") # Extract text from page
20
- return text
21
-
22
- def extract_text_from_pdf_pdfplumber(pdf_file):
23
- """Extract text from PDF using pdfplumber."""
24
- text = ""
25
- with pdfplumber.open(pdf_file) as pdf:
26
- for page in pdf.pages:
27
- text += page.extract_text() # Extract text from page
28
  return text
29
 
30
  def extract_text_from_docx(docx_file):
@@ -57,51 +44,14 @@ def analyze_documents(resume_text, job_description):
57
  ]
58
  }
59
  response = requests.post(url, headers=headers, json=data)
60
-
61
- # Return the full API response
62
  return response.json()
63
 
64
- def extract_full_analysis(response):
65
- """Extract the full analysis (match percentage, missing keywords, etc.) from the API response."""
66
- try:
67
- # Get the analysis content from the API response
68
- analysis_content = response.get("choices", [{}])[0].get("text", "")
69
-
70
- # Regex to extract the match percentage, missing keywords, final thoughts, and recommendations
71
- match_percentage = re.search(r"Match Percentage:.*?([a-zA-Z0-9\s\-\(\)<>\d]+%)", analysis_content)
72
- missing_keywords = re.search(r"Missing Keywords:([\s\S]*?)(?=\n\n|Final Thoughts)", analysis_content)
73
- final_thoughts = re.search(r"Final Thoughts:\n\n([\s\S]*?)(?=\n\n|Recommendations)", analysis_content)
74
- recommendations = re.search(r"Recommendations:\n\n([\s\S]*?)(?=\n\n|$)", analysis_content)
75
-
76
- # Extracted content
77
- match_percentage = match_percentage.group(1) if match_percentage else "Match Percentage: N/A"
78
- missing_keywords = missing_keywords.group(1).strip() if missing_keywords else "No missing keywords identified."
79
- final_thoughts = final_thoughts.group(1).strip() if final_thoughts else "No final thoughts provided."
80
- recommendations = recommendations.group(1).strip() if recommendations else "No recommendations provided."
81
-
82
- return {
83
- "match_percentage": match_percentage,
84
- "missing_keywords": missing_keywords,
85
- "final_thoughts": final_thoughts,
86
- "recommendations": recommendations
87
- }
88
-
89
- except Exception as e:
90
- st.error(f"Error extracting analysis: {str(e)}")
91
- return {
92
- "match_percentage": "Match Percentage: N/A",
93
- "missing_keywords": "Error extracting missing keywords.",
94
- "final_thoughts": "Error extracting final thoughts.",
95
- "recommendations": "Error extracting recommendations."
96
- }
97
-
98
  def display_resume(file, index):
99
  """Display uploaded resume content."""
100
  file_type = file.name.split('.')[-1].lower()
101
  unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
102
  if file_type == 'pdf':
103
- # Read the PDF file into memory and extract text
104
- text = extract_text_from_pdf_pymupdf(file)
105
  st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
106
  elif file_type == 'docx':
107
  text = extract_text_from_docx(file)
@@ -110,27 +60,55 @@ def display_resume(file, index):
110
  st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
111
 
112
  def analyze_multiple_resumes(resumes, job_description):
113
- """Analyze multiple resumes."""
114
-
115
  for index, resume in enumerate(resumes):
116
  resume.seek(0) # Reset file pointer
117
  file_type = resume.name.split('.')[-1].lower()
118
 
119
  # Extract resume text based on file type
120
  if file_type == 'pdf':
121
- text = extract_text_from_pdf_pymupdf(resume) # Use PyMuPDF
122
  elif file_type == 'docx':
123
- text = extract_text_from_docx(resume)
124
 
125
- # Analyze the resume once
126
- analysis = analyze_documents(text, job_description)
127
- full_analysis = extract_full_analysis(analysis)
128
 
129
- # Display the full analysis result
130
- st.write(f"### Match Percentage: {full_analysis['match_percentage']}")
131
- st.write(f"### Missing Keywords: {full_analysis['missing_keywords']}")
132
- st.write(f"### Final Thoughts:\n{full_analysis['final_thoughts']}")
133
- st.write(f"### Recommendations:\n{full_analysis['recommendations']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  # Streamlit app configuration
136
  st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
 
 
1
  import streamlit as st
2
  import requests
3
+ from PyPDF2 import PdfReader
 
4
  from docx import Document
 
5
 
6
  # Set up API key for Google Generative Language
7
  API_KEY = st.secrets["GOOGLE_API_KEY"]
8
 
9
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: The PDF source handed to ``PdfReader`` (the callers in this
            file pass the uploaded resume file object).

    Returns:
        The text of all pages concatenated in page order; an empty string
        when no page yields any text.
    """
    reader = PdfReader(pdf_file)
    # A page may yield an empty (or, presumably on some PyPDF2 versions, None)
    # result, e.g. for image-only pages; coerce to "" so the concatenation
    # cannot fail with a TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
16
 
17
  def extract_text_from_docx(docx_file):
 
44
  ]
45
  }
46
  response = requests.post(url, headers=headers, json=data)
 
 
47
  return response.json()
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def display_resume(file, index):
50
  """Display uploaded resume content."""
51
  file_type = file.name.split('.')[-1].lower()
52
  unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
53
  if file_type == 'pdf':
54
+ text = extract_text_from_pdf(file)
 
55
  st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
56
  elif file_type == 'docx':
57
  text = extract_text_from_docx(file)
 
60
  st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
61
 
def _parse_match_percentage(response_text):
    """Return the match percentage (0-100) stated in *response_text*, or None."""
    # Local import: the module's top-level imports do not include ``re``.
    import re

    label = re.compile(r"match percentage(.*)", re.IGNORECASE)
    number = r"(\d+(?:\.\d+)?)"

    for line in response_text.split("\n"):
        labelled = label.search(line)
        if labelled is None:
            continue
        tail = labelled.group(1)
        # Prefer a number written as a percentage ("75%", "85.5 %"); otherwise
        # fall back to the first number after the label. Parsing a single
        # number avoids gluing unrelated digits together, e.g.
        # "75% (10 of 15 keywords)" must read as 75, not 751015.
        found = re.search(number + r"\s*%", tail) or re.search(number, tail)
        if found:
            # Cap at 100 so the value is always a valid progress-bar input.
            return min(100, round(float(found.group(1))))
    return None


def analyze_multiple_resumes(resumes, job_description):
    """Analyze multiple resumes and display the results.

    For each resume the text is extracted according to its file extension,
    sent to ``analyze_documents`` together with the job description, and every
    text part of the response is written to the page. When a response states a
    match percentage it is shown with a progress bar, and the average over all
    parsed percentages is shown at the end.

    Args:
        resumes: Iterable of uploaded file objects exposing ``name`` and
            ``seek``; only ``.pdf`` and ``.docx`` files are analyzed.
        job_description: Job description text the resumes are compared to.

    Returns:
        None. All output is rendered through Streamlit.
    """
    match_percentages = []

    for resume in resumes:
        resume.seek(0)  # Reset file pointer in case the file was already read
        file_type = resume.name.split('.')[-1].lower()

        # Extract resume text based on file type
        if file_type == 'pdf':
            resume_text = extract_text_from_pdf(resume)
        elif file_type == 'docx':
            resume_text = extract_text_from_docx(resume)
        else:
            # Without this guard resume_text would be unbound (or silently
            # reuse the previous resume's text) for any other extension.
            st.error(f"Unsupported file type for {resume.name}. Please upload a PDF or DOCX file.")
            continue

        # Analyze the resume text
        analysis = analyze_documents(resume_text, job_description)

        candidates = analysis.get("candidates") if isinstance(analysis, dict) else None
        if not candidates:
            # A response without candidates (e.g. an API error payload) used
            # to be skipped silently; tell the user instead.
            st.error(f"No analysis was returned for {resume.name}.")
            continue

        for candidate in candidates:
            parts = (candidate.get("content") or {}).get("parts") or []
            for part in parts:
                response_text = part.get("text")
                if response_text is None:
                    continue  # Part carries no text; nothing to show
                st.write(response_text)

                match_percentage = _parse_match_percentage(response_text)
                if match_percentage is not None:
                    match_percentages.append(match_percentage)
                    st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
                    st.progress(match_percentage / 100)  # Convert to decimal format

    # Display overall match percentage across all resumes
    if match_percentages:
        avg_match_percentage = sum(match_percentages) / len(match_percentages)
        st.write(f"### Average Match Percentage for All Resumes: {avg_match_percentage:.2f}%")
        st.progress(avg_match_percentage / 100)  # Convert to decimal format
112
 
113
  # Streamlit app configuration
114
  st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")