DreamStream-1 committed on
Commit
14dcd22
·
verified ·
1 Parent(s): 24e857d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -46
app.py CHANGED
@@ -4,6 +4,7 @@ from PyPDF2 import PdfReader
4
  from docx import Document
5
  import pandas as pd
6
  import matplotlib.pyplot as plt
 
7
 
8
  # Set up API key for Google Generative Language
9
  API_KEY = st.secrets["GOOGLE_API_KEY"]
@@ -24,16 +25,45 @@ def extract_text_from_docx(docx_file):
24
  text += para.text + "\n"
25
  return text
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def analyze_documents(resume_text, job_description):
28
  """Analyze resume text against the job description."""
29
  custom_prompt = f"""
30
  Please analyze the following resume in the context of the job description provided.
31
- Check every single line in the job description and analyze the resume for an exact match.
32
- Focus on hard skills and soft skills, maintaining high ATS standards. Provide:
33
- 1. The match percentage of the resume to the job description.
34
- 2. A list of missing keywords.
35
- 3. Final thoughts on the resume's overall match in 3 lines.
36
- 4. Recommendations on how to add missing keywords and improve the resume in 3-4 points.
37
  Job Description: {job_description}
38
  Resume: {resume_text}
39
  """
@@ -54,9 +84,11 @@ def display_resume(file, index):
54
  unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
55
  if file_type == 'pdf':
56
  text = extract_text_from_pdf(file)
 
57
  st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
58
  elif file_type == 'docx':
59
  text = extract_text_from_docx(file)
 
60
  st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
61
  else:
62
  st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
@@ -67,53 +99,34 @@ def analyze_multiple_resumes(resumes, job_description):
67
  for index, resume in enumerate(resumes):
68
  resume.seek(0) # Reset file pointer
69
  file_type = resume.name.split('.')[-1].lower()
70
-
71
  # Extract resume text based on file type
72
  if file_type == 'pdf':
73
  resume_text = extract_text_from_pdf(resume)
74
  elif file_type == 'docx':
75
  resume_text = extract_text_from_docx(resume)
76
-
 
 
 
77
  # Analyze the resume text
78
  analysis = analyze_documents(resume_text, job_description)
79
-
80
- if "candidates" in analysis:
81
- for candidate in analysis["candidates"]:
82
- if "content" in candidate and "parts" in candidate["content"]:
83
- for part in candidate["content"]["parts"]:
84
- response_text = part["text"]
85
- st.write(response_text)
86
-
87
- # Extract match percentage safely
88
- lines = response_text.split("\n")
89
- match_percentage = None
90
- for line in lines:
91
- if "match percentage" in line.lower():
92
- # Try to extract the match percentage
93
- percentage_str = ''.join(filter(str.isdigit, line.split(":")[-1].strip()))
94
- if percentage_str: # If there's a valid numeric match percentage
95
- try:
96
- match_percentage = int(percentage_str)
97
- # Cap the match percentage to 100
98
- if match_percentage > 100:
99
- match_percentage = 100
100
- except ValueError:
101
- st.error(f"Error processing match percentage in resume {resume.name}")
102
- match_percentage = 0 # Default to 0 if there's an issue
103
-
104
- if match_percentage is not None:
105
- match_percentages.append(match_percentage)
106
- st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
107
- st.progress(match_percentage / 100) # Convert to decimal format
108
-
109
- # Pie chart for skills
110
- labels = ["Matched", "Missing"]
111
- sizes = [match_percentage, 100 - match_percentage]
112
- fig, ax = plt.subplots()
113
- ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140)
114
- ax.axis('equal')
115
- st.pyplot(fig)
116
-
117
  # Display overall match percentage across all resumes
118
  if match_percentages:
119
  avg_match_percentage = sum(match_percentages) / len(match_percentages)
 
4
  from docx import Document
5
  import pandas as pd
6
  import matplotlib.pyplot as plt
7
+ import re
8
 
9
  # Set up API key for Google Generative Language
10
  API_KEY = st.secrets["GOOGLE_API_KEY"]
 
25
  text += para.text + "\n"
26
  return text
27
 
28
+ def normalize_text(text):
29
+ """Collapse every run of whitespace in text to a single space and return the result lowercased."""
30
+ # split() with no arguments splits on any whitespace run and drops leading/trailing whitespace
31
+ text = " ".join(text.split())
32
+ return text.lower() # Convert to lowercase to avoid case differences
33
+
34
def clean_resume_text(text):
    """Collapse all whitespace in *text* to single spaces and trim both ends.

    Args:
        text: Raw text extracted from a resume.

    Returns:
        The text with every run of whitespace replaced by one space and no
        leading or trailing whitespace.
    """
    # \s already matches spaces, tabs, carriage returns and newlines, so a
    # single substitution is enough; a follow-up pass for [\r\n\t] could
    # never find anything left to replace.
    return re.sub(r"\s+", " ", text).strip()
39
+
40
_MATCH_PHRASE_RE = re.compile(r"match percentage", re.IGNORECASE)
_PERCENT_SIGN_RE = re.compile(r"(\d+(?:\.\d+)?)\s*%")
_NUMBER_RE = re.compile(r"(\d+(?:\.\d+)?)")


def _response_text(response):
    """Return the analysis text found in either supported response shape."""
    fragments = []
    # Shape the previous revision of this file parsed:
    # candidates -> content -> parts -> text.
    for candidate in response.get("candidates") or []:
        content = candidate.get("content") or {}
        for part in content.get("parts") or []:
            fragments.append(part.get("text") or "")
    # Shape this function originally read: the first choice's text only.
    for choice in (response.get("choices") or [])[:1]:
        fragments.append(choice.get("text") or "")
    return "\n".join(fragments)


def _parse_percentage(line):
    """Return the 0-100 value stated after "match percentage" in *line*, or None."""
    phrase = _MATCH_PHRASE_RE.search(line)
    if phrase is None:
        return None
    # Only look after the phrase so a list prefix such as "1." is not
    # mistaken for the score.
    tail = line[phrase.end():]
    # Prefer a number that carries a % sign; fall back to the first number.
    found = _PERCENT_SIGN_RE.search(tail) or _NUMBER_RE.search(tail)
    if found is None:
        return None
    # Clamp while still a float, then truncate any fractional part.
    return int(min(100.0, max(0.0, float(found.group(1)))))


def extract_match_percentage(response):
    """Extract the match percentage from an analysis API response.

    Two response layouts are understood: a ``"candidates"`` list whose items
    hold ``content -> parts -> text`` and a ``"choices"`` list whose first
    item holds ``text``.  The first line that mentions "match percentage"
    and contains a number decides the result; later mentions are ignored.

    NOTE(review): a caller that only proceeds when ``"choices"`` is present
    will never pass a candidates-shaped response here -- confirm which shape
    ``analyze_documents`` actually returns.

    Args:
        response: Decoded JSON response (a dict) from the analysis request.

    Returns:
        An int between 0 and 100.  0 when no percentage is found or when the
        response is malformed (the latter is also reported via ``st.error``).
    """
    try:
        analysis_content = _response_text(response)
    except (AttributeError, KeyError, TypeError) as e:
        st.error(f"Error extracting match percentage: {str(e)}")
        return 0  # Default to 0 if the response does not have the expected layout

    for line in analysis_content.splitlines():
        percentage = _parse_percentage(line)
        if percentage is not None:
            return percentage
    return 0
58
+
59
  def analyze_documents(resume_text, job_description):
60
  """Analyze resume text against the job description."""
61
  custom_prompt = f"""
62
  Please analyze the following resume in the context of the job description provided.
63
+ Consider exact matches for hard skills, soft skills, and experience keywords.
64
+ 1. Report the exact match percentage of the resume to the job description.
65
+ 2. List the missing keywords from the resume.
66
+ 3. Provide final feedback on the resume’s overall relevance to the job description.
 
 
67
  Job Description: {job_description}
68
  Resume: {resume_text}
69
  """
 
84
  unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
85
  if file_type == 'pdf':
86
  text = extract_text_from_pdf(file)
87
+ text = normalize_text(text) # Normalize text
88
  st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
89
  elif file_type == 'docx':
90
  text = extract_text_from_docx(file)
91
+ text = normalize_text(text) # Normalize text
92
  st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
93
  else:
94
  st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
 
99
  for index, resume in enumerate(resumes):
100
  resume.seek(0) # Reset file pointer
101
  file_type = resume.name.split('.')[-1].lower()
102
+
103
  # Extract resume text based on file type
104
  if file_type == 'pdf':
105
  resume_text = extract_text_from_pdf(resume)
106
  elif file_type == 'docx':
107
  resume_text = extract_text_from_docx(resume)
108
+
109
+ # Normalize and clean extracted text
110
+ resume_text = clean_resume_text(normalize_text(resume_text))
111
+
112
  # Analyze the resume text
113
  analysis = analyze_documents(resume_text, job_description)
114
+
115
+ if "choices" in analysis:
116
+ match_percentage = extract_match_percentage(analysis)
117
+
118
+ match_percentages.append(match_percentage)
119
+ st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
120
+ st.progress(match_percentage / 100) # Convert to decimal format
121
+
122
+ # Pie chart for skills
123
+ labels = ["Matched", "Missing"]
124
+ sizes = [match_percentage, 100 - match_percentage]
125
+ fig, ax = plt.subplots()
126
+ ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140)
127
+ ax.axis('equal')
128
+ st.pyplot(fig)
129
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  # Display overall match percentage across all resumes
131
  if match_percentages:
132
  avg_match_percentage = sum(match_percentages) / len(match_percentages)