DreamStream-1 committed on
Commit
9a1feff
·
verified ·
1 Parent(s): f8a1025

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -134
app.py CHANGED
@@ -1,42 +1,68 @@
 
1
  import streamlit as st
 
 
 
2
  import requests
3
- from PyPDF2 import PdfReader
4
- from docx import Document
5
  import pandas as pd
6
 
7
- # Set up API key for Google Generative Language
8
- API_KEY = st.secrets["GOOGLE_API_KEY"]
9
-
10
- def extract_text_from_pdf(pdf_file):
11
- """Extract text from PDF file using PyPDF2."""
12
- reader = PdfReader(pdf_file)
13
- text = ""
14
- for page in reader.pages:
15
- text += page.extract_text()
16
- return text
17
-
18
- def extract_text_from_docx(docx_file):
19
- """Extract text from DOCX file."""
20
- doc = Document(docx_file)
21
- text = ""
22
- for para in doc.paragraphs:
23
- text += para.text + "\n"
24
- return text
25
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def analyze_documents(resume_text, job_description):
27
  """Analyze resume text against the job description using Gemini 1.5 Flash."""
28
  custom_prompt = f"""
29
  Please analyze the following resume in the context of the job description provided.
30
- Provide the following information:
31
- 1. The candidate's name, contact number, and email address extracted from the resume.
32
- 2. A numeric match percentage (0-100%) based on the alignment of the resume with the job description.
33
- 3. A list of missing keywords that are present in the job description but not in the resume.
34
- 4. Any recommendations to improve the resume for better alignment with the job description.
35
 
36
  Job Description: {job_description}
37
  Resume: {resume_text}
38
  """
39
-
40
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
41
  headers = {'Content-Type': 'application/json'}
42
  data = {
@@ -47,93 +73,45 @@ def analyze_documents(resume_text, job_description):
47
  response = requests.post(url, headers=headers, json=data)
48
  return response.json()
49
 
50
- def display_resume(file, index):
51
- """Display uploaded resume content."""
52
- file_type = file.name.split('.')[-1].lower()
53
- unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
54
- if file_type == 'pdf':
55
- text = extract_text_from_pdf(file)
56
- st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
57
- elif file_type == 'docx':
58
- text = extract_text_from_docx(file)
59
- st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
60
- else:
61
- st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
62
-
63
- def analyze_multiple_resumes(resumes, job_description):
64
- """Analyze multiple resumes and display the results."""
65
- results = []
66
-
67
- for index, resume in enumerate(resumes):
68
- resume.seek(0) # Reset file pointer
69
- file_type = resume.name.split('.')[-1].lower()
70
-
71
- # Extract resume text based on file type
72
- if file_type == 'pdf':
73
- resume_text = extract_text_from_pdf(resume)
74
- elif file_type == 'docx':
75
- resume_text = extract_text_from_docx(resume)
76
-
77
- # Analyze the resume text using the model
78
- analysis = analyze_documents(resume_text, job_description)
79
-
80
- # Extract details from the model's response
81
- name, email, phone, match_percentage = "N/A", "N/A", "N/A", 0
82
- missing_keywords = []
83
- if "candidates" in analysis:
84
- for candidate in analysis["candidates"]:
85
- if "content" in candidate and "parts" in candidate["content"]:
86
- for part in candidate["content"]["parts"]:
87
- response_text = part["text"]
88
- st.write(response_text) # Optional: Display the response for debugging
89
-
90
- # Extract details based on patterns in the response
91
- lines = response_text.split("\n")
92
- for line in lines:
93
- line_lower = line.lower()
94
- if "name:" in line_lower:
95
- name = line.split(":")[-1].strip()
96
- elif "email:" in line_lower:
97
- email = line.split(":")[-1].strip()
98
- elif "contact:" in line_lower:
99
- phone = line.split(":")[-1].strip()
100
- elif "match percentage" in line_lower:
101
- # Extract numeric match percentage
102
- percentage_str = ''.join(filter(str.isdigit, line.split(":")[-1].strip()))
103
- if percentage_str:
104
- try:
105
- match_percentage = int(percentage_str)
106
- if match_percentage > 100:
107
- match_percentage = 100
108
- except ValueError:
109
- match_percentage = 0
110
- elif "missing keywords" in line_lower:
111
- missing_keywords = line.split(":")[-1].strip().split(", ")
112
-
113
- # Append results for the table
114
- results.append({
115
- "Name": name,
116
- "Contact": phone,
117
- "Email": email,
118
- "Match Percentage": match_percentage,
119
- "Missing Keywords": ", ".join(missing_keywords)
120
- })
121
-
122
- # Create a DataFrame for the results
123
- df = pd.DataFrame(results)
124
-
125
- # Display the table
126
- st.write("### Candidate Match Summary")
127
- st.dataframe(df)
128
-
129
- # Downloadable CSV
130
- csv = df.to_csv(index=False)
131
- st.download_button(
132
- label="📥 Download Results as CSV",
133
- data=csv,
134
- file_name="resume_analysis_results.csv",
135
- mime="text/csv",
136
- )
137
 
138
  # Streamlit app configuration
139
  st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
@@ -148,27 +126,20 @@ st.markdown(
148
  """, unsafe_allow_html=True
149
  )
150
  st.markdown('<div class="title">📝🔍🌟 ATS Resume Evaluation System</div>', unsafe_allow_html=True)
151
- st.markdown('<div class="subtitle">Upload up to 10 resumes and analyze them against the job description</div>', unsafe_allow_html=True)
152
 
153
- # Inputs: Job description and multiple resume file uploads
154
- st.sidebar.header("Upload Your Inputs")
155
- job_description = st.sidebar.text_area("Enter the Job Description:", height=250)
156
- resumes = st.sidebar.file_uploader("Upload Your Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
157
 
158
- # Display uploaded resume content
159
- if resumes:
160
- for index, resume in enumerate(resumes):
161
- with st.expander(f"📄 Uploaded Resume Content - {resume.name}", expanded=True):
162
- st.write("### Extracted Text from Resume")
163
- display_resume(resume, index)
164
 
165
- # Analyze button
166
- if st.sidebar.button("Analyze Resumes"):
167
- if job_description and resumes:
168
- if len(resumes) <= 10: # Limit to a maximum of 10 resumes
169
- with st.spinner("Analyzing..."):
170
- analyze_multiple_resumes(resumes, job_description)
171
- else:
172
- st.error("You can upload a maximum of 10 resumes.")
173
  else:
174
- st.error("Please provide both a job description and at least one resume file.")
 
1
+ import spacy
2
  import streamlit as st
3
+ import nltk
4
+ from nltk.tokenize import word_tokenize
5
+ from nltk.corpus import stopwords
6
  import requests
7
+ import re
 
8
  import pandas as pd
9
 
10
+ # Download necessary NLTK data
11
+ nltk.download('punkt')
12
+ nltk.download('stopwords')
13
+
14
+ # Load the SpaCy model
15
+ nlp = spacy.load("en_core_web_sm")
16
+
17
+ # Function to clean and normalize text
18
+ def clean_and_normalize_text(text):
19
+ """Clean and normalize the resume/job description text."""
20
+ # Tokenization
21
+ tokens = word_tokenize(text)
22
+
23
+ # Lowercasing and removing non-alphabetical tokens
24
+ tokens = [word.lower() for word in tokens if word.isalpha()]
25
+
26
+ # Removing stopwords using NLTK
27
+ stop_words = set(stopwords.words("english"))
28
+ filtered_tokens = [word for word in tokens if word not in stop_words]
29
+
30
+ # Lemmatization using SpaCy
31
+ doc = nlp(' '.join(filtered_tokens))
32
+ lemmatized_tokens = [token.lemma_ for token in doc]
33
+
34
+ # Reconstruct the cleaned text
35
+ cleaned_text = ' '.join(lemmatized_tokens)
36
+
37
+ # Optionally, remove extra spaces or characters
38
+ cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
39
+
40
+ return cleaned_text
41
+
42
+ # Function for Named Entity Recognition (NER)
43
+ def extract_named_entities(text):
44
+ """Extract named entities from text using SpaCy."""
45
+ doc = nlp(text)
46
+
47
+ # Extract named entities
48
+ entities = [(ent.text, ent.label_) for ent in doc.ents]
49
+
50
+ return entities
51
+
52
+ # Function to analyze the resume and job description using Gemini 1.5 Flash model
53
  def analyze_documents(resume_text, job_description):
54
  """Analyze resume text against the job description using Gemini 1.5 Flash."""
55
  custom_prompt = f"""
56
  Please analyze the following resume in the context of the job description provided.
57
+ For the match percentage, please consider:
58
+ - The relevance of the hard skills mentioned.
59
+ - The match of experiences and achievements listed in the resume.
60
+ - Only return a 100% match if all critical skills, experiences, and keywords align well and meaningfully with the job description.
 
61
 
62
  Job Description: {job_description}
63
  Resume: {resume_text}
64
  """
65
+
66
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
67
  headers = {'Content-Type': 'application/json'}
68
  data = {
 
73
  response = requests.post(url, headers=headers, json=data)
74
  return response.json()
75
 
76
+ # Streamlit interface to handle text analysis
77
+ def process_text(resume_text, job_description):
78
+ """Process and analyze resume and job description text."""
79
+ # Clean and normalize the text
80
+ cleaned_resume = clean_and_normalize_text(resume_text)
81
+ cleaned_job_description = clean_and_normalize_text(job_description)
82
+
83
+ # Perform Named Entity Recognition (NER)
84
+ resume_entities = extract_named_entities(cleaned_resume)
85
+ job_desc_entities = extract_named_entities(cleaned_job_description)
86
+
87
+ # Refine the prompt with cleaned data and extracted entities
88
+ custom_prompt = f"""
89
+ Please analyze the following resume in the context of the job description provided.
90
+ Here are the named entities found in the job description:
91
+ {job_desc_entities}
92
+ Here are the named entities found in the resume:
93
+ {resume_entities}
94
+
95
+ For the match percentage, please consider:
96
+ - The relevance of the hard skills mentioned.
97
+ - The match of experiences and achievements listed in the resume.
98
+ - Only return a 100% match if all critical skills, experiences, and keywords align well and meaningfully with the job description.
99
+
100
+ Job Description: {cleaned_job_description}
101
+ Resume: {cleaned_resume}
102
+ """
103
+
104
+ # Call the Gemini 1.5 model
105
+ analysis = analyze_documents(cleaned_resume, cleaned_job_description)
106
+
107
+ # Extract the results from the model's response
108
+ results = {
109
+ "Match Percentage": "Not Available", # Placeholder, modify as needed
110
+ "Recommendations": "Not Available" # Placeholder, modify as needed
111
+ }
112
+ # Logic to extract results from the model response can be added here.
113
+
114
+ return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  # Streamlit app configuration
117
  st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
 
126
  """, unsafe_allow_html=True
127
  )
128
  st.markdown('<div class="title">📝🔍🌟 ATS Resume Evaluation System</div>', unsafe_allow_html=True)
129
+ st.markdown('<div class="subtitle">Upload your resume and job description for analysis</div>', unsafe_allow_html=True)
130
 
131
+ # Inputs: Job description and resume file upload
132
+ job_description = st.text_area("Enter the Job Description:", height=250)
133
+ resume_file = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx"])
 
134
 
135
+ # Process the uploaded resume and job description
136
+ if resume_file:
137
+ if job_description:
138
+ resume_text = resume_file.read().decode("utf-8") # Assuming the resume is a text file
139
+ result = process_text(resume_text, job_description)
 
140
 
141
+ # Display the analysis results
142
+ st.write(f"**Match Percentage**: {result['Match Percentage']}")
143
+ st.write(f"**Recommendations**: {result['Recommendations']}")
 
 
 
 
 
144
  else:
145
+ st.warning("Please enter the job description to begin analysis.")