Arjun Singh committed on
Commit
ae4680e
·
1 Parent(s): aa0e951

Added metadata

Browse files
Files changed (1) hide show
  1. app.py +43 -8
app.py CHANGED
@@ -90,7 +90,7 @@ def store_culture_docs(culture_files: List[tempfile._TemporaryFileWrapper]) -> s
90
  return f"Successfully stored {len(all_docs)} culture document chunks"
91
 
92
  def store_resumes(resume_files: List[tempfile._TemporaryFileWrapper]) -> str:
93
- """Store resumes in the vector store"""
94
  text_splitter = RecursiveCharacterTextSplitter(
95
  chunk_size=1000,
96
  chunk_overlap=200
@@ -103,14 +103,23 @@ def store_resumes(resume_files: List[tempfile._TemporaryFileWrapper]) -> str:
103
  else:
104
  loader = UnstructuredFileLoader(file.name)
105
  docs = loader.load()
 
 
 
 
 
106
  splits = text_splitter.split_documents(docs)
 
 
 
 
107
  all_docs.extend(splits)
108
 
109
  resume_store.add_documents(all_docs)
110
- return f"Successfully stored {len(all_docs)} resume chunks"
111
 
112
  def analyze_candidates(job_description: str) -> str:
113
- # Extract skills prompt template
114
  skills_prompt = PromptTemplate(
115
  input_variables=["job_description"],
116
  template="""
@@ -132,7 +141,7 @@ def analyze_candidates(job_description: str) -> str:
132
  # Query vector stores separately
133
  results = resume_store.similarity_search(
134
  job_description,
135
- k=5
136
  )
137
 
138
  culture_docs = culture_store.similarity_search(
@@ -140,11 +149,25 @@ def analyze_candidates(job_description: str) -> str:
140
  k=3
141
  )
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  # Analysis prompt template
144
  analysis_prompt = PromptTemplate(
145
- input_variables=["job_description", "skills", "culture_docs", "resumes"],
146
  template="""
147
- Analyze these candidates for the job position and culture fit.
148
 
149
  Job Description:
150
  {job_description}
@@ -155,7 +178,7 @@ def analyze_candidates(job_description: str) -> str:
155
  Company Culture Context:
156
  {culture_docs}
157
 
158
- Candidate Resumes:
159
  {resumes}
160
 
161
  For each candidate, provide:
@@ -163,6 +186,8 @@ def analyze_candidates(job_description: str) -> str:
163
  2. Culture fit assessment
164
  3. Recommendation (move forward/reject)
165
  4. Brief explanation
 
 
166
  """
167
  )
168
 
@@ -171,13 +196,23 @@ def analyze_candidates(job_description: str) -> str:
171
  prompt=analysis_prompt
172
  )
173
 
 
 
 
 
 
 
174
  analysis = analysis_chain.run({
175
  "job_description": job_description,
176
  "skills": skills,
177
  "culture_docs": "\n".join([doc.page_content for doc in culture_docs]),
178
- "resumes": "\n".join([doc.page_content for doc in results])
 
179
  })
180
 
 
 
 
181
  return analysis
182
 
183
 
 
90
  return f"Successfully stored {len(all_docs)} culture document chunks"
91
 
92
  def store_resumes(resume_files: List[tempfile._TemporaryFileWrapper]) -> str:
93
+ """Store resumes in the vector store with proper metadata"""
94
  text_splitter = RecursiveCharacterTextSplitter(
95
  chunk_size=1000,
96
  chunk_overlap=200
 
103
  else:
104
  loader = UnstructuredFileLoader(file.name)
105
  docs = loader.load()
106
+
107
+ # Extract filename without extension as resume ID
108
+ resume_id = os.path.splitext(os.path.basename(file.name))[0]
109
+
110
+ # Add metadata to each chunk
111
  splits = text_splitter.split_documents(docs)
112
+ for split in splits:
113
+ split.metadata["resume_id"] = resume_id
114
+ split.metadata["source"] = "resume"
115
+
116
  all_docs.extend(splits)
117
 
118
  resume_store.add_documents(all_docs)
119
+ return f"Successfully stored {len(resume_files)} resumes"
120
 
121
  def analyze_candidates(job_description: str) -> str:
122
+ # Extract skills first
123
  skills_prompt = PromptTemplate(
124
  input_variables=["job_description"],
125
  template="""
 
141
  # Query vector stores separately
142
  results = resume_store.similarity_search(
143
  job_description,
144
+ k=10 # Increase k to get more chunks
145
  )
146
 
147
  culture_docs = culture_store.similarity_search(
 
149
  k=3
150
  )
151
 
152
+ # Group resume chunks by resume_id
153
+ resume_groups = {}
154
+ for doc in results:
155
+ resume_id = doc.metadata.get("resume_id")
156
+ if resume_id not in resume_groups:
157
+ resume_groups[resume_id] = []
158
+ resume_groups[resume_id].append(doc.page_content)
159
+
160
+ # Combine chunks for each resume
161
+ consolidated_resumes = {
162
+ resume_id: "\n".join(chunks)
163
+ for resume_id, chunks in resume_groups.items()
164
+ }
165
+
166
  # Analysis prompt template
167
  analysis_prompt = PromptTemplate(
168
+ input_variables=["job_description", "skills", "culture_docs", "num_resumes", "resumes"],
169
  template="""
170
+ Analyze the following {num_resumes} candidates for the job position and culture fit.
171
 
172
  Job Description:
173
  {job_description}
 
178
  Company Culture Context:
179
  {culture_docs}
180
 
181
+ Candidates to analyze:
182
  {resumes}
183
 
184
  For each candidate, provide:
 
186
  2. Culture fit assessment
187
  3. Recommendation (move forward/reject)
188
  4. Brief explanation
189
+
190
+ Important: Only analyze the {num_resumes} distinct candidates provided above. Do not make up additional candidates.
191
  """
192
  )
193
 
 
196
  prompt=analysis_prompt
197
  )
198
 
199
+ # Format resumes for analysis
200
+ formatted_resumes = "\n\n".join([
201
+ f"Resume {i+1}:\n{content}"
202
+ for i, content in enumerate(consolidated_resumes.values())
203
+ ])
204
+
205
  analysis = analysis_chain.run({
206
  "job_description": job_description,
207
  "skills": skills,
208
  "culture_docs": "\n".join([doc.page_content for doc in culture_docs]),
209
+ "num_resumes": len(consolidated_resumes),
210
+ "resumes": formatted_resumes
211
  })
212
 
213
+ print(f"Number of unique resumes found: {len(consolidated_resumes)}")
214
+ print(f"Resume IDs: {list(consolidated_resumes.keys())}")
215
+
216
  return analysis
217
 
218