Spaces:
Running
Running
Arjun Singh
committed on
Commit
·
ae4680e
1
Parent(s):
aa0e951
Added metadata
Browse files
app.py
CHANGED
|
@@ -90,7 +90,7 @@ def store_culture_docs(culture_files: List[tempfile._TemporaryFileWrapper]) -> s
|
|
| 90 |
return f"Successfully stored {len(all_docs)} culture document chunks"
|
| 91 |
|
| 92 |
def store_resumes(resume_files: List[tempfile._TemporaryFileWrapper]) -> str:
|
| 93 |
-
"""Store resumes in the vector store"""
|
| 94 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 95 |
chunk_size=1000,
|
| 96 |
chunk_overlap=200
|
|
@@ -103,14 +103,23 @@ def store_resumes(resume_files: List[tempfile._TemporaryFileWrapper]) -> str:
|
|
| 103 |
else:
|
| 104 |
loader = UnstructuredFileLoader(file.name)
|
| 105 |
docs = loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
splits = text_splitter.split_documents(docs)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
all_docs.extend(splits)
|
| 108 |
|
| 109 |
resume_store.add_documents(all_docs)
|
| 110 |
-
return f"Successfully stored {len(
|
| 111 |
|
| 112 |
def analyze_candidates(job_description: str) -> str:
|
| 113 |
-
# Extract skills
|
| 114 |
skills_prompt = PromptTemplate(
|
| 115 |
input_variables=["job_description"],
|
| 116 |
template="""
|
|
@@ -132,7 +141,7 @@ def analyze_candidates(job_description: str) -> str:
|
|
| 132 |
# Query vector stores separately
|
| 133 |
results = resume_store.similarity_search(
|
| 134 |
job_description,
|
| 135 |
-
k=
|
| 136 |
)
|
| 137 |
|
| 138 |
culture_docs = culture_store.similarity_search(
|
|
@@ -140,11 +149,25 @@ def analyze_candidates(job_description: str) -> str:
|
|
| 140 |
k=3
|
| 141 |
)
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
# Analysis prompt template
|
| 144 |
analysis_prompt = PromptTemplate(
|
| 145 |
-
input_variables=["job_description", "skills", "culture_docs", "resumes"],
|
| 146 |
template="""
|
| 147 |
-
Analyze
|
| 148 |
|
| 149 |
Job Description:
|
| 150 |
{job_description}
|
|
@@ -155,7 +178,7 @@ def analyze_candidates(job_description: str) -> str:
|
|
| 155 |
Company Culture Context:
|
| 156 |
{culture_docs}
|
| 157 |
|
| 158 |
-
|
| 159 |
{resumes}
|
| 160 |
|
| 161 |
For each candidate, provide:
|
|
@@ -163,6 +186,8 @@ def analyze_candidates(job_description: str) -> str:
|
|
| 163 |
2. Culture fit assessment
|
| 164 |
3. Recommendation (move forward/reject)
|
| 165 |
4. Brief explanation
|
|
|
|
|
|
|
| 166 |
"""
|
| 167 |
)
|
| 168 |
|
|
@@ -171,13 +196,23 @@ def analyze_candidates(job_description: str) -> str:
|
|
| 171 |
prompt=analysis_prompt
|
| 172 |
)
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
analysis = analysis_chain.run({
|
| 175 |
"job_description": job_description,
|
| 176 |
"skills": skills,
|
| 177 |
"culture_docs": "\n".join([doc.page_content for doc in culture_docs]),
|
| 178 |
-
"
|
|
|
|
| 179 |
})
|
| 180 |
|
|
|
|
|
|
|
|
|
|
| 181 |
return analysis
|
| 182 |
|
| 183 |
|
|
|
|
| 90 |
return f"Successfully stored {len(all_docs)} culture document chunks"
|
| 91 |
|
| 92 |
def store_resumes(resume_files: List[tempfile._TemporaryFileWrapper]) -> str:
|
| 93 |
+
"""Store resumes in the vector store with proper metadata"""
|
| 94 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 95 |
chunk_size=1000,
|
| 96 |
chunk_overlap=200
|
|
|
|
| 103 |
else:
|
| 104 |
loader = UnstructuredFileLoader(file.name)
|
| 105 |
docs = loader.load()
|
| 106 |
+
|
| 107 |
+
# Extract filename without extension as resume ID
|
| 108 |
+
resume_id = os.path.splitext(os.path.basename(file.name))[0]
|
| 109 |
+
|
| 110 |
+
# Add metadata to each chunk
|
| 111 |
splits = text_splitter.split_documents(docs)
|
| 112 |
+
for split in splits:
|
| 113 |
+
split.metadata["resume_id"] = resume_id
|
| 114 |
+
split.metadata["source"] = "resume"
|
| 115 |
+
|
| 116 |
all_docs.extend(splits)
|
| 117 |
|
| 118 |
resume_store.add_documents(all_docs)
|
| 119 |
+
return f"Successfully stored {len(resume_files)} resumes"
|
| 120 |
|
| 121 |
def analyze_candidates(job_description: str) -> str:
|
| 122 |
+
# Extract skills first
|
| 123 |
skills_prompt = PromptTemplate(
|
| 124 |
input_variables=["job_description"],
|
| 125 |
template="""
|
|
|
|
| 141 |
# Query vector stores separately
|
| 142 |
results = resume_store.similarity_search(
|
| 143 |
job_description,
|
| 144 |
+
k=10 # Increase k to get more chunks
|
| 145 |
)
|
| 146 |
|
| 147 |
culture_docs = culture_store.similarity_search(
|
|
|
|
| 149 |
k=3
|
| 150 |
)
|
| 151 |
|
| 152 |
+
# Group resume chunks by resume_id
|
| 153 |
+
resume_groups = {}
|
| 154 |
+
for doc in results:
|
| 155 |
+
resume_id = doc.metadata.get("resume_id")
|
| 156 |
+
if resume_id not in resume_groups:
|
| 157 |
+
resume_groups[resume_id] = []
|
| 158 |
+
resume_groups[resume_id].append(doc.page_content)
|
| 159 |
+
|
| 160 |
+
# Combine chunks for each resume
|
| 161 |
+
consolidated_resumes = {
|
| 162 |
+
resume_id: "\n".join(chunks)
|
| 163 |
+
for resume_id, chunks in resume_groups.items()
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
# Analysis prompt template
|
| 167 |
analysis_prompt = PromptTemplate(
|
| 168 |
+
input_variables=["job_description", "skills", "culture_docs", "num_resumes", "resumes"],
|
| 169 |
template="""
|
| 170 |
+
Analyze the following {num_resumes} candidates for the job position and culture fit.
|
| 171 |
|
| 172 |
Job Description:
|
| 173 |
{job_description}
|
|
|
|
| 178 |
Company Culture Context:
|
| 179 |
{culture_docs}
|
| 180 |
|
| 181 |
+
Candidates to analyze:
|
| 182 |
{resumes}
|
| 183 |
|
| 184 |
For each candidate, provide:
|
|
|
|
| 186 |
2. Culture fit assessment
|
| 187 |
3. Recommendation (move forward/reject)
|
| 188 |
4. Brief explanation
|
| 189 |
+
|
| 190 |
+
Important: Only analyze the {num_resumes} distinct candidates provided above. Do not make up additional candidates.
|
| 191 |
"""
|
| 192 |
)
|
| 193 |
|
|
|
|
| 196 |
prompt=analysis_prompt
|
| 197 |
)
|
| 198 |
|
| 199 |
+
# Format resumes for analysis
|
| 200 |
+
formatted_resumes = "\n\n".join([
|
| 201 |
+
f"Resume {i+1}:\n{content}"
|
| 202 |
+
for i, content in enumerate(consolidated_resumes.values())
|
| 203 |
+
])
|
| 204 |
+
|
| 205 |
analysis = analysis_chain.run({
|
| 206 |
"job_description": job_description,
|
| 207 |
"skills": skills,
|
| 208 |
"culture_docs": "\n".join([doc.page_content for doc in culture_docs]),
|
| 209 |
+
"num_resumes": len(consolidated_resumes),
|
| 210 |
+
"resumes": formatted_resumes
|
| 211 |
})
|
| 212 |
|
| 213 |
+
print(f"Number of unique resumes found: {len(consolidated_resumes)}")
|
| 214 |
+
print(f"Resume IDs: {list(consolidated_resumes.keys())}")
|
| 215 |
+
|
| 216 |
return analysis
|
| 217 |
|
| 218 |
|