Spaces:
Sleeping
Sleeping
update application file
Browse files
app.py
CHANGED
|
@@ -85,7 +85,7 @@ def insert_chunks(chunks):
|
|
| 85 |
def expand_query(query):
|
| 86 |
try:
|
| 87 |
prompt = f"""Expand the following short questions into a more detailed search query
|
| 88 |
-
that includes synonyms and related HR terms:
|
| 89 |
|
| 90 |
{query}
|
| 91 |
"""
|
|
@@ -99,7 +99,7 @@ that includes synonyms and related HR terms:
|
|
| 99 |
print("⚠️ Query expansion failed:", e)
|
| 100 |
return query
|
| 101 |
|
| 102 |
-
def search_weaviate(query, k=
|
| 103 |
pdf_chunks = client.collections.get("PDFChunk")
|
| 104 |
expanded_query = expand_query(query)
|
| 105 |
query_vec = embed(expanded_query)
|
|
@@ -107,10 +107,20 @@ def search_weaviate(query, k=8):
|
|
| 107 |
result = pdf_chunks.query.hybrid( #both lexical and semantic
|
| 108 |
query=expanded_query,
|
| 109 |
vector=query_vec,
|
| 110 |
-
alpha=0.
|
| 111 |
limit=k,
|
| 112 |
return_properties=["text", "page"]
|
| 113 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
return [(o.properties["text"], o.metadata.distance)for o in result.objects]
|
| 115 |
|
| 116 |
def rerank_chunks_with_llm(query, chunks):
|
|
@@ -128,7 +138,8 @@ def rerank_chunks_with_llm(query, chunks):
|
|
| 128 |
|
| 129 |
rerank_prompt = f"""
|
| 130 |
You are a precise HR assistant that ranks excerpts
|
| 131 |
-
from a staff handbook by how relevant they are to the user's question
|
|
|
|
| 132 |
|
| 133 |
Question: {query}
|
| 134 |
|
|
@@ -165,14 +176,14 @@ from a staff handbook by how relevant they are to the user's question
|
|
| 165 |
return ordered_chunks
|
| 166 |
|
| 167 |
def ask_question(query):
|
| 168 |
-
chunks = search_weaviate(query, k=
|
| 169 |
reranked_chunks = rerank_chunks_with_llm(query, chunks)
|
| 170 |
|
| 171 |
# Use top three after reranking
|
| 172 |
-
context = "\n\n---\n\n".join(reranked_chunks[:
|
| 173 |
|
| 174 |
prompt = f"""
|
| 175 |
-
You are an HR
|
| 176 |
Use only the following content to answer accurately and concisely:
|
| 177 |
{context}
|
| 178 |
|
|
|
|
| 85 |
def expand_query(query):
|
| 86 |
try:
|
| 87 |
prompt = f"""Expand the following short questions into a more detailed search query
|
| 88 |
+
that includes synonyms and related HR terms, but also restate the keywords clearly:
|
| 89 |
|
| 90 |
{query}
|
| 91 |
"""
|
|
|
|
| 99 |
print("⚠️ Query expansion failed:", e)
|
| 100 |
return query
|
| 101 |
|
| 102 |
+
def search_weaviate(query, k=12):
|
| 103 |
pdf_chunks = client.collections.get("PDFChunk")
|
| 104 |
expanded_query = expand_query(query)
|
| 105 |
query_vec = embed(expanded_query)
|
|
|
|
| 107 |
result = pdf_chunks.query.hybrid( #both lexical and semantic
|
| 108 |
query=expanded_query,
|
| 109 |
vector=query_vec,
|
| 110 |
+
alpha=0.3,
|
| 111 |
limit=k,
|
| 112 |
return_properties=["text", "page"]
|
| 113 |
)
|
| 114 |
+
|
| 115 |
+
filtered_objects = []
|
| 116 |
+
for o in result.objects:
|
| 117 |
+
distance = getattr(o.metadata, "distance", None)
|
| 118 |
+
certainty = getattr(o.metadata, "certainty", None)
|
| 119 |
+
|
| 120 |
+
# Keep results above a relevance threshold
|
| 121 |
+
if (distance is None or distance < 1.2) or (certainty and certainty >0.3):
|
| 122 |
+
filtered_objects.append(o)
|
| 123 |
+
|
| 124 |
return [(o.properties["text"], o.metadata.distance)for o in result.objects]
|
| 125 |
|
| 126 |
def rerank_chunks_with_llm(query, chunks):
|
|
|
|
| 138 |
|
| 139 |
rerank_prompt = f"""
|
| 140 |
You are a precise HR assistant that ranks excerpts
|
| 141 |
+
from a staff handbook by how relevant they are to the user's question.
|
| 142 |
+
You must rank excerpts that directly answer the user's question higher than those that merely discuss related topics.
|
| 143 |
|
| 144 |
Question: {query}
|
| 145 |
|
|
|
|
| 176 |
return ordered_chunks
|
| 177 |
|
| 178 |
def ask_question(query):
|
| 179 |
+
chunks = search_weaviate(query, k=12)
|
| 180 |
reranked_chunks = rerank_chunks_with_llm(query, chunks)
|
| 181 |
|
| 182 |
# Use top four after reranking
|
| 183 |
+
context = "\n\n---\n\n".join(reranked_chunks[:4])
|
| 184 |
|
| 185 |
prompt = f"""
|
| 186 |
+
You are an HR assistant answering questions from the staff handbook.
|
| 187 |
Use only the following content to answer accurately and concisely:
|
| 188 |
{context}
|
| 189 |
|