cicboy committed on
Commit
d2f8a91
·
1 Parent(s): cc9c8d4

update application file

Browse files
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -85,7 +85,7 @@ def insert_chunks(chunks):
85
  def expand_query(query):
86
  try:
87
  prompt = f"""Expand the following short questions into a more detailed search query
88
- that includes synonyms and related HR terms:
89
 
90
  {query}
91
  """
@@ -99,7 +99,7 @@ that includes synonyms and related HR terms:
99
  print("⚠️ Query expansion failed:", e)
100
  return query
101
 
102
- def search_weaviate(query, k=8):
103
  pdf_chunks = client.collections.get("PDFChunk")
104
  expanded_query = expand_query(query)
105
  query_vec = embed(expanded_query)
@@ -107,10 +107,20 @@ def search_weaviate(query, k=8):
107
  result = pdf_chunks.query.hybrid( #both lexical and semantic
108
  query=expanded_query,
109
  vector=query_vec,
110
- alpha=0.6,
111
  limit=k,
112
  return_properties=["text", "page"]
113
  )
 
 
 
 
 
 
 
 
 
 
114
  return [(o.properties["text"], o.metadata.distance)for o in result.objects]
115
 
116
  def rerank_chunks_with_llm(query, chunks):
@@ -128,7 +138,8 @@ def rerank_chunks_with_llm(query, chunks):
128
 
129
  rerank_prompt = f"""
130
  You are a precise HR assistant that ranks excerpts
131
- from a staff handbook by how relevant they are to the user's question
 
132
 
133
  Question: {query}
134
 
@@ -165,14 +176,14 @@ from a staff handbook by how relevant they are to the user's question
165
  return ordered_chunks
166
 
167
  def ask_question(query):
168
- chunks = search_weaviate(query, k=8)
169
  reranked_chunks = rerank_chunks_with_llm(query, chunks)
170
 
171
  # Use top three after reranking
172
- context = "\n\n---\n\n".join(reranked_chunks[:3])
173
 
174
  prompt = f"""
175
- You are an HR assitant answering questions from the staff handbook.
176
  Use only the following content to answer accurately and concisely:
177
  {context}
178
 
 
85
  def expand_query(query):
86
  try:
87
  prompt = f"""Expand the following short questions into a more detailed search query
88
+ that includes synonyms and related HR terms, but also restate the keywords clearly:
89
 
90
  {query}
91
  """
 
99
  print("⚠️ Query expansion failed:", e)
100
  return query
101
 
102
+ def search_weaviate(query, k=12):
103
  pdf_chunks = client.collections.get("PDFChunk")
104
  expanded_query = expand_query(query)
105
  query_vec = embed(expanded_query)
 
107
  result = pdf_chunks.query.hybrid( #both lexical and semantic
108
  query=expanded_query,
109
  vector=query_vec,
110
+ alpha=0.3,
111
  limit=k,
112
  return_properties=["text", "page"]
113
  )
114
+
115
+ filtered_objects = []
116
+ for o in result.objects:
117
+ distance = getattr(o.metadata, "distance", None)
118
+ certainty = getattr(o.metadata, "certainty", None)
119
+
120
+ # Keep results above a relevance threshold
121
+ if (distance is None or distance < 1.2) or (certainty and certainty >0.3):
122
+ filtered_objects.append(o)
123
+
124
  return [(o.properties["text"], o.metadata.distance)for o in result.objects]
125
 
126
  def rerank_chunks_with_llm(query, chunks):
 
138
 
139
  rerank_prompt = f"""
140
  You are a precise HR assistant that ranks excerpts
141
+ from a staff handbook by how relevant they are to the user's question.
142
+ You must rank excerpts that directly answer the user's question higher than those that merely discuss related topics.
143
 
144
  Question: {query}
145
 
 
176
  return ordered_chunks
177
 
178
  def ask_question(query):
179
+ chunks = search_weaviate(query, k=12)
180
  reranked_chunks = rerank_chunks_with_llm(query, chunks)
181
 
182
  # Use top three after reranking
183
+ context = "\n\n---\n\n".join(reranked_chunks[:4])
184
 
185
  prompt = f"""
186
+ You are an HR assistant answering questions from the staff handbook.
187
  Use only the following content to answer accurately and concisely:
188
  {context}
189