Spaces:

cicboy
/

PDF_chatbot

Runtime error

App Files Community

cicboy committed on Oct 16, 2025

Commit

d2f8a91

1 Parent(s): cc9c8d4

update application file

Browse files

Files changed (1) hide show

app.py +18 -7

app.py CHANGED Viewed

@@ -85,7 +85,7 @@ def insert_chunks(chunks):
 def expand_query(query):
     try:
         prompt = f"""Expand the following short questions into a more detailed search query
-that includes synonyms and related HR terms:
 {query}
 """
@@ -99,7 +99,7 @@ that includes synonyms and related HR terms:
         print("⚠️ Query expansion failed:", e)
         return query
-def search_weaviate(query, k=8):
     pdf_chunks = client.collections.get("PDFChunk")
     expanded_query = expand_query(query)
     query_vec = embed(expanded_query)
@@ -107,10 +107,20 @@ def search_weaviate(query, k=8):
     result = pdf_chunks.query.hybrid(  #both lexical and semantic
             query=expanded_query,
             vector=query_vec,
-            alpha=0.6,
             limit=k,
             return_properties=["text", "page"]
     )
     return [(o.properties["text"], o.metadata.distance)for o in result.objects]
 def rerank_chunks_with_llm(query, chunks):
@@ -128,7 +138,8 @@ def rerank_chunks_with_llm(query, chunks):
     rerank_prompt = f"""
 You are a precise HR assistant that ranks excerpts
-from a staff handbook by how relevant they are to the user's question
     Question: {query}
@@ -165,14 +176,14 @@ from a staff handbook by how relevant they are to the user's question
     return ordered_chunks
 def ask_question(query):
-    chunks = search_weaviate(query, k=8)
     reranked_chunks = rerank_chunks_with_llm(query, chunks)
     # Use top three after reranking
-    context = "\n\n---\n\n".join(reranked_chunks[:3])
     prompt = f"""
-You are an HR assitant answering questions from the staff handbook.
 Use only the following content to answer accurately and concisely:
     {context}

 def expand_query(query):
     try:
         prompt = f"""Expand the following short questions into a more detailed search query
+that includes synonyms and related HR terms, but also restate the keywords clearly:
 {query}
 """
         print("⚠️ Query expansion failed:", e)
         return query
def search_weaviate(query, k=12):
    """Run a hybrid (keyword + vector) search over the "PDFChunk" collection.

    The query is first rewritten by ``expand_query``; the expanded text is
    used as the keyword query and, through ``embed``, as the search vector.

    Args:
        query: The user's question.
        k: Maximum number of objects requested from Weaviate.

    Returns:
        A list of ``(text, distance)`` tuples, one per object that passes
        the relevance filter below. ``distance`` is taken from the object's
        metadata and is ``None`` when the metadata does not carry one.
    """
    pdf_chunks = client.collections.get("PDFChunk")
    expanded_query = expand_query(query)
    query_vec = embed(expanded_query)
    result = pdf_chunks.query.hybrid(  # both lexical and semantic
        query=expanded_query,
        vector=query_vec,
        # alpha balances vector vs. keyword scoring (1 = pure vector,
        # 0 = pure keyword per the Weaviate hybrid-search docs).
        alpha=0.3,
        limit=k,
        return_properties=["text", "page"],
    )

    # NOTE(review): no return_metadata is requested above, so distance and
    # certainty are presumably None for every object and the filter passes
    # everything through -- confirm against the Weaviate client docs.
    filtered_chunks = []
    for obj in result.objects:
        distance = getattr(obj.metadata, "distance", None)
        certainty = getattr(obj.metadata, "certainty", None)
        # Keep results above a relevance threshold. A missing distance is
        # treated as "unknown" and kept rather than discarded.
        if (distance is None or distance < 1.2) or (certainty and certainty > 0.3):
            filtered_chunks.append((obj.properties["text"], distance))

    # Return the filtered results; previously the unfiltered result.objects
    # were returned, which made the threshold check above dead code.
    return filtered_chunks
 def rerank_chunks_with_llm(query, chunks):
     rerank_prompt = f"""
 You are a precise HR assistant that ranks excerpts
+from a staff handbook by how relevant they are to the user's question.
+You must rank excerpts that directly answer the user's question higher than those that merely discuss related topics.
     Question: {query}
     return ordered_chunks
 def ask_question(query):
+    chunks = search_weaviate(query, k=12)
     reranked_chunks = rerank_chunks_with_llm(query, chunks)
     # Use top three after reranking
+    context = "\n\n---\n\n".join(reranked_chunks[:4])
     prompt = f"""
+You are an HR assistant answering questions from the staff handbook.
 Use only the following content to answer accurately and concisely:
     {context}