Update src/qa.py

src/qa.py (CHANGED)

@@ -47,147 +47,97 @@ except Exception as e:
     print("✅ Loaded fallback model: all-MiniLM-L6-v2")

 # ==========================================================
-# 3️⃣ LLM for Answer Generation (Flan-T5)
+# 3️⃣ LLM for Answer Generation (OpenAI GPT with Flan fallback)
 # ==========================================================
-MODEL_NAME = "google/flan-t5-base"
-_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
-_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
-
-_answer_model = pipeline(
-    "text2text-generation",
-    model=_model,
-    tokenizer=_tokenizer,
-    device=-1
-)
-
-# ==========================================================
-# 4️⃣ Prompt Template
-# ==========================================================
-PROMPT_TEMPLATE = """You are an enterprise knowledge assistant.
-Use ONLY the provided context to answer the question.
-If the answer is not in the context, reply exactly:
-"I don't know based on the provided document."
-
----
-Context:
-{context}
----
-Question:
-{query}
----
-Answer:
-"""
-
-# ==========================================================
-# 5️⃣ Chunk Retrieval Function (Improved for Large Docs)
-# ==========================================================
-def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5):
-    """
-    Retrieve top-K relevant chunks and merge nearby ones for context continuity.
-    Re-ranks using cosine similarity to improve semantic precision.
-    """
-    if not index or not chunks:
-        return []
-
-    try:
-        # Step 1: Encode query
-        query_emb = _query_model.encode(
-            [f"query: {query.strip()}"],
-            convert_to_numpy=True,
-            normalize_embeddings=True
-        )[0]
-
-        # Step 2: Initial FAISS retrieval
-        distances, indices = index.search(np.array([query_emb]).astype("float32"), top_k * 2)
-
-        # Step 3: Merge neighbors for more complete context
-        merged_chunks = []
-        for idx in indices[0]:
-            neighbors = [chunks[i] for i in range(max(0, idx - 1), min(len(chunks), idx + 2))]
-            merged_chunks.append(" ".join(neighbors))
-
-        # Step 4: Re-rank results with cosine similarity
-        chunk_vecs = np.array([
-            _query_model.encode([c], convert_to_numpy=True, normalize_embeddings=True)[0]
-            for c in merged_chunks
-        ])
-        scores = cosine_similarity(np.array([query_emb]), chunk_vecs)[0]
-        sorted_indices = np.argsort(scores)[::-1]
-
-        # Step 5: Return top ranked chunks
-        return [merged_chunks[i] for i in sorted_indices[:top_k]]
-
-    except Exception as e:
-        print(f"⚠️ Retrieval error: {e}")
-        return []
+from openai import OpenAI
+client = None
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+if OPENAI_API_KEY:
+    client = OpenAI(api_key=OPENAI_API_KEY)
+    LLM_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
+    print(f"✅ Using OpenAI model: {LLM_MODEL}")
+else:
+    # Fallback to Flan if no API key is provided
+    MODEL_NAME = "google/flan-t5-base"
+    print(f"⚠️ No OpenAI key found. Using fallback model: {MODEL_NAME}")
+    _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
+    _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
+    _answer_model = pipeline(
+        "text2text-generation",
+        model=_model,
+        tokenizer=_tokenizer,
+        device=-1
+    )


 # ==========================================================
-# 6️⃣ Answer Generation Function (Flan-T5)
+# 6️⃣ Answer Generation Function (GPT or Flan fallback)
 # ==========================================================
 def generate_answer(query: str, retrieved_chunks: list):
     """
-    Generates a grounded answer from the retrieved document context.
-
-    - Ensures completeness for large-document answers
+    Generates grounded, context-only answers.
+    Uses GPT (preferred) or Flan-T5 (fallback) for response synthesis.
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."

-    # Merge retrieved chunks
+    # Combine retrieved chunks
     context = "\n\n".join([
-        f"[Chunk {i+1}]: {chunk.strip()}"
-        for i, chunk in enumerate(retrieved_chunks)
+        f"[Chunk {i+1}]: {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks)
     ])

-
+    # --- PROMPT TEMPLATE ---
+    system_prompt = """You are an enterprise knowledge assistant.
+Use ONLY the provided context to answer the user's question accurately.
+If the answer is not explicitly in the context, reply exactly:
+"I don't know based on the provided document."
+Be factual, concise, and structured when relevant.
+"""
+
+    user_prompt = f"""
+Context:
+{context}
+
+Question:
+{query}
+
+Answer:
+"""

+    # --- Use OpenAI GPT if key available ---
+    if client:
+        try:
+            response = client.chat.completions.create(
+                model=LLM_MODEL,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt},
+                ],
+                temperature=0.2,  # factual, low creativity
+                max_tokens=500,
+                presence_penalty=0,
+                frequency_penalty=0
+            )
+            answer = response.choices[0].message.content.strip()
+            return answer
+        except Exception as e:
+            print(f"⚠️ OpenAI generation failed: {e}")
+            return "⚠️ Error: Could not generate an answer at the moment."
+
+    # --- Otherwise, use Flan-T5 fallback ---
     try:
         result = _answer_model(
-            PROMPT_TEMPLATE.format(context=context, query=query),
+            system_prompt + user_prompt,  # PROMPT_TEMPLATE is removed above, so build the prompt inline
             max_new_tokens=600,
             do_sample=False,
             temperature=0.3,
             repetition_penalty=1.1
         )
-
         answer = result[0]["generated_text"].strip()
-
-        # Safety filter: ensure the model doesn’t hallucinate
         if "I don't know" in answer:
             return "I don't know based on the provided document."
-
         return answer
-
     except Exception as e:
-        print(f"⚠️ Generation error: {e}")
+        print(f"⚠️ Flan generation failed: {e}")
         return "⚠️ Error: Could not generate an answer at the moment."
-
-
-# ==========================================================
-# 7️⃣ Optional Local Test
-# ==========================================================
-if __name__ == "__main__":
-    dummy_chunks = [
-        "Step 1: Open the dashboard and navigate to reports.",
-        "Step 2: Click 'Export' to download a CSV summary.",
-        "Step 3: Review the generated report in your downloads folder."
-    ]
-    from vectorstore import build_faiss_index
-
-    index = build_faiss_index([
-        _query_model.encode(
-            [f"passage: {chunk}"],
-            convert_to_numpy=True,
-            normalize_embeddings=True
-        )[0]
-        for chunk in dummy_chunks
-    ])
-
-    query = "What are the steps to export a report?"
-    retrieved = retrieve_chunks(query, index, dummy_chunks)
-    print("🔍 Retrieved:", retrieved)
-    print("💬 Answer:", generate_answer(query, retrieved))
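
Both branches are configured entirely from the environment when the module is imported. A minimal sketch of steering the selection from a caller — the key value is a placeholder, and the `src.qa` import path is an assumption about the package layout:

```python
import os

# The branch is chosen at import time, so the key must be set before importing.
os.environ["OPENAI_API_KEY"] = "sk-..."     # placeholder, not a real key
os.environ["OPENAI_MODEL"] = "gpt-4o"       # optional; defaults to gpt-4o-mini

from src import qa  # assumed layout for src/qa.py
```

Leaving OPENAI_API_KEY unset routes every call through the local Flan-T5 pipeline instead.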
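
This change also removes retrieve_chunks and the optional `__main__` smoke test from the module. A minimal replacement check for the new generate_answer path, reusing the dummy data from the removed test and passing chunks in directly (import path assumed as above):

```python
from src.qa import generate_answer  # assumed import path

dummy_chunks = [
    "Step 1: Open the dashboard and navigate to reports.",
    "Step 2: Click 'Export' to download a CSV summary.",
    "Step 3: Review the generated report in your downloads folder."
]

query = "What are the steps to export a report?"
print("💬 Answer:", generate_answer(query, dummy_chunks))
```

With an API key set this exercises the GPT branch; without one it falls back to Flan-T5.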