Spaces:

sofzcc
/

Full_RAG_Assistant

Sleeping

App Files Community

sofzcc committed on Dec 2, 2025

Commit

2d28f5c

verified ·

1 Parent(s): df86717

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -19

app.py CHANGED Viewed

@@ -427,28 +427,47 @@ class RAGIndex:
         combined_text = "\n\n".join(combined_context)
-        # Clean markdown / numbering / duplicates
-        combined_text = clean_context_text(combined_text)
-        # Limit context length to keep it manageable
-        max_context_chars = 4000
-        if len(combined_text) > max_context_chars:
-            combined_text = combined_text[:max_context_chars]
-        # Prompt for the generative model
-        prompt = (
-            "You are an AI assistant that answers questions using only the provided context.\n"
-            "Your task is to synthesize a clear, natural explanation in your own words.\n"
-            "- Do NOT copy headings or section numbers from the context.\n"
-            "- Do NOT include markdown like '#', '##', '---', or bullet/list markers.\n"
-            "- Do NOT mention file names, sources, or internal labels in your answer.\n"
-            "- Do NOT just repeat full sentences from the context; always paraphrase.\n"
-            "- If the answer cannot be found in the context, reply exactly with: "
             "\"I don't know based on the provided documents.\"\n\n"
-            f"Context:\n{combined_text}\n\n"
             f"Question: {question}\n\n"
-            "Answer in 1–3 concise sentences of plain text:"
         )
         try:

         combined_text = "\n\n".join(combined_context)
+        # STEP 1 — Summarize each chunk individually
+        summaries = []
+        for ctx in combined_context:
+            prompt_summary = (
+                "Summarize the following text in one concise sentence, keeping only the core idea:\n\n"
+                f"{ctx}\n\nSummary:"
+            )
+            inputs = self.qa_tokenizer(prompt_summary, return_tensors="pt", truncation=True).to(self.qa_model.device)
+            output = self.qa_model.generate(
+                **inputs,
+                max_new_tokens=64,
+                do_sample=False
+            )
+            summary_text = self.qa_tokenizer.decode(output[0], skip_special_tokens=True).strip()
+            summaries.append(summary_text)
+        # STEP 2 — Combine all summaries into a clean evidence pool
+        evidence = " ".join(summaries)
+        # STEP 3 — Ask model to answer based on summaries only
+        prompt_answer = (
+            "You are an AI assistant that answers questions using only the summarized evidence below.\n"
+            "Write a clear and complete answer in 1–3 sentences.\n"
+            "Do NOT repeat numbers, headings, markdown, or irrelevant text.\n"
+            "Do NOT say where the information came from.\n"
+            "If the answer cannot be found in the evidence, reply:\n"
             "\"I don't know based on the provided documents.\"\n\n"
+            f"Evidence:\n{evidence}\n\n"
             f"Question: {question}\n\n"
+            "Answer:"
+        )
+        inputs = self.qa_tokenizer(prompt_answer, return_tensors="pt", truncation=True).to(self.qa_model.device)
+        output = self.qa_model.generate(
+            **inputs,
+            max_new_tokens=128,
+            do_sample=False
         )
+        answer_text = self.qa_tokenizer.decode(output[0], skip_special_tokens=True).strip()
         try: