Update app.py
app.py (CHANGED)
@@ -1,112 +1,239 @@
```python
# app.py
import gradio as gr
from chromadb_semantic_search_for_dataset import semantic_search, build_compact_context
from transformers import pipeline, AutoTokenizer, MT5ForConditionalGeneration
import time
import torch

# Try different models - MT5 can be problematic for this task
# Consider using these alternatives:
MODELS_TO_TRY = [
    "google/flan-t5-base",  # Better for instruction following
    "google/mt5-base",      # Smaller, more stable than large
    # "google/mt5-large"    # Your original choice - may have issues
]

SUMMARY_MODEL = MODELS_TO_TRY[0]  # Start with flan-t5-base
ANSWER_MODEL = MODELS_TO_TRY[0]   # Use same model for consistency

print(f"Loading models: {SUMMARY_MODEL}")

# Create pipelines with better parameters
try:
    summarizer = pipeline(
        "text2text-generation",
        model=SUMMARY_MODEL,
        device=-1,  # CPU
        model_kwargs={
            "torch_dtype": torch.float32,
            "low_cpu_mem_usage": True
        }
    )

    answerer = pipeline(
        "text2text-generation",
        model=ANSWER_MODEL,
        device=-1,  # CPU
        model_kwargs={
            "torch_dtype": torch.float32,
            "low_cpu_mem_usage": True
        }
    )
    print("Models loaded successfully!")

except Exception as e:
    print(f"Error loading models: {e}")
    raise
```
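`semantic_search` and `build_compact_context` come from a companion module that is not part of this commit. From the call sites below, `semantic_search(text, n_results=3)` returns a formatted Markdown string, a list of docs each carrying a `"document"` key, and a combined context string, and `build_compact_context(summaries)` collapses the per-document summaries into one bounded string. A minimal sketch of that interface, with the collection name, persistence path, and formatting as assumptions rather than the actual module:

```python
# Hypothetical sketch of chromadb_semantic_search_for_dataset's interface,
# inferred from the call sites in app.py. The real module is not shown in
# this commit; the collection name and persistence path are assumptions.
import chromadb

_client = chromadb.PersistentClient(path="./chroma_db")            # assumed path
_collection = _client.get_or_create_collection("nepali_cases")     # assumed name

def semantic_search(query: str, n_results: int = 3):
    """Return (formatted_markdown, top_docs, combined_context)."""
    res = _collection.query(query_texts=[query], n_results=n_results)
    top_docs = [
        {"document": doc, "distance": dist}
        for doc, dist in zip(res["documents"][0], res["distances"][0])
    ]
    formatted = "\n\n".join(
        f"**Case {i}** (distance {d['distance']:.3f})\n\n{d['document'][:500]}"
        for i, d in enumerate(top_docs, start=1)
    )
    combined_context = "\n\n".join(d["document"] for d in top_docs)
    return formatted, top_docs, combined_context

def build_compact_context(summaries: list[str], max_chars: int = 2000) -> str:
    """Join per-document summaries into one bounded context string."""
    joined = "\n\n".join(f"[{i}] {s}" for i, s in enumerate(summaries, start=1))
    return joined[:max_chars]
```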
```python
# Keep last search context in memory so RAG can use the previous search
# if the user doesn't provide a new one
_last_combined_context = ""
_last_search_query = ""

def semantic_search_ui(search_text: str):
    """Runs semantic search and returns formatted results. Also stores summarized context for RAG."""
    global _last_combined_context, _last_search_query

    print(f"DEBUG: Starting semantic search for: {search_text}")

    try:
        formatted, top_docs, combined_context = semantic_search(search_text, n_results=3)
        print(f"DEBUG: Retrieved {len(top_docs)} documents")

        # Summarize each top doc (short)
        summaries = []
        for idx, item in enumerate(top_docs, start=1):
            doc_text = item["document"]
            print(f"DEBUG: Processing document {idx}, length: {len(doc_text)}")

            # Create a simpler prompt that works better with T5/MT5
            if "flan-t5" in SUMMARY_MODEL.lower():
                prompt = f"Summarize this legal case in Nepali: {doc_text[:1000]}"
            else:
                # Nepali: "Summarize briefly in Nepali:"
                prompt = f"संक्षेपमा नेपालीमा सारांश बनाउनुहोस्: {doc_text[:1000]}"

            try:
                # Better generation parameters
                summary_out = summarizer(
                    prompt,
                    max_length=150,
                    min_length=20,
                    do_sample=False,
                    temperature=0.7,
                    pad_token_id=summarizer.tokenizer.eos_token_id
                )[0]["generated_text"]

                print(f"DEBUG: Generated summary {idx}: {summary_out[:100]}...")

            except Exception as e:
                print(f"DEBUG: Error generating summary {idx}: {e}")
                # fallback: truncated raw text
                summary_out = doc_text[:300] + "..."

            # Clean/truncate extra whitespace
            summary_out = summary_out.strip()
            summaries.append(summary_out)

        # Build compact combined context for the answerer (limited length)
        compact_context = build_compact_context(summaries)
        print(f"DEBUG: Built compact context, length: {len(compact_context)}")

        # Save last context for Ask flow
        _last_combined_context = compact_context
        _last_search_query = search_text

        return formatted, compact_context

    except Exception as e:
        error_msg = f"Error in semantic search: {e}"
        print(f"DEBUG: {error_msg}")
        return error_msg, ""
```
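One caveat in the loop above: `doc_text[:1000]` truncates by characters, while the T5-family models were trained with 512-token inputs, and Devanagari text tends to tokenize into many sub-word pieces, so the tokenizer may silently cut the prompt again. `AutoTokenizer` is imported at the top but never used; a token-aware truncation helper (a sketch, not part of the commit) would make the cut explicit:

```python
from transformers import AutoTokenizer

# Sketch: truncate by tokens instead of characters so the prompt fits
# the encoder window. SUMMARY_MODEL is the constant defined above.
_tok = AutoTokenizer.from_pretrained(SUMMARY_MODEL)

def truncate_to_tokens(text: str, max_tokens: int = 384) -> str:
    ids = _tok.encode(text, add_special_tokens=False)[:max_tokens]
    return _tok.decode(ids, skip_special_tokens=True)

# usage inside the loop, replacing doc_text[:1000]:
# prompt = f"Summarize this legal case in Nepali: {truncate_to_tokens(doc_text)}"
```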
```python
def rag_answer(question: str, search_text_for_context: str = ""):
    """
    Answer the user's question using RAG:
    - If search_text_for_context is provided, run semantic search for it and use its summaries.
    - Otherwise, use the last search context stored in memory (_last_combined_context).
    """
    global _last_combined_context, _last_search_query

    print(f"DEBUG: RAG answer called with question: {question[:50]}...")
    start_time = time.time()

    # If user provided a search string in the RAG tab, refresh context
    if search_text_for_context and search_text_for_context.strip():
        print("DEBUG: Refreshing context with new search")
        _, compact_context = semantic_search_ui(search_text_for_context)
        context = compact_context
    else:
        context = _last_combined_context
        print(f"DEBUG: Using cached context, length: {len(context)}")

    if not context:
        return "No context available. Please run a semantic search first or provide a search query."

    # Construct a simpler prompt that works better with the models
    if "flan-t5" in ANSWER_MODEL.lower():
        prompt = f"Based on these legal case summaries, answer the question in Nepali:\n\nContext: {context[:2000]}\n\nQuestion: {question}\n\nAnswer:"
    else:
        # Nepali: "You are a legal assistant. Use the case summaries below to
        # answer the question in Nepali." / "Context:" / "Question:" / "Answer:"
        prompt = (
            "तपाईं एक कानुनी सहायक हुनुहुन्छ। तलका केस संक्षेप प्रयोग गरी प्रश्नको जवाफ नेपालीमा दिनुहोस्।\n\n"
            f"सन्दर्भ: {context[:2000]}\n\n"
            f"प्रश्न: {question}\n\n"
            "जवाफ:"
        )

    print(f"DEBUG: Generated prompt length: {len(prompt)}")
    print(f"DEBUG: Prompt preview: {prompt[:200]}...")

    try:
        # Generate answer with better parameters
        result = answerer(
            prompt,
            max_length=400,
            min_length=30,
            do_sample=False,
            temperature=0.7,
            pad_token_id=answerer.tokenizer.eos_token_id,
            early_stopping=True
        )

        out = result[0]["generated_text"].strip()
        print(f"DEBUG: Generated answer: {out[:100]}...")

        if not out or len(out) < 10:
            # Nepali: "Sorry, I could not generate a suitable answer. Please rephrase the question."
            out = "माफ गर्नुहोस्, मैले प्रश्नको उपयुक्त जवाफ उत्पन्न गर्न सकिन। कृपया फरक तरिकाले प्रश्न सोध्नुहोस्।"

    except Exception as e:
        print(f"DEBUG: LLM generation error: {e}")
        out = f"Error generating response: {e}. Please try with a simpler question."

    elapsed = time.time() - start_time
    footer = f"\n\n---\n(Generated in {elapsed:.2f}s using summaries of top-3 cases.)"
    return out + footer
```
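One detail about both generation calls: with `do_sample=False`, transformers ignores `temperature` (it applies only to sampling), and `early_stopping` has an effect only under beam search, so the calls above run plain greedy decoding and emit warnings about the unused flags. A warning-free equivalent, sketched with beam search so that `early_stopping` actually does something:

```python
# Sketch: equivalent deterministic generation without the ignored flags.
# temperature only matters when do_sample=True; early_stopping only
# matters when num_beams > 1.
result = answerer(
    prompt,
    max_length=400,
    min_length=30,
    do_sample=False,
    num_beams=4,          # beam search, so early_stopping takes effect
    early_stopping=True
)
out = result[0]["generated_text"].strip()
```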
```python
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# 📚 Semantic Search + RAG (auto-summarize top-3) — Nepali cases")
    gr.Markdown("**Debug Info**: Using models: " + SUMMARY_MODEL)

    with gr.Tab("🔍 Semantic Search"):
        search_input = gr.Textbox(
            label="Search for a case (Nepali preferred)",
            placeholder="मुद्दाको संक्षेप वा कीवर्ड टाइप गर्नुहोस्..."  # "Type a case summary or keyword..."
        )
        search_button = gr.Button("Search")
        search_results = gr.Markdown(label="Top 3 Similar Cases (formatted)")
        context_preview = gr.Textbox(
            label="Combined Summarized Context (for RAG)",
            interactive=False,
            max_lines=10
        )

        search_button.click(
            fn=semantic_search_ui,
            inputs=search_input,
            outputs=[search_results, context_preview]
        )

    with gr.Tab("🤖 Ask a Question (RAG)"):
        question_input = gr.Textbox(
            label="Your question (Nepali)",
            placeholder="यहाँ प्रश्न लेख्नुहोस्..."  # "Write your question here..."
        )
        optional_search_input = gr.Textbox(
            label="Optional: Search query to refresh context",
            placeholder="(Optional) provide a search query to refresh the top-3 context"
        )
        ask_button = gr.Button("Get Answer")
        rag_output = gr.Markdown(label="LLM Answer (based on summarized top-3)")

        ask_button.click(
            fn=rag_answer,
            inputs=[question_input, optional_search_input],
            outputs=rag_output
        )

    with gr.Tab("🐛 Test Model"):
        test_input = gr.Textbox(label="Test input", placeholder="Enter test text...")
        test_button = gr.Button("Test Model")
        test_output = gr.Textbox(label="Model output")

        def test_model(text):
            try:
                result = answerer(f"Translate to Nepali: {text}", max_length=100, do_sample=False)
                return result[0]["generated_text"]
            except Exception as e:
                return f"Model test failed: {e}"

        test_button.click(fn=test_model, inputs=test_input, outputs=test_output)

    gr.Markdown("""
    **Notes**:
    - The system summarizes the top-3 semantic results and uses those summaries as context for the LLM
    - If you experience issues, try the Test Model tab first
    - Check the console logs for debugging information
    """)

if __name__ == "__main__":
    demo.launch(debug=True)
```
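Running `python app.py` starts the Gradio server on the default port (`http://127.0.0.1:7860`); `debug=True` surfaces full error tracebacks in the console alongside the `DEBUG:` prints. For iterating without the browser, a headless smoke test, sketched under the assumption that the companion module's ChromaDB collection is already populated, can replace the launch call:

```python
# Hypothetical headless smoke test: exercises search + summarize + answer
# end to end without the UI. Assumes the ChromaDB collection behind
# chromadb_semantic_search_for_dataset is already built.
if __name__ == "__main__":
    formatted, ctx = semantic_search_ui("सम्पत्ति विवाद")        # "property dispute"
    print("Context preview:\n", ctx[:500])
    print(rag_answer("यस मुद्दाको मुख्य निर्णय के थियो?"))       # "What was the main ruling in this case?"
```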