# app.py import gradio as gr from chromadb_semantic_search_for_dataset import semantic_search, build_compact_context from transformers import pipeline, AutoTokenizer, MT5ForConditionalGeneration import time import torch # Try different models - MT5 can be problematic for this task # Consider using these alternatives: MODELS_TO_TRY = [ "google/flan-t5-base", # Better for instruction following "google/mt5-base", # Smaller, more stable than large # "google/mt5-large" # Your original choice - may have issues ] SUMMARY_MODEL = MODELS_TO_TRY[0] # Start with flan-t5-base ANSWER_MODEL = MODELS_TO_TRY[0] # Use same model for consistency print(f"Loading models: {SUMMARY_MODEL}") # Create pipelines with better parameters try: summarizer = pipeline( "text2text-generation", model=SUMMARY_MODEL, device=-1, # CPU model_kwargs={ "torch_dtype": torch.float32, "low_cpu_mem_usage": True } ) answerer = pipeline( "text2text-generation", model=ANSWER_MODEL, device=-1, # CPU model_kwargs={ "torch_dtype": torch.float32, "low_cpu_mem_usage": True } ) print("Models loaded successfully!") except Exception as e: print(f"Error loading models: {e}") raise # Keep last search context in memory so RAG can use previous search if user doesn't provide a new search _last_combined_context = "" _last_search_query = "" def create_template_answer(context: str, question: str) -> str: """Create a template-based answer when the model fails""" print("DEBUG: Creating template-based answer") # Extract key information from context cases = context.split("[Case ") case_info = [] for i, case in enumerate(cases[1:], 1): # Skip first empty split try: # Extract case type case_type_match = case.split("मुद्दाको किसिम: ")[1].split(" |")[0] if "मुद्दाको किसिम: " in case else "N/A" # Extract subject subject_match = case.split("विषय: ")[1].split(" |")[0] if "विषय: " in case else "N/A" case_info.append({ 'number': i, 'type': case_type_match, 'subject': subject_match, 'snippet': case[:300] + "..." if len(case) > 300 else case }) except: continue # Generate answer based on question keywords question_lower = question.lower() # Check what the question is about if any(word in question_lower for word in ['हक', 'अधिकार', 'कायम']): answer = f"तपाईंको प्रश्न 'हक कायम' संबंधी छ। उपलब्ध {len(case_info)} केसहरूमा:\n\n" elif any(word in question_lower for word in ['फैसला', 'बदर', 'निर्णय']): answer = f"तपाईंको प्रश्न 'फैसला बदर' संबंधी छ। उपलब्ध {len(case_info)} केसहरूमा:\n\n" elif any(word in question_lower for word in ['अंश', 'दर्ता', 'बाँडफाँड']): answer = f"तपाईंको प्रश्न 'अंश दर्ता' संबंधी छ। उपलब्ध {len(case_info)} केसहरूमा:\n\n" else: answer = f"तपाईंको प्रश्नको सम्बन्धमा {len(case_info)} केसहरू भेटिएका छन्:\n\n" # Add case details for case in case_info[:3]: # Limit to top 3 cases answer += f"केस {case['number']}: {case['type']} - {case['subject']}\n" answer += f"मुख्य विषय: {case['subject']}\n\n" answer += "विस्तृत जानकारीका लागि माथिका केसहरूको लिङ्कहरू हेर्नुहोस्।" return answer def semantic_search_ui(search_text: str): """Runs semantic search and returns formatted results. Also stores summarized context for RAG.""" global _last_combined_context, _last_search_query print(f"DEBUG: Starting semantic search for: {search_text}") try: formatted, top_docs, combined_context = semantic_search(search_text, n_results=3) print(f"DEBUG: Retrieved {len(top_docs)} documents") # Skip model-based summarization for now - use direct text extraction instead summaries = [] for idx, item in enumerate(top_docs, start=1): doc_text = item["document"] meta = item["metadata"] print(f"DEBUG: Processing document {idx}, length: {len(doc_text)}") # Create a manual summary using metadata and document text # This is more reliable than model-based summarization summary_parts = [] # Add key metadata if meta.get('mudda_type'): summary_parts.append(f"मुद्दाको किसिम: {meta['mudda_type']}") if meta.get('subject'): summary_parts.append(f"विषय: {meta['subject']}") if meta.get('nibedak'): summary_parts.append(f"निवेदक: {meta['nibedak'][:100]}...") if meta.get('vipakshi'): summary_parts.append(f"विपक्षी: {meta['vipakshi'][:100]}...") # Add relevant text snippets (look for key legal terms) doc_clean = doc_text.replace('["', '').replace('"]', '').replace('\\n', ' ') # Extract sentences that contain important legal terms important_sentences = [] sentences = doc_clean.split('।') # Split by Nepali sentence delimiter for sentence in sentences[:5]: # Take first 5 sentences sentence = sentence.strip() if len(sentence) > 20 and any(term in sentence.lower() for term in ['फैसला', 'ठहर', 'अदालत', 'मुद्दा', 'कानुन']): important_sentences.append(sentence[:200]) # Limit sentence length if important_sentences: summary_parts.append("मुख्य बुँदाहरू: " + "। ".join(important_sentences[:2]) + "।") else: # Fallback to first part of document clean_start = doc_clean[:300].strip() if clean_start: summary_parts.append(f"विवरण: {clean_start}...") # Combine all parts manual_summary = " | ".join(summary_parts) summaries.append(manual_summary) print(f"DEBUG: Created manual summary {idx}: {manual_summary[:100]}...") # Build compact combined context for the answerer (limited length) compact_context = build_compact_context(summaries) print(f"DEBUG: Built compact context, length: {len(compact_context)}") print(f"DEBUG: Context preview: {compact_context[:200]}...") # Save last context for Ask flow _last_combined_context = compact_context _last_search_query = search_text return formatted, compact_context except Exception as e: error_msg = f"Error in semantic search: {e}" print(f"DEBUG: {error_msg}") return error_msg, "" def rag_answer(question: str, search_text_for_context: str = ""): """ Answer the user's question using RAG: - If search_text_for_context provided, run semantic search for it and use its summaries. - Otherwise, use the last search context stored in memory (_last_combined_context). """ global _last_combined_context, _last_search_query print(f"DEBUG: RAG answer called with question: {question[:50]}...") start_time = time.time() # If user provided a search string in the RAG tab, refresh context if search_text_for_context and search_text_for_context.strip(): print("DEBUG: Refreshing context with new search") _, compact_context = semantic_search_ui(search_text_for_context) context = compact_context else: context = _last_combined_context print(f"DEBUG: Using cached context, length: {len(context)}") if not context or len(context.strip()) < 50: return "No sufficient context available. Please run a semantic search first or provide a search query." print(f"DEBUG: Using context: {context[:300]}...") # Construct a simpler prompt that works better with the models if "flan-t5" in ANSWER_MODEL.lower(): prompt = f"Based on these Nepali legal case summaries, answer the question in Nepali:\n\nContext: {context[:1500]}\n\nQuestion: {question}\n\nProvide a detailed answer in Nepali:" else: prompt = ( "तलका नेपाली अदालती मुद्दाका विवरणहरू प्रयोग गरेर प्रश्नको जवाफ नेपालीमा दिनुहोस्:\n\n" f"मुद्दाहरूको विवरण:\n{context[:1500]}\n\n" f"प्रश्न: {question}\n\n" "विस्तृत जवाफ:" ) print(f"DEBUG: Generated prompt length: {len(prompt)}") print(f"DEBUG: Prompt preview: {prompt[:200]}...") try: print(f"DEBUG: Sending prompt to model (length: {len(prompt)})") print(f"DEBUG: First 300 chars of prompt: {prompt[:300]}") # Try multiple generation strategies result = None # Strategy 1: Simple generation try: result = answerer( prompt, max_length=300, min_length=10, do_sample=False, num_beams=1, pad_token_id=answerer.tokenizer.pad_token_id if hasattr(answerer.tokenizer, 'pad_token_id') else answerer.tokenizer.eos_token_id ) print(f"DEBUG: Strategy 1 successful") except Exception as e1: print(f"DEBUG: Strategy 1 failed: {e1}") # Strategy 2: Even simpler generation try: result = answerer(prompt, max_length=200, do_sample=False) print(f"DEBUG: Strategy 2 successful") except Exception as e2: print(f"DEBUG: Strategy 2 failed: {e2}") # Strategy 3: Template-based fallback (no model) print(f"DEBUG: Using template-based fallback") template_answer = create_template_answer(context, question) return template_answer + f"\n\n---\n(Generated using template fallback in {time.time() - start_time:.2f}s)" if result: out = result[0]["generated_text"].strip() print(f"DEBUG: Raw model output: '{out}'") print(f"DEBUG: Output length: {len(out)}") # Clean up the output - remove the input prompt if it's repeated if prompt in out: out = out.replace(prompt, "").strip() print(f"DEBUG: Cleaned output: '{out[:100]}...'") # Check if output is meaningful if not out or len(out) < 5 or out.lower() in ['none', 'n/a', '']: print(f"DEBUG: Output too short or meaningless, using template fallback") out = create_template_answer(context, question) else: print(f"DEBUG: No result from model, using template fallback") out = create_template_answer(context, question) except Exception as e: print(f"DEBUG: All strategies failed: {e}") out = create_template_answer(context, question) elapsed = time.time() - start_time footer = f"\n\n---\n(Generated in {elapsed:.2f}s using summaries of top-3 cases.)" return out + footer # --- Gradio UI --- with gr.Blocks() as demo: gr.Markdown("# 📚 Semantic Search + RAG (auto-summarize top-3) — Nepali cases") gr.Markdown("**Debug Info**: Using models: " + SUMMARY_MODEL) with gr.Tab("🔍 Semantic Search"): search_input = gr.Textbox( label="Search for a case (use Nepali preferred)", placeholder="मुद्दाको संक्षेप वा कीवर्ड टाइप गर्नुहोस्..." ) search_button = gr.Button("Search") search_results = gr.Markdown(label="Top 3 Similar Cases (formatted)") context_preview = gr.Textbox( label="Combined Summarized Context (for RAG)", interactive=False, max_lines=10 ) search_button.click( fn=semantic_search_ui, inputs=search_input, outputs=[search_results, context_preview] ) with gr.Tab("🤖 Ask a Question (RAG)"): question_input = gr.Textbox( label="Your question (Nepali)", placeholder="यहाँ प्रश्न लेख्नुहोस्..." ) optional_search_input = gr.Textbox( label="Optional: Search query to refresh context", placeholder="(Optional) provide a search query to refresh top-3 context" ) ask_button = gr.Button("Get Answer") rag_output = gr.Markdown(label="LLM Answer (based on summarized top-3)") ask_button.click( fn=rag_answer, inputs=[question_input, optional_search_input], outputs=rag_output ) with gr.Tab("🐛 Test Model"): test_input = gr.Textbox(label="Test input", placeholder="Enter test text...") test_button = gr.Button("Test Model") test_output = gr.Textbox(label="Model output") def test_model(text): if not text.strip(): return "Please enter some text to test" try: # Test 1: Very simple prompt simple_result = answerer(f"Translate to English: {text}", max_length=50, do_sample=False) result1 = simple_result[0]["generated_text"] # Test 2: Nepali prompt nepali_result = answerer(f"यसलाई नेपालीमा भन्नुहोस्: {text}", max_length=50, do_sample=False) result2 = nepali_result[0]["generated_text"] return f"English test: {result1}\n\nNepali test: {result2}\n\nModel is working!" except Exception as e: return f"Model test failed: {e}\n\nModel details:\n- Name: {ANSWER_MODEL}\n- Type: {type(answerer)}" test_button.click(fn=test_model, inputs=test_input, outputs=test_output) gr.Markdown(""" **Notes**: - The system summarizes the top-3 semantic results and uses those summaries as context for the LLM - If you experience issues, try the Test Model tab first - Check the console logs for debugging information """) if __name__ == "__main__": demo.launch(debug=True)