Spaces:

rbbist
/

RAG_System_with_Nepal_Kanun_Patrika_Dataset

Sleeping

RAG_System_with_Nepal_Kanun_Patrika_Dataset

File size: 15,445 Bytes

c369c5f
4829153
1fe038b
c369c5f
1fe038b
c369c5f
4829153
c369c5f
 
 
 
 
 
 
1fe038b
c369c5f
 
 
 
 
 
5cddb4b
c369c5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cddb4b
c369c5f
5cddb4b
 
c369c5f
1fe038b
 
 
aa4020e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fe038b
 
 
c369c5f
 
 
 
 
 
 
d6b3fb4
c369c5f
 
 
d6b3fb4
c369c5f
 
d6b3fb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c369c5f
d6b3fb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c369c5f
d6b3fb4
 
 
 
c369c5f
 
 
 
d6b3fb4
c369c5f
 
 
 
 
 
 
 
 
 
 
1fe038b
 
 
 
 
c369c5f
1fe038b
 
 
c369c5f
1fe038b
 
c369c5f
 
 
1fe038b
 
 
 
c369c5f
1fe038b
aa4020e
 
 
 
c369c5f
 
 
aa4020e
c369c5f
 
aa4020e
 
c369c5f
aa4020e
c369c5f
1fe038b
c369c5f
 
4829153
1fe038b
aa4020e
 
 
 
 
c369c5f
aa4020e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c369c5f
aa4020e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c369c5f
1fe038b
aa4020e
 
1fe038b
 
c369c5f
701e4ef
1fe038b
c369c5f
1fe038b
 
c369c5f
 
4829153
 
c369c5f
 
 
 
4829153
c369c5f
 
 
 
 
 
 
 
 
 
 
 
4829153
 
c369c5f
 
 
 
 
 
 
 
1fe038b
c369c5f
 
 
 
 
 
 
 
 
 
 
 
 
 
aa4020e
 
 
c369c5f
aa4020e
 
 
 
 
 
 
 
 
 
c369c5f
aa4020e
c369c5f

# app.py
import gradio as gr
from chromadb_semantic_search_for_dataset import semantic_search, build_compact_context
from transformers import pipeline, AutoTokenizer, MT5ForConditionalGeneration
import time
import torch

# Candidate seq2seq checkpoints. MT5 can be problematic for this task, so the
# instruction-tuned FLAN-T5 checkpoint is listed (and used) first.
MODELS_TO_TRY = [
    "google/flan-t5-base",  # Better for instruction following
    "google/mt5-base",      # Smaller, more stable than large
    # "google/mt5-large"    # Original choice - may have issues
]

SUMMARY_MODEL = MODELS_TO_TRY[0]  # Start with flan-t5-base
ANSWER_MODEL = MODELS_TO_TRY[0]   # Use same model for consistency

print(f"Loading models: {SUMMARY_MODEL}")


def _load_text2text_pipeline(model_name):
    """Build a CPU ``text2text-generation`` pipeline for *model_name*."""
    return pipeline(
        "text2text-generation",
        model=model_name,
        device=-1,  # CPU
        model_kwargs={
            "torch_dtype": torch.float32,
            "low_cpu_mem_usage": True,
        },
    )


# Any failure here is fatal for the app: log it and re-raise.
try:
    summarizer = _load_text2text_pipeline(SUMMARY_MODEL)
    # When both roles use the same checkpoint, share one pipeline instead of
    # loading a second copy of the weights into memory.
    if ANSWER_MODEL == SUMMARY_MODEL:
        answerer = summarizer
    else:
        answerer = _load_text2text_pipeline(ANSWER_MODEL)
    print("Models loaded successfully!")

except Exception as e:
    print(f"Error loading models: {e}")
    raise

# Keep last search context in memory so RAG can use the previous search if the
# user doesn't provide a new search query.
_last_combined_context = ""
_last_search_query = ""

def _extract_field(case_text, label):
    """Return the text after *label* up to the next " |" separator, or "N/A"."""
    if label not in case_text:
        return "N/A"
    return case_text.split(label)[1].split(" |")[0]


def create_template_answer(context: str, question: str) -> str:
    """Build a rule-based Nepali answer without calling any model.

    The context is split on the "[Case " marker (presumably the format emitted
    by ``build_compact_context`` -- confirm against that helper); for every
    case the "मुद्दाको किसिम: " and "विषय: " fields are extracted. The headline is
    chosen from keywords found in the question, and at most three cases are
    listed, although the headline reports the total number of cases found.

    Args:
        context: Combined case summaries. ``None`` or empty is treated as
            "no cases".
        question: The user's question. ``None`` is treated as empty.

    Returns:
        The formatted answer text.
    """
    print("DEBUG: Creating template-based answer")

    context = context or ""
    question = question or ""

    # Everything before the first "[Case " marker is preamble, not a case.
    case_chunks = context.split("[Case ")[1:]
    case_info = [
        {
            'number': number,
            'type': _extract_field(chunk, "मुद्दाको किसिम: "),
            'subject': _extract_field(chunk, "विषय: "),
        }
        for number, chunk in enumerate(case_chunks, 1)
    ]
    total = len(case_info)

    # (topic shown in the headline, keywords that select it); first match wins.
    topics = (
        ("हक कायम", ('हक', 'अधिकार', 'कायम')),
        ("फैसला बदर", ('फैसला', 'बदर', 'निर्णय')),
        ("अंश दर्ता", ('अंश', 'दर्ता', 'बाँडफाँड')),
    )
    question_lower = question.lower()
    for topic, keywords in topics:
        if any(word in question_lower for word in keywords):
            headline = f"तपाईंको प्रश्न '{topic}' संबंधी छ। उपलब्ध {total} केसहरूमा:\n\n"
            break
    else:
        headline = f"तपाईंको प्रश्नको सम्बन्धमा {total} केसहरू भेटिएका छन्:\n\n"

    parts = [headline]
    for case in case_info[:3]:  # Limit to top 3 cases
        parts.append(f"केस {case['number']}: {case['type']} - {case['subject']}\n")
        parts.append(f"मुख्य विषय: {case['subject']}\n\n")
    parts.append("विस्तृत जानकारीका लागि माथिका केसहरूको लिङ्कहरू हेर्नुहोस्।")

    return "".join(parts)

def _build_manual_summary(doc_text, meta):
    """Assemble one " | "-joined summary line from a document and its metadata."""
    summary_parts = []

    # Key metadata first; party names are cut to 100 characters.
    if meta.get('mudda_type'):
        summary_parts.append(f"मुद्दाको किसिम: {meta['mudda_type']}")
    if meta.get('subject'):
        summary_parts.append(f"विषय: {meta['subject']}")
    if meta.get('nibedak'):
        summary_parts.append(f"निवेदक: {meta['nibedak'][:100]}...")
    if meta.get('vipakshi'):
        summary_parts.append(f"विपक्षी: {meta['vipakshi'][:100]}...")

    # Strip list-literal brackets and literal "\n" sequences from the stored text.
    doc_clean = doc_text.replace('["', '').replace('"]', '').replace('\\n', ' ')

    # Among the first five sentences (split on the Nepali danda), keep those
    # that are long enough and mention a legal term.
    legal_terms = ('फैसला', 'ठहर', 'अदालत', 'मुद्दा', 'कानुन')
    important_sentences = []
    for sentence in doc_clean.split('।')[:5]:
        sentence = sentence.strip()
        if len(sentence) > 20 and any(term in sentence.lower() for term in legal_terms):
            important_sentences.append(sentence[:200])  # Limit sentence length

    if important_sentences:
        summary_parts.append("मुख्य बुँदाहरू: " + "। ".join(important_sentences[:2]) + "।")
    else:
        # Fallback to the first part of the document
        clean_start = doc_clean[:300].strip()
        if clean_start:
            summary_parts.append(f"विवरण: {clean_start}...")

    return " | ".join(summary_parts)


def semantic_search_ui(search_text: str):
    """Run semantic search and return ``(formatted_results, compact_context)``.

    Each of the top-3 documents is summarised by text extraction (no model is
    called), the summaries are combined with ``build_compact_context`` and the
    result is cached in the module globals for the RAG tab.

    An empty or whitespace-only query is rejected up front so it neither hits
    the search backend nor overwrites the cached context. On any search error
    the error message and an empty context are returned, and the cached
    context is left untouched.
    """
    global _last_combined_context, _last_search_query

    if not search_text or not search_text.strip():
        return "Please enter a search query.", ""

    print(f"DEBUG: Starting semantic search for: {search_text}")

    try:
        # The third element (the raw combined context) is not used here.
        formatted, top_docs, _ = semantic_search(search_text, n_results=3)
        print(f"DEBUG: Retrieved {len(top_docs)} documents")

        summaries = []
        for idx, item in enumerate(top_docs, start=1):
            # NOTE(review): tolerate entries stored without text or metadata;
            # confirm whether semantic_search can actually return None here.
            doc_text = item["document"] or ""
            meta = item["metadata"] or {}
            print(f"DEBUG: Processing document {idx}, length: {len(doc_text)}")

            manual_summary = _build_manual_summary(doc_text, meta)
            summaries.append(manual_summary)
            print(f"DEBUG: Created manual summary {idx}: {manual_summary[:100]}...")

        # Build compact combined context for the answerer (limited length)
        compact_context = build_compact_context(summaries)
        print(f"DEBUG: Built compact context, length: {len(compact_context)}")
        print(f"DEBUG: Context preview: {compact_context[:200]}...")

        # Save last context for Ask flow
        _last_combined_context = compact_context
        _last_search_query = search_text

        return formatted, compact_context

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        error_msg = f"Error in semantic search: {e}"
        print(f"DEBUG: {error_msg}")
        return error_msg, ""

def _resolve_pad_token_id(tokenizer):
    """Return the tokenizer's pad token id, falling back to EOS when it is unset."""
    pad_id = getattr(tokenizer, "pad_token_id", None)
    # The attribute can exist but hold None, so test the value rather than
    # the attribute's presence.
    return pad_id if pad_id is not None else tokenizer.eos_token_id


def rag_answer(question: str, search_text_for_context: str = ""):
    """
    Answer the user's question using RAG:
       - If search_text_for_context is provided, run semantic search for it and
         use its summaries.
       - Otherwise, use the last search context stored in memory
         (_last_combined_context).

    Generation is attempted with two progressively simpler parameter sets. If
    both raise, or the model output is empty or too short, the rule-based
    ``create_template_answer`` is used instead.

    Returns:
        The answer text followed by a timing footer, or a plain message when
        the question is empty or no usable context (under 50 characters) is
        available.
    """
    if not question or not question.strip():
        return "Please enter a question."

    print(f"DEBUG: RAG answer called with question: {question[:50]}...")
    start_time = time.time()

    # If user provided a search string in the RAG tab, refresh context
    if search_text_for_context and search_text_for_context.strip():
        print("DEBUG: Refreshing context with new search")
        _, context = semantic_search_ui(search_text_for_context)
    else:
        context = _last_combined_context
        print(f"DEBUG: Using cached context, length: {len(context)}")

    if not context or len(context.strip()) < 50:
        return "No sufficient context available. Please run a semantic search first or provide a search query."

    print(f"DEBUG: Using context: {context[:300]}...")

    # Only the first 1500 characters of context go into the prompt.
    if "flan-t5" in ANSWER_MODEL.lower():
        prompt = f"Based on these Nepali legal case summaries, answer the question in Nepali:\n\nContext: {context[:1500]}\n\nQuestion: {question}\n\nProvide a detailed answer in Nepali:"
    else:
        prompt = (
            "तलका नेपाली अदालती मुद्दाका विवरणहरू प्रयोग गरेर प्रश्नको जवाफ नेपालीमा दिनुहोस्:\n\n"
            f"मुद्दाहरूको विवरण:\n{context[:1500]}\n\n"
            f"प्रश्न: {question}\n\n"
            "विस्तृत जवाफ:"
        )

    print(f"DEBUG: Generated prompt length: {len(prompt)}")
    print(f"DEBUG: Prompt preview: {prompt[:300]}...")

    # Generation attempts, in order. Each is a thunk so that argument
    # evaluation (e.g. resolving the pad token) happens inside the try below.
    strategies = (
        (
            "Strategy 1",
            lambda: answerer(
                prompt,
                max_length=300,
                min_length=10,
                do_sample=False,
                num_beams=1,
                pad_token_id=_resolve_pad_token_id(answerer.tokenizer),
            ),
        ),
        (
            "Strategy 2",
            lambda: answerer(prompt, max_length=200, do_sample=False),
        ),
    )

    try:
        result = None
        for label, generate in strategies:
            try:
                result = generate()
            except Exception as exc:
                print(f"DEBUG: {label} failed: {exc}")
                continue
            print(f"DEBUG: {label} successful")
            break
        else:
            # Strategy 3: every model attempt raised -> template-based fallback
            print("DEBUG: Using template-based fallback")
            template_answer = create_template_answer(context, question)
            return template_answer + f"\n\n---\n(Generated using template fallback in {time.time() - start_time:.2f}s)"

        if result:
            out = result[0]["generated_text"].strip()
            print(f"DEBUG: Raw model output: '{out}'")
            print(f"DEBUG: Output length: {len(out)}")

            # Clean up the output - remove the input prompt if it's repeated
            if prompt in out:
                out = out.replace(prompt, "").strip()
                print(f"DEBUG: Cleaned output: '{out[:100]}...'")

            # Anything under 5 characters (which includes "", "none" and
            # "n/a") is treated as a non-answer.
            if len(out) < 5:
                print("DEBUG: Output too short or meaningless, using template fallback")
                out = create_template_answer(context, question)
        else:
            print("DEBUG: No result from model, using template fallback")
            out = create_template_answer(context, question)

    except Exception as e:
        # Reached when handling the model output (or the fallback itself) fails.
        print(f"DEBUG: Answer generation failed unexpectedly: {e}")
        out = create_template_answer(context, question)

    elapsed = time.time() - start_time
    footer = f"\n\n---\n(Generated in {elapsed:.2f}s using summaries of top-3 cases.)"
    return out + footer


def test_model(text):
    """Smoke-test the answer pipeline with one English and one Nepali prompt.

    Returns a report containing both generated texts, a hint when *text* is
    blank, or a failure message with the model name and pipeline type if
    generation raises.
    """
    if not text.strip():
        return "Please enter some text to test"

    try:
        # Test 1: very simple English instruction
        english_out = answerer(f"Translate to English: {text}", max_length=50, do_sample=False)
        result1 = english_out[0]["generated_text"]

        # Test 2: Nepali instruction
        nepali_out = answerer(f"यसलाई नेपालीमा भन्नुहोस्: {text}", max_length=50, do_sample=False)
        result2 = nepali_out[0]["generated_text"]
    except Exception as e:
        return f"Model test failed: {e}\n\nModel details:\n- Name: {ANSWER_MODEL}\n- Type: {type(answerer)}"

    return f"English test: {result1}\n\nNepali test: {result2}\n\nModel is working!"


# --- Gradio UI ---
# Three tabs: semantic search, RAG question answering, and a model smoke test.
with gr.Blocks() as demo:
    gr.Markdown("# 📚 Semantic Search + RAG (auto-summarize top-3) — Nepali cases")
    gr.Markdown(f"**Debug Info**: Using models: {SUMMARY_MODEL}")

    # Tab 1: run a search and preview the context that will be fed to RAG.
    with gr.Tab("🔍 Semantic Search"):
        search_input = gr.Textbox(
            label="Search for a case (use Nepali preferred)",
            placeholder="मुद्दाको संक्षेप वा कीवर्ड टाइप गर्नुहोस्...",
        )
        search_button = gr.Button("Search")
        search_results = gr.Markdown(label="Top 3 Similar Cases (formatted)")
        context_preview = gr.Textbox(
            label="Combined Summarized Context (for RAG)",
            interactive=False,
            max_lines=10,
        )
        search_button.click(
            fn=semantic_search_ui,
            inputs=search_input,
            outputs=[search_results, context_preview],
        )

    # Tab 2: ask a question, optionally refreshing the context with a new search.
    with gr.Tab("🤖 Ask a Question (RAG)"):
        question_input = gr.Textbox(
            label="Your question (Nepali)",
            placeholder="यहाँ प्रश्न लेख्नुहोस्...",
        )
        optional_search_input = gr.Textbox(
            label="Optional: Search query to refresh context",
            placeholder="(Optional) provide a search query to refresh top-3 context",
        )
        ask_button = gr.Button("Get Answer")
        rag_output = gr.Markdown(label="LLM Answer (based on summarized top-3)")
        ask_button.click(
            fn=rag_answer,
            inputs=[question_input, optional_search_input],
            outputs=rag_output,
        )

    # Tab 3: call the model directly to check that it generates anything.
    with gr.Tab("🐛 Test Model"):
        test_input = gr.Textbox(label="Test input", placeholder="Enter test text...")
        test_button = gr.Button("Test Model")
        test_output = gr.Textbox(label="Model output")
        test_button.click(fn=test_model, inputs=test_input, outputs=test_output)

    gr.Markdown("""
    **Notes**: 
    - The system summarizes the top-3 semantic results and uses those summaries as context for the LLM
    - If you experience issues, try the Test Model tab first
    - Check the console logs for debugging information
    """)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)