# Hindi-Rag
#
# Running
#
# File size: 18,980 Bytes

import gradio as gr
import os
import tempfile
import time
import uuid
from datetime import datetime
import fitz  
import requests
import json
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq
from gtts import gTTS
import subprocess
import warnings
warnings.filterwarnings("ignore")

# Application-wide settings. The two secrets are read from environment
# variables once, at import time.
CONFIG = dict(
    PASSCODE=os.getenv('PASSCODE'),        # access code compared in authenticate()
    MAX_FILE_SIZE=10 * 1024 * 1024,        # upload limit in bytes, compared with os.path.getsize()
    MAX_QUERIES_PER_SESSION=10,            # process_query() refuses further queries at this count
    MAX_AUDIO_DURATION=120,                # NOTE(review): not referenced in the visible code; presumably seconds
    GROQ_API_KEY=os.getenv('GAPI'),        # note: the environment variable is named GAPI
    AUDIO_CLIP_DURATION=10,                # passed to ffmpeg's -t option when trimming recordings
    BOOK_THUMBNAILS_DIR='./book_thumbnails',  # NOTE(review): not referenced in the visible code
    OCR_BOOKS_DIR='./ocr_books',              # NOTE(review): not referenced in the visible code
)

# Mutable module-level state shared by every handler in this file: the
# processed document, its search index, and the lazily created model/client.
SESSION_DATA = dict(
    authenticated=False,            # set to True by authenticate() on success
    session_id=str(uuid.uuid4()),   # regenerated by reset_session()
    query_count=0,                  # incremented once per answered query
    document_chunks=[],             # text chunks of the current document
    faiss_index=None,               # index built over document_chunks
    author_name='',
    book_title='',
    embedding_model=None,           # filled in by load_models()
    groq_client=None,               # filled in by load_models() when an API key is configured
)

# Canned Hindi questions offered in the "Quick Questions" dropdown, grouped
# by theme. process_document() flattens the groups in this order and shows
# the first six.
PREDEFINED_QUESTIONS = dict(
    general=[
        "इस पुस्तक का मुख्य विषय क्या है?",
        "लेखक ने इस पुस्तक में क्या संदेश दिया है?",
        "इस पुस्तक में कौन से मुख्य पात्र हैं?",
    ],
    analysis=[
        "इस पुस्तक की मुख्य शिक्षा क्या है?",
        "लेखक की लेखन शैली कैसी है?",
        "इस पुस्तक में कौन सा मुख्य संघर्ष है?",
    ],
    content=[
        "इस कहानी का क्या अंत है?",
        "पुस्तक में कौन सी मुख्य घटनाएं हैं?",
        "मुख्य पात्र का चरित्र कैसा है?",
    ],
)

def load_models():
    """Create the embedding model and Groq client on first use and cache them.

    Both objects are stored in ``SESSION_DATA`` so later calls reuse them.
    The Groq client is only created when ``CONFIG['GROQ_API_KEY']`` is set;
    otherwise a warning is printed and the cached client stays ``None``.

    Returns:
        A ``(embedding_model, groq_client)`` tuple; ``groq_client`` may be
        ``None``.
    """
    state = SESSION_DATA

    if state['embedding_model'] is None:
        print("Loading embedding model...")
        state['embedding_model'] = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

    if state['groq_client'] is None:
        api_key = CONFIG['GROQ_API_KEY']
        if not api_key:
            print("Warning: GROQ_API_KEY not found")
        else:
            print("Initializing Groq client...")
            state['groq_client'] = Groq(api_key=api_key)

    return state['embedding_model'], state['groq_client']

def trim_audio_to_duration(input_path, output_path, duration=10):
    """Copy the first ``duration`` seconds of an audio file using ffmpeg.

    The audio stream is copied without re-encoding (``-acodec copy``) and an
    existing ``output_path`` is overwritten (``-y``).

    Args:
        input_path: Path of the source audio file.
        output_path: Path the trimmed clip is written to.
        duration: Value passed to ffmpeg's ``-t`` option.

    Returns:
        ``True`` when ffmpeg exits with status 0, ``False`` on a non-zero
        exit status or when running ffmpeg raises (for example, ffmpeg is
        not installed). Failures are printed, never raised.
    """
    try:
        completed = subprocess.run(
            ['ffmpeg', '-i', input_path, '-t', str(duration), '-acodec', 'copy', '-y', output_path],
            capture_output=True,
            text=True,
        )
        succeeded = completed.returncode == 0
        if not succeeded:
            print(f"FFmpeg error: {completed.stderr}")
        return succeeded
    except Exception as exc:
        print(f"Error trimming audio: {str(exc)}")
        return False

def transcribe_audio(audio_file):
    """Transcribe a recorded question to Hindi text with Groq's Whisper model.

    The recording is first trimmed to ``CONFIG['AUDIO_CLIP_DURATION']`` into
    a temporary ``.wav`` file; if trimming fails, the full recording is sent
    instead. The temporary file is always removed before returning, whether
    or not trimming or transcription succeeded.

    Args:
        audio_file: Path of the recorded audio file, or ``None``.

    Returns:
        The transcribed text; ``""`` when ``audio_file`` is ``None``; or a
        message starting with ``"Error:"`` / ``"Transcription error:"`` when
        the client is not configured or the request fails. Errors are
        returned as strings, never raised.
    """
    if audio_file is None:
        return ""

    if not CONFIG['GROQ_API_KEY'] or SESSION_DATA['groq_client'] is None:
        return "Error: Groq API key not configured"

    # Tracked separately from the path that is actually uploaded, so the
    # temp file is still cleaned up when we fall back to the full recording.
    temp_clip_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            temp_clip_path = tmp_file.name

        source_path = temp_clip_path
        if not trim_audio_to_duration(audio_file, temp_clip_path, CONFIG['AUDIO_CLIP_DURATION']):
            print("Warning: Could not trim audio, using full duration")
            source_path = audio_file

        with open(source_path, "rb") as file:
            transcription = SESSION_DATA['groq_client'].audio.transcriptions.create(
                file=(os.path.basename(source_path), file.read()),
                model="whisper-large-v3",
                response_format="verbose_json",
                language="hi"
            )

        return transcription.text

    except Exception as e:
        return f"Transcription error: {str(e)}"

    finally:
        if temp_clip_path is not None:
            try:
                os.unlink(temp_clip_path)
            except OSError:
                # Best-effort cleanup: the file may already be gone.
                pass

def text_to_speech(text):
    """Synthesize Hindi speech for ``text`` and return the MP3 file path.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text
            produces no audio.

    Returns:
        Path of a newly created temporary ``.mp3`` file (the caller owns
        it), or ``None`` when there is nothing to speak or synthesis fails.
        Failures are printed, never raised.
    """
    if not text or not text.strip():
        return None

    output_path = None
    try:
        tts = gTTS(text=text, lang='hi', slow=False)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            output_path = tmp_file.name
        # Save only after our own handle is closed: writing to a path that
        # is still open through NamedTemporaryFile fails on Windows.
        tts.save(output_path)
        return output_path
    except Exception as e:
        print(f"TTS Error: {str(e)}")
        # Do not leave an empty or partial temp file behind on failure.
        if output_path is not None:
            try:
                os.unlink(output_path)
            except OSError:
                pass
        return None

def extract_text_from_pdf(pdf_path):
    """Extract the selectable text of every page of a PDF.

    Pages whose text is empty or whitespace-only are skipped; each kept
    page's text is followed by a newline.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page text, or a message starting with ``"Error"``
        when the PDF cannot be read or contains no selectable text. Errors
        are returned as strings, never raised.
    """
    page_texts = []
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(pdf_path) as pdf_document:
            total_pages = len(pdf_document)
            print(f"Processing PDF with {total_pages} pages...")

            for page_num in range(total_pages):
                page_text = pdf_document.load_page(page_num).get_text()
                if page_text.strip():
                    page_texts.append(page_text + "\n")

    except Exception as e:
        return f"Error extracting text: {str(e)}"

    text_content = "".join(page_texts)
    if not text_content.strip():
        return "Error: No selectable text found in PDF. Please ensure the PDF contains selectable text, not just images."

    return text_content

def extract_metadata(text):
    """Guess the author line and book title from the start of a document.

    Only the first 25 lines of ``text`` are examined, ignoring blank ones.

    * A line is an author line when it contains one of the keywords
      ``लेखक``, ``author``, ``द्वारा``, ``रचयिता`` (case-insensitive substring),
      or the standalone English word ``by``. ``by`` must be a whole word so
      that lines such as "Goodbye ..." are not mistaken for author lines.
      When several lines qualify, the last one is used.
    * The title is the first non-author line that is 11-99 characters long
      and has no digit among its first 20 characters.

    Args:
        text: Full document text.

    Returns:
        ``(author_name, book_title)``. Each falls back to a Hindi
        placeholder ("unknown author" / "untitled book") when not found.
    """
    import re  # local import: this file has no module-level ``import re``

    default_author = "अज्ञात लेखक"
    default_title = "अनाम पुस्तक"
    substring_keywords = ('लेखक', 'author', 'द्वारा', 'रचयिता')
    standalone_by = re.compile(r'\bby\b')

    author_name = default_author
    book_title = default_title

    lines = [line.strip() for line in text.split('\n')[:25] if line.strip()]
    for line in lines:
        lowered = line.lower()
        is_author_line = (
            any(keyword in lowered for keyword in substring_keywords)
            or standalone_by.search(lowered) is not None
        )
        if is_author_line:
            author_name = line
        elif (
            book_title == default_title
            and 10 < len(line) < 100
            and not any(char.isdigit() for char in line[:20])
        ):
            book_title = line

    return author_name, book_title

def chunk_text(text, chunk_size=400, overlap=50):
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk repeats the last ``overlap`` words of the previous one. Chunking
    stops once a chunk reaches the end of the text, so no trailing chunk is
    emitted that is wholly contained in the previous one.

    Args:
        text: Text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by neighbouring chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings; empty when ``text`` has no words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))
        # This window already covers the last word; a further window would
        # only repeat the tail of this chunk.
        if start + chunk_size >= len(words):
            break
    return chunks

def create_embeddings(chunks):
    """Embed ``chunks`` and return an inner-product FAISS index over them.

    The vectors are L2-normalized before being added (the normalize call's
    return value is not used), so inner-product scores on this index act as
    cosine similarities.

    Args:
        chunks: List of text chunks to embed.

    Returns:
        A ``faiss.IndexFlatIP`` holding one vector per chunk, in order.
    """
    encoder = load_models()[0]
    vectors = encoder.encode(chunks, show_progress_bar=False)

    flat_index = faiss.IndexFlatIP(vectors.shape[1])
    faiss.normalize_L2(vectors)
    flat_index.add(vectors.astype('float32'))
    return flat_index

def search_similar_chunks(query, top_k=3):
    """Return the document chunks most similar to ``query``.

    Args:
        query: Question text to embed and search for.
        top_k: Number of neighbours requested from the index.

    Returns:
        A list of ``{'text': chunk, 'score': float}`` dicts in the order the
        index returns them. The list is empty when no document has been
        indexed; index positions outside the stored chunk list are skipped.
    """
    index = SESSION_DATA['faiss_index']
    chunks = SESSION_DATA['document_chunks']
    if index is None or not chunks:
        return []

    encoder, _ = load_models()
    query_vector = encoder.encode([query], show_progress_bar=False)
    faiss.normalize_L2(query_vector)
    scores, indices = index.search(query_vector.astype('float32'), top_k)

    return [
        {'text': chunks[idx], 'score': float(score)}
        for idx, score in zip(indices[0], scores[0])
        if 0 <= idx < len(chunks)
    ]

def call_groq_api(prompt, model="llama-3.1-8b-instant"):
    """Send ``prompt`` to Groq's chat-completions endpoint and return the reply.

    Args:
        prompt: Text sent as a single user message.
        model: Name of the Groq chat model to use.

    Returns:
        The content of the first completion choice. When the API key is
        missing, or the request or response parsing fails, a human-readable
        error message is returned instead; nothing is raised.
    """
    if not CONFIG['GROQ_API_KEY'] or CONFIG['GROQ_API_KEY'] == 'your_groq_api_key_here':
        # CONFIG['GROQ_API_KEY'] is populated from the GAPI environment
        # variable, so that is the name the user has to set.
        return "⚠️ Groq API key not configured. Please set the GAPI environment variable."

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {CONFIG['GROQ_API_KEY']}",
        "Content-Type": "application/json"
    }

    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 600
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']
    except Exception as e:
        # Boundary handler: callers display the returned string to the user.
        return f"Error calling LLM: {str(e)}"

def generate_rag_response(query, context_chunks):
    """Answer ``query`` in Hindi using the retrieved chunks as context.

    Args:
        query: The user's question.
        context_chunks: Dicts with a ``'text'`` key, as produced by
            ``search_similar_chunks``.

    Returns:
        The reply from ``call_groq_api``, or a fixed Hindi message saying
        not enough information was found when ``context_chunks`` is empty.
    """
    if not context_chunks:
        return "मुझे इस प्रश्न का उत्तर देने के लिए पर्याप्त जानकारी नहीं मिली।"

    # Retrieved passages are separated by a blank line inside the prompt.
    context_block = "\n\n".join(chunk['text'] for chunk in context_chunks)

    prompt = f"""आप एक हिंदी पुस्तक सहायक हैं। निम्नलिखित जानकारी के आधार पर प्रश्न का उत्तर दें:

पुस्तक: {SESSION_DATA['book_title']}
लेखक: {SESSION_DATA['author_name']}

संदर्भ:
{context_block}

प्रश्न: {query}

निर्देश:
- हिंदी में संक्षिप्त और सटीक उत्तर दें
- उत्तर की शुरुआत में पुस्तक और लेखक का संदर्भ शामिल करें
- केवल दिए गए संदर्भ के आधार पर ही उत्तर दें
"""

    return call_groq_api(prompt)

def authenticate(passcode):
    """Check the entered passcode and toggle the login / main sections.

    Access is denied when no passcode is configured (``CONFIG['PASSCODE']``
    is ``None``) or when the submitted value is not a string, so a null
    input can never match an unset passcode. The comparison itself is
    constant-time.

    Args:
        passcode: Value entered in the passcode box.

    Returns:
        A 3-tuple for ``(auth_section, main_section, auth_status)``: two
        visibility updates and a status message.
    """
    import hmac  # local import: this file has no module-level ``import hmac``

    expected = CONFIG['PASSCODE']
    is_valid = (
        isinstance(expected, str)
        and isinstance(passcode, str)
        # Bytes are compared because compare_digest rejects non-ASCII str.
        and hmac.compare_digest(passcode.encode('utf-8'), expected.encode('utf-8'))
    )

    if is_valid:
        SESSION_DATA['authenticated'] = True
        return gr.update(visible=False), gr.update(visible=True), "✅ Welcome!"
    else:
        return gr.update(visible=True), gr.update(visible=False), "❌ Invalid passcode"

def process_document(pdf_file):
    """Extract, chunk and index an uploaded PDF, then advance the UI.

    On success the session state (title, author, chunks, index, query
    counter) is replaced in one step, after every stage has succeeded, so a
    failure never leaves new chunks paired with an old index.

    Args:
        pdf_file: The uploaded file, either a path string or an object
            whose ``name`` attribute is the path; ``None`` when nothing was
            uploaded.

    Returns:
        A 6-tuple for ``(doc_status, book_title_display, author_display,
        upload_section, book_info_section, predefined_dropdown)``. On
        failure the status carries the error message, the upload section
        stays visible and the dropdown is emptied.
    """
    def failure(message):
        # Shared shape of every unsuccessful result.
        return message, "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])

    if pdf_file is None:
        return failure("Please upload a PDF file")

    try:
        # Accept both a plain path string and a file-like object with .name.
        pdf_path = getattr(pdf_file, 'name', pdf_file)

        file_size = os.path.getsize(pdf_path)
        if file_size > CONFIG['MAX_FILE_SIZE']:
            return failure(f"File too large! Max size: {CONFIG['MAX_FILE_SIZE'] // (1024*1024)}MB")

        text_content = extract_text_from_pdf(pdf_path)
        # extract_text_from_pdf reports failures as strings with these
        # prefixes. Only the prefix is checked: a book whose text merely
        # contains the word "Error" must not be rejected.
        if not text_content.strip() or text_content.startswith(("Error:", "Error extracting text:")):
            return failure(text_content)

        author_name, book_title = extract_metadata(text_content)
        chunks = chunk_text(text_content)
        faiss_index = create_embeddings(chunks)

        SESSION_DATA.update({
            'author_name': author_name,
            'book_title': book_title,
            'document_chunks': chunks,
            'faiss_index': faiss_index,
            'query_count': 0,
        })

        # Flatten the predefined question groups, keeping their order.
        questions = [
            question
            for category in PREDEFINED_QUESTIONS.values()
            for question in category
        ]

        success_msg = "✅ Document processed successfully!"

        return success_msg, book_title, author_name, gr.update(visible=False), gr.update(visible=True), gr.update(choices=questions[:6])

    except Exception as e:
        return failure(f"Error processing document: {str(e)}")

def show_questions():
    """Hide the book-information group and reveal the question group.

    Returns:
        Two visibility updates, for ``(book_info_section, query_section)``.
    """
    hide_book_info = gr.update(visible=False)
    reveal_questions = gr.update(visible=True)
    return hide_book_info, reveal_questions

def process_query(audio_input, text_input, predefined_question):
    """Answer one question about the processed book, as text and as audio.

    The question is taken from the first available source, in priority
    order: predefined question, transcribed audio, typed text.

    Args:
        audio_input: Path of a recorded question, or ``None``.
        text_input: Typed question; ``None`` is treated as empty.
        predefined_question: Question chosen from the dropdown, if any.

    Returns:
        ``(message, audio_path)``. ``message`` is the formatted question and
        answer, or a plain status/error message with ``audio_path`` set to
        ``None``. If transcription failed and no other question was given,
        the transcription error itself is returned as the message.
    """
    if SESSION_DATA['query_count'] >= CONFIG['MAX_QUERIES_PER_SESSION']:
        return "⚠️ Query limit reached", None

    if not SESSION_DATA['document_chunks']:
        return "Please upload a document first", None

    typed_text = (text_input or "").strip()
    query_text = ""
    transcription_error = ""

    # Priority: Predefined > Audio > Text
    if predefined_question and predefined_question != "Select a question...":
        query_text = predefined_question
    elif audio_input:
        transcript = transcribe_audio(audio_input) or ""
        # transcribe_audio reports failures as strings with these prefixes.
        # Only the prefix is checked, so a spoken question that contains the
        # word "error" is not thrown away.
        if transcript.startswith(("Error:", "Transcription error:")):
            transcription_error = transcript
        else:
            query_text = transcript

    if not query_text.strip() and typed_text:
        query_text = typed_text

    if not query_text.strip():
        return transcription_error or "Please ask a question", None

    try:
        similar_chunks = search_similar_chunks(query_text)
        response_text = generate_rag_response(query_text, similar_chunks)
        audio_response = text_to_speech(response_text)
        SESSION_DATA['query_count'] += 1

        formatted_response = f"**प्रश्न:** {query_text}\n\n**उत्तर:** {response_text}"
        return formatted_response, audio_response

    except Exception as e:
        return f"Error processing query: {str(e)}", None

def reset_session():
    """Discard the current document and return the UI to the upload step.

    The query counter, chunks, index, title and author are cleared and a new
    session id is generated. The authentication flag and the cached
    model/client entries are left as they are.

    Returns:
        A 7-tuple for ``(doc_status, book_title_display, author_display,
        upload_section, book_info_section, query_section,
        predefined_dropdown)``.
    """
    fresh_state = {
        'query_count': 0,
        'document_chunks': [],
        'faiss_index': None,
        'author_name': '',
        'book_title': '',
        'session_id': str(uuid.uuid4()),
    }
    SESSION_DATA.update(fresh_state)

    return (
        "✅ New session started!",
        "",
        "",
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(choices=[]),
    )

def create_interface():
    """Build the Gradio Blocks UI and wire its event handlers.

    The page has a passcode gate followed by a three-step flow: upload a
    PDF, review the detected title/author, then ask questions by dropdown,
    microphone or typed text. Only one step's group is visible at a time;
    the handlers return ``gr.update(visible=...)`` values to switch between
    them.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(
        title="Hindi Book Assistant",
        theme=gr.themes.Soft(),
        css="""
        .main-container { max-width: 1200px; margin: 0 auto; }
        .section-header { font-size: 1.2em; font-weight: bold; margin: 1em 0; }
        .upload-area { border: 2px dashed #ccc; padding: 2em; text-align: center; margin: 1em 0; }
        """
    ) as demo:

        # Page banner.
        gr.HTML("""
        <div style="text-align: center; padding: 2em;">
            <h1>📚 Hindi Book Assistant</h1>
            <p>AI-powered assistant for Hindi books with voice support</p>
        </div>
        """)

        # Authentication Section (the only group visible initially)
        with gr.Group(visible=True) as auth_section:
            gr.Markdown("### 🔐 Enter Passcode")
            passcode_input = gr.Textbox(
                label="Passcode",
                type="password",
                placeholder="Enter access code..."
            )
            auth_button = gr.Button("🔓 Access", variant="primary")
            auth_status = gr.Textbox(label="Status", interactive=False)

        # Main Interface (hidden until authenticate() reveals it)
        with gr.Group(visible=False) as main_section:

            # Step 1: Upload Document
            with gr.Group(visible=True) as upload_section:
                gr.Markdown("### 📄 Upload Your Book")
                pdf_upload = gr.File(
                    label="Choose PDF file",
                    file_types=[".pdf"],
                    type="filepath"
                )
                process_btn = gr.Button("📖 Process Book", variant="primary", size="lg")
                doc_status = gr.Textbox(label="Status", interactive=False)

            # Step 2: Book Info (shown after processing)
            with gr.Group(visible=False) as book_info_section:
                gr.Markdown("### 📚 Book Information")
                with gr.Row():
                    book_title_display = gr.Textbox(label="Book Title", interactive=False)
                    author_display = gr.Textbox(label="Author", interactive=False)
                continue_btn = gr.Button("➡️ Continue to Questions", variant="primary", size="lg")

            # Step 3: Ask Questions (shown after continue)
            with gr.Group(visible=False) as query_section:
                gr.Markdown("### 💬 Ask Questions About Your Book")

                # Choices start empty; process_document() fills them in.
                with gr.Tab("🎯 Quick Questions"):
                    predefined_dropdown = gr.Dropdown(
                        label="Choose a question",
                        choices=[],
                        value=None,
                        interactive=True
                    )
                    ask_predefined_btn = gr.Button("🔍 Ask This Question", variant="primary")

                with gr.Tab("🎤 Voice Question"):
                    audio_input = gr.Audio(
                        label="Record your question (Hindi/English)",
                        sources=["microphone"],
                        type="filepath"
                    )
                    ask_voice_btn = gr.Button("🔍 Ask Voice Question", variant="primary")

                with gr.Tab("⌨️ Type Question"):
                    text_input = gr.Textbox(
                        label="Type your question",
                        placeholder="Example: इस पुस्तक का मुख्य विषय क्या है?",
                        lines=2
                    )
                    ask_text_btn = gr.Button("🔍 Ask Text Question", variant="primary")

                # Response Section (shared by all three question tabs)
                gr.Markdown("### 📝 Answer")
                response_text = gr.Textbox(
                    label="Response",
                    lines=6,
                    interactive=False
                )

                response_audio = gr.Audio(
                    label="🔊 Audio Response",
                    interactive=False
                )

                # Reset Button
                gr.Markdown("---")
                reset_btn = gr.Button("🔄 Start New Session", variant="secondary")

        # Event Handlers
        # The order of each `outputs` list must match the order of the tuple
        # returned by the corresponding handler.
        auth_button.click(
            authenticate,
            inputs=[passcode_input],
            outputs=[auth_section, main_section, auth_status]
        )

        process_btn.click(
            process_document,
            inputs=[pdf_upload],
            outputs=[doc_status, book_title_display, author_display, upload_section, book_info_section, predefined_dropdown]
        )

        continue_btn.click(
            show_questions,
            outputs=[book_info_section, query_section]
        )

        # All three ask buttons call process_query(audio_input, text_input,
        # predefined_question). Each one passes its own component in the
        # matching position and fills the other two positions with constant
        # gr.State placeholders (None or "").
        ask_predefined_btn.click(
            process_query,
            inputs=[gr.State(None), gr.State(""), predefined_dropdown],
            outputs=[response_text, response_audio]
        )

        ask_voice_btn.click(
            process_query,
            inputs=[audio_input, gr.State(""), gr.State("")],
            outputs=[response_text, response_audio]
        )

        ask_text_btn.click(
            process_query,
            inputs=[gr.State(None), text_input, gr.State("")],
            outputs=[response_text, response_audio]
        )

        reset_btn.click(
            reset_session,
            outputs=[doc_status, book_title_display, author_display, upload_section, book_info_section, query_section, predefined_dropdown]
        )

        # Registers load_models on the app's load event; no outputs are
        # bound, so its return value is not used.
        demo.load(load_models)

    return demo

def main():
    """Load the models, build the interface and launch the Gradio app.

    The passcode itself is never printed: the app is launched with a public
    share link, and console output usually ends up in hosted logs. Only
    whether a passcode is configured is reported.
    """
    print("🚀 Starting Hindi Book Assistant...")
    print("📋 Loading AI models...")

    load_models()
    demo = create_interface()

    print("✅ Ready!")
    if CONFIG['PASSCODE']:
        print("🔑 Passcode: configured")
    else:
        print("⚠️ Passcode: PASSCODE environment variable is not set")

    demo.launch(
        share=True,
        show_error=True,
    )

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()