Hindi-Rag

Sleeping

App Files Files Community

Update app.py

by wellwisherofindia - opened Jul 12, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+194

-539

Files changed (1) hide show

app.py +194 -539

app.py CHANGED Viewed

@@ -1,17 +1,10 @@
-#!/usr/bin/env python3
-"""
-Hindi RAG Voice Demo - Gradio Implementation (Groq Whisper API Version)
-A streamlined voice-enabled RAG system for Hindi content using Gradio
-Uses Groq Whisper API for transcription and assumes PDFs have selectable text
-"""
 import gradio as gr
 import os
 import tempfile
 import time
 import uuid
 from datetime import datetime
-import fitz  # PyMuPDF
 import requests
 import json
 import numpy as np
@@ -23,19 +16,17 @@ import subprocess
 import warnings
 warnings.filterwarnings("ignore")
-# Global configuration
 CONFIG = {
     'PASSCODE': os.getenv('PASSCODE'),
-    'MAX_FILE_SIZE': 10 * 1024 * 1024,  # 10MB
-    'MAX_QUERIES_PER_SESSION': 5,
-    'MAX_AUDIO_DURATION': 120,  # 2 minutes
     'GROQ_API_KEY': os.getenv('GAPI'),
-    'AUDIO_CLIP_DURATION': 10,  # First 10 seconds only
     'BOOK_THUMBNAILS_DIR': './book_thumbnails',
     'OCR_BOOKS_DIR': './ocr_books',
 }
-# Global session storage
 SESSION_DATA = {
     'authenticated': False,
     'session_id': str(uuid.uuid4()),
@@ -48,103 +39,103 @@ SESSION_DATA = {
     'groq_client': None
 }
-# Initialize models and clients (cached)
 def load_models():
-    """Load and cache models and clients"""
     if SESSION_DATA['embedding_model'] is None:
         print("Loading embedding model...")
         SESSION_DATA['embedding_model'] = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
     if SESSION_DATA['groq_client'] is None:
         if CONFIG['GROQ_API_KEY']:
             print("Initializing Groq client...")
             SESSION_DATA['groq_client'] = Groq(api_key=CONFIG['GROQ_API_KEY'])
         else:
             print("Warning: GROQ_API_KEY not found")
     return SESSION_DATA['embedding_model'], SESSION_DATA['groq_client']
-# Audio processing functions
 def trim_audio_to_duration(input_path, output_path, duration=10):
-    """Trim audio to specified duration using ffmpeg"""
     try:
-        # Use ffmpeg to trim audio to first N seconds
         cmd = [
             'ffmpeg', '-i', input_path,
             '-t', str(duration),
             '-acodec', 'copy',
-            '-y',  # Overwrite output file
             output_path
         ]
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode == 0:
             return True
         else:
             print(f"FFmpeg error: {result.stderr}")
             return False
     except Exception as e:
         print(f"Error trimming audio: {str(e)}")
         return False
 def transcribe_audio(audio_file):
-    """Transcribe audio using Groq Whisper API (first 10 seconds only)"""
     if audio_file is None:
         return ""
     if not CONFIG['GROQ_API_KEY'] or SESSION_DATA['groq_client'] is None:
         return "Error: Groq API key not configured"
     try:
-        # Create temporary file for trimmed audio
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             trimmed_audio_path = tmp_file.name
-        # Trim audio to first 10 seconds
         if not trim_audio_to_duration(audio_file, trimmed_audio_path, CONFIG['AUDIO_CLIP_DURATION']):
-            # If trimming fails, use original file but warn user
             print("Warning: Could not trim audio, using full duration")
             trimmed_audio_path = audio_file
-        # Transcribe using Groq Whisper API
         with open(trimmed_audio_path, "rb") as file:
             transcription = SESSION_DATA['groq_client'].audio.transcriptions.create(
                 file=(os.path.basename(trimmed_audio_path), file.read()),
                 model="whisper-large-v3",
                 response_format="verbose_json",
-                language="hi"  # Specify Hindi language
             )
-        # Clean up temporary file if we created one
         if trimmed_audio_path != audio_file:
             try:
                 os.unlink(trimmed_audio_path)
             except:
                 pass
         return transcription.text
     except Exception as e:
-        # Clean up on error
         try:
             if 'trimmed_audio_path' in locals() and trimmed_audio_path != audio_file:
                 os.unlink(trimmed_audio_path)
         except:
             pass
         return f"Transcription error: {str(e)}"
 def text_to_speech(text):
-    """Convert text to speech in Hindi"""
     if not text or len(text.strip()) == 0:
         return None
     try:
         tts = gTTS(text=text, lang='hi', slow=False)
-        # Save to temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
             tts.save(tmp_file.name)
             return tmp_file.name
@@ -152,96 +143,70 @@ def text_to_speech(text):
         print(f"TTS Error: {str(e)}")
         return None
-# Text extraction functions
 def extract_text_from_pdf(pdf_path):
-    """Extract text from PDF using PyMuPDF (assumes selectable text)"""
     text_content = ""
     try:
         pdf_document = fitz.open(pdf_path)
         total_pages = len(pdf_document)
         print(f"Processing PDF with {total_pages} pages...")
-        # Process all pages (removed page limit for production use)
         for page_num in range(total_pages):
             page = pdf_document.load_page(page_num)
             page_text = page.get_text()
-            # Add page text if it exists
             if page_text.strip():
                 text_content += page_text + "\n"
-            else:
-                print(f"Warning: Page {page_num + 1} appears to have no selectable text")
         pdf_document.close()
         if not text_content.strip():
             return "Error: No selectable text found in PDF. Please ensure the PDF contains selectable text, not just images."
         return text_content
     except Exception as e:
         return f"Error extracting text: {str(e)}"
 def extract_metadata(text):
-    """Extract author name and book title from text"""
     lines = [line.strip() for line in text.split('\n')[:25] if line.strip()]
     author_name = "अज्ञात लेखक"
     book_title = "अनाम पुस्तक"
-    # Simple heuristics for metadata extraction
     for i, line in enumerate(lines):
-        # Look for author patterns
         if any(word in line.lower() for word in ['लेखक', 'author', 'by', 'द्वारा', 'रचयिता']):
             author_name = line
-        # First substantial line might be title
         elif 10 < len(line) < 100 and not any(char.isdigit() for char in line[:20]):
             if book_title == "अनाम पुस्तक":
                 book_title = line
     return author_name, book_title
 def chunk_text(text, chunk_size=400, overlap=50):
-    """Split text into overlapping chunks"""
     words = text.split()
     chunks = []
     for i in range(0, len(words), chunk_size - overlap):
         chunk = ' '.join(words[i:i + chunk_size])
         if chunk.strip():
             chunks.append(chunk)
     return chunks
-# Vector search functions
 def create_embeddings(chunks):
-    """Create embeddings and FAISS index"""
     embedding_model, _ = load_models()
     embeddings = embedding_model.encode(chunks, show_progress_bar=False)
-    # Create FAISS index
     dimension = embeddings.shape[1]
     index = faiss.IndexFlatIP(dimension)
-    # Normalize embeddings for cosine similarity
     faiss.normalize_L2(embeddings)
     index.add(embeddings.astype('float32'))
     return index
 def search_similar_chunks(query, top_k=3):
-    """Search for similar chunks"""
     if SESSION_DATA['faiss_index'] is None or not SESSION_DATA['document_chunks']:
         return []
     embedding_model, _ = load_models()
     query_embedding = embedding_model.encode([query], show_progress_bar=False)
     faiss.normalize_L2(query_embedding)
     scores, indices = SESSION_DATA['faiss_index'].search(query_embedding.astype('float32'), top_k)
     results = []
     for i, idx in enumerate(indices[0]):
         if idx >= 0 and idx < len(SESSION_DATA['document_chunks']):
@@ -249,28 +214,25 @@ def search_similar_chunks(query, top_k=3):
                 'text': SESSION_DATA['document_chunks'][idx],
                 'score': float(scores[0][i])
             })
     return results
-# LLM functions
 def call_groq_api(prompt, model="llama-3.1-8b-instant"):
-    """Call Groq API for LLM inference"""
     if not CONFIG['GROQ_API_KEY'] or CONFIG['GROQ_API_KEY'] == 'your_groq_api_key_here':
         return "⚠️ Groq API key not configured. Please set GROQ_API_KEY environment variable."
     url = "https://api.groq.com/openai/v1/chat/completions"
     headers = {
         "Authorization": f"Bearer {CONFIG['GROQ_API_KEY']}",
         "Content-Type": "application/json"
     }
     data = {
         "model": model,
         "messages": [{"role": "user", "content": prompt}],
         "temperature": 0.7,
-        "max_tokens": 800
     }
     try:
         response = requests.post(url, headers=headers, json=data, timeout=30)
         response.raise_for_status()
@@ -279,12 +241,11 @@ def call_groq_api(prompt, model="llama-3.1-8b-instant"):
         return f"Error calling LLM: {str(e)}"
 def generate_rag_response(query, context_chunks):
-    """Generate response using RAG"""
     if not context_chunks:
         return "मुझे इस प्रश्न का उत्तर देने के लिए पर्याप्त जानकारी नहीं मिली।"
     context = "\n\n".join([chunk['text'] for chunk in context_chunks])
     prompt = f"""आप एक हिंदी पुस्तक सहायक हैं। निम्नलिखित जानकारी के आधार पर प्रश्न का उत्तर दें:
 पुस्तक: {SESSION_DATA['book_title']}
@@ -300,127 +261,91 @@ def generate_rag_response(query, context_chunks):
 - उत्तर की शुरुआत में पुस्तक और लेखक का संदर्भ शामिल करें
 - केवल दिए गए संदर्भ के आधार पर ही उत्तर दें
 """
     response = call_groq_api(prompt)
     return response
-# Authentication function
 def authenticate(passcode):
-    """Check passcode authentication"""
     if passcode == CONFIG['PASSCODE']:
         SESSION_DATA['authenticated'] = True
-        return gr.update(visible=False), gr.update(visible=True), "✅ Access granted! / पहुंच मिली!"
     else:
-        return gr.update(visible=True), gr.update(visible=False), "❌ Invalid passcode / गलत पासकोड"
-# Document processing function
 def process_document(pdf_file):
-    """Process uploaded PDF document"""
     if pdf_file is None:
-        return "कृपया एक PDF फ़ाइल अपलोड करें।", "", "", gr.update(visible=False)
     try:
-        # Check file size
         file_size = os.path.getsize(pdf_file.name)
         if file_size > CONFIG['MAX_FILE_SIZE']:
-            return f"फ़ाइल बहुत बड़ी है! अधिकतम आकार: {CONFIG['MAX_FILE_SIZE'] // (1024*1024)}MB", "", "", gr.update(visible=False)
-        # Extract text (no OCR - assumes selectable text)
         text_content = extract_text_from_pdf(pdf_file.name)
         if not text_content.strip() or "Error" in text_content:
-            return text_content, "", "", gr.update(visible=False)
-        # Extract metadata
         author_name, book_title = extract_metadata(text_content)
         SESSION_DATA['author_name'] = author_name
         SESSION_DATA['book_title'] = book_title
-        # Create chunks
         chunks = chunk_text(text_content)
         SESSION_DATA['document_chunks'] = chunks
-        # Create embeddings and index
-        print("Creating embeddings and search index...")
         SESSION_DATA['faiss_index'] = create_embeddings(chunks)
-        # Reset query count
         SESSION_DATA['query_count'] = 0
-        # Calculate statistics
-        word_count = len(text_content.split())
-        char_count = len(text_content)
-        success_msg = f"""✅ दस्तावेज़ सफलतापूर्वक प्रसंस्करित!
-📖 पुस्तक: {book_title}
-✍️ लेखक: {author_name}
-📄 टेक्स्ट खंड: {len(chunks)}
-📊 शब्द संख्या: {word_count:,}
-📝 अक्षर संख्या: {char_count:,}
-अब आप प्रश्न पूछ सकते हैं।"""
-        return success_msg, book_title, author_name, gr.update(visible=True)
     except Exception as e:
-        return f"दस्तावेज़ प्रसंस्करण में त्रुटि: {str(e)}", "", "", gr.update(visible=False)
-# Query processing function
-def process_query(audio_input, text_input):
-    """Process user query (audio or text)"""
     if SESSION_DATA['query_count'] >= CONFIG['MAX_QUERIES_PER_SESSION']:
-        return "⚠️ प्रश्न सीमा समाप्त (5 प्रश्न प्रति सत्र)", None, f"प्रश्न: {SESSION_DATA['query_count']}/{CONFIG['MAX_QUERIES_PER_SESSION']}"
     if not SESSION_DATA['document_chunks']:
-        return "कृपया पहले एक PDF दस्तावेज़ अपलोड करें।", None, f"प्रश्न: {SESSION_DATA['query_count']}/{CONFIG['MAX_QUERIES_PER_SESSION']}"
-    # Get query text
     query_text = ""
-    if audio_input:
         query_text = transcribe_audio(audio_input)
         if "error" in query_text.lower():
             query_text = ""
     if not query_text.strip() and text_input.strip():
         query_text = text_input.strip()
     if not query_text.strip():
-        return "कृपया आवाज़ या टेक्स्ट के माध्यम से प्रश्न दें।", None, f"प्रश्न: {SESSION_DATA['query_count']}/{CONFIG['MAX_QUERIES_PER_SESSION']}"
     try:
-        # Search similar chunks
         similar_chunks = search_similar_chunks(query_text)
-        # Generate response
         response_text = generate_rag_response(query_text, similar_chunks)
-        # Generate TTS
         audio_response = text_to_speech(response_text)
-        # Update query count
         SESSION_DATA['query_count'] += 1
-        # Format response with context
-        formatted_response = f"""**प्रश्न:** {query_text}
-**उत्तर:**
-{response_text}
-**संदर्भ स्रोत:**
-"""
-        for i, chunk in enumerate(similar_chunks):
-            formatted_response += f"\n{i+1}. {chunk['text'][:150]}... (स्कोर: {chunk['score']:.3f})"
-        return formatted_response, audio_response, f"प्रश्न: {SESSION_DATA['query_count']}/{CONFIG['MAX_QUERIES_PER_SESSION']}"
     except Exception as e:
-        return f"प्रश्न प्रसंस्करण में त्रुटि: {str(e)}", None, f"प्रश्न: {SESSION_DATA['query_count']}/{CONFIG['MAX_QUERIES_PER_SESSION']}"
 def reset_session():
-    """Reset the session"""
     SESSION_DATA.update({
         'query_count': 0,
         'document_chunks': [],
@@ -429,429 +354,159 @@ def reset_session():
         'book_title': '',
         'session_id': str(uuid.uuid4())
     })
-    return "✅ नया सत्र शुरू किया गया!", "", "", gr.update(visible=False), "प्रश्न: 0/5"
-# Book management functions
-def get_available_books():
-    """Get list of available books with their thumbnails and text files"""
-    books = []
-    try:
-        # Get all image files from thumbnails directory
-        thumbnail_dir = CONFIG['BOOK_THUMBNAILS_DIR']
-        ocr_dir = CONFIG['OCR_BOOKS_DIR']
-        if os.path.exists(thumbnail_dir):
-            thumbnail_files = [f for f in os.listdir(thumbnail_dir)
-                             if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))]
-        else:
-            thumbnail_files = []
-        # Get all text files from OCR directory
-        if os.path.exists(ocr_dir):
-            text_files = [f for f in os.listdir(ocr_dir)
-                         if f.lower().endswith('.txt')]
-        else:
-            text_files = []
-        # Create book entries
-        for text_file in text_files:
-            book_name = os.path.splitext(text_file)[0]
-            # Look for matching thumbnail
-            thumbnail_path = None
-            for thumb_file in thumbnail_files:
-                thumb_name = os.path.splitext(thumb_file)[0]
-                if thumb_name.lower() == book_name.lower():
-                    thumbnail_path = os.path.join(thumbnail_dir, thumb_file)
-                    break
-            # If no matching thumbnail found, use a default placeholder
-            if not thumbnail_path:
-                # Create a simple text-based placeholder
-                placeholder_path = create_text_placeholder(book_name)
-                thumbnail_path = placeholder_path
-            books.append({
-                'name': book_name,
-                'display_name': book_name.replace('_', ' ').title(),
-                'text_file': os.path.join(ocr_dir, text_file),
-                'thumbnail': thumbnail_path
-            })
-        return books
-    except Exception as e:
-        print(f"Error getting available books: {str(e)}")
-        return []
-def create_text_placeholder(book_name):
-    """Create a simple text placeholder image for books without thumbnails"""
-    try:
-        import matplotlib.pyplot as plt
-        import matplotlib.patches as patches
-        # Create a simple text-based image
-        fig, ax = plt.subplots(1, 1, figsize=(3, 4))
-        ax.set_xlim(0, 1)
-        ax.set_ylim(0, 1)
-        ax.axis('off')
-        # Add background
-        rect = patches.Rectangle((0, 0), 1, 1, linewidth=2, edgecolor='#2E86AB', facecolor='#E8F4FD')
-        ax.add_patch(rect)
-        # Add text
-        ax.text(0.5, 0.5, book_name.replace('_', '\n'),
-                ha='center', va='center', fontsize=10, weight='bold', color='#2E86AB')
-        # Save to temporary file
-        placeholder_path = os.path.join(tempfile.gettempdir(), f"{book_name}_placeholder.png")
-        plt.savefig(placeholder_path, dpi=100, bbox_inches='tight')
-        plt.close()
-        return placeholder_path
-    except Exception as e:
-        print(f"Error creating placeholder: {str(e)}")
-        return None
-def load_book_text(book_info):
-    """Load text content from a pre-existing book"""
-    try:
-        with open(book_info['text_file'], 'r', encoding='utf-8') as file:
-            content = file.read()
-        if not content.strip():
-            return "Error: Empty text file"
-        return content
-    except Exception as e:
-        return f"Error loading book text: {str(e)}"
-def process_selected_book(selected_book_name):
-    """Process a pre-selected book"""
-    if not selected_book_name or selected_book_name == "None":
-        return "कृपया एक पुस्तक चुनें।", "", "", gr.update(visible=False)
-    try:
-        # Get available books
-        available_books = get_available_books()
-        # Find the selected book
-        selected_book = None
-        for book in available_books:
-            if book['name'] == selected_book_name:
-                selected_book = book
-                break
-        if not selected_book:
-            return "चुनी गई पुस्तक नहीं मिली।", "", "", gr.update(visible=False)
-        # Load text content
-        text_content = load_book_text(selected_book)
-        if not text_content.strip() or "Error" in text_content:
-            return text_content, "", "", gr.update(visible=False)
-        # Extract metadata (use book name if no metadata found in text)
-        author_name, book_title = extract_metadata(text_content)
-        # If metadata extraction didn't work well, use the book name
-        if author_name == "अज्ञात लेखक":
-            author_name = "संग्रहित पुस्तक"
-        if book_title == "अनाम पुस्तक":
-            book_title = selected_book['display_name']
-        SESSION_DATA['author_name'] = author_name
-        SESSION_DATA['book_title'] = book_title
-        # Create chunks
-        chunks = chunk_text(text_content)
-        SESSION_DATA['document_chunks'] = chunks
-        # Create embeddings and index
-        print("Creating embeddings and search index for selected book...")
-        SESSION_DATA['faiss_index'] = create_embeddings(chunks)
-        # Reset query count
-        SESSION_DATA['query_count'] = 0
-        # Calculate statistics
-        word_count = len(text_content.split())
-        char_count = len(text_content)
-        success_msg = f"""✅ पुस्तक सफलतापूर्वक लोड की गई!
-📖 पुस्तक: {book_title}
-✍️ लेखक: {author_name}
-📄 टेक्स्ट खंड: {len(chunks)}
-📊 शब्द संख्या: {word_count:,}
-📝 अक्षर संख्या: {char_count:,}
-अब आप प्रश्न पूछ सकते हैं।"""
-        return success_msg, book_title, author_name, gr.update(visible=True)
-    except Exception as e:
-        return f"पुस्तक लोड करने में त्रुटि: {str(e)}", "", "", gr.update(visible=False)
-def create_book_gallery():
-    """Create a gallery of available books with thumbnails"""
-    available_books = get_available_books()
-    if not available_books:
-        return [], "कोई पुस्तक उपलब्ध नहीं है।"
-    # Create gallery data: list of (image_path, title) tuples
-    gallery_data = []
-    book_names = ["None"]  # Add None option
-    for book in available_books:
-        if book['thumbnail'] and os.path.exists(book['thumbnail']):
-            gallery_data.append((book['thumbnail'], book['display_name']))
-            book_names.append(book['name'])
-    return gallery_data, book_names
-def handle_gallery_selection(evt: gr.SelectData):
-    """Handle book selection from gallery click"""
-    if evt.index is None:
-        return "None"
-    # Get available books to map gallery index to book name
-    available_books = get_available_books()
-    # Filter books that have valid thumbnails (same as in create_book_gallery)
-    valid_books = []
-    for book in available_books:
-        if book['thumbnail'] and os.path.exists(book['thumbnail']):
-            valid_books.append(book)
-    # Check if the selected index is valid
-    if 0 <= evt.index < len(valid_books):
-        selected_book = valid_books[evt.index]
-        return selected_book['name']
-    return "None"
-# Create Gradio interface
 def create_interface():
-    """Create the Gradio interface"""
     with gr.Blocks(
-        title="Hindi RAG Voice Demo - Groq Whisper",
         theme=gr.themes.Soft(),
         css="""
-        .main-header { text-align: center; color: #2E86AB; margin-bottom: 2rem; }
-        .section-header { color: #A23B72; font-weight: bold; margin: 1rem 0; }
-        .info-box { background: #F18F01; color: white; padding: 1rem; border-radius: 8px; margin: 1rem 0; }
         """
     ) as demo:
         gr.HTML("""
-        <div class="main-header">
-            <h1>📚 Hindi RAG Voice Demo - Groq Whisper</h1>
-            <h3>हिंदी पुस्तक आवाज़ सहायक</h3>
-            <p>AI-powered interactive book assistant with Groq Whisper API</p>
-            <p><em>Audio transcription limited to first 10 seconds</em></p>
         </div>
         """)
-        # Authentication section
         with gr.Group(visible=True) as auth_section:
-            gr.Markdown("### 🔐 Access Control / पहुंच नियंत्रण")
-            gr.Markdown("Please enter the passcode to access the demo / कृपया डेमो एक्सेस करने के लिए पासकोड दर्ज करें")
             passcode_input = gr.Textbox(
-                label="Passcode / पासकोड",
                 type="password",
-                placeholder="Enter passcode here..."
             )
-            auth_button = gr.Button("🔓 Access Demo / डेमो एक्सेस करें", variant="primary")
             auth_status = gr.Textbox(label="Status", interactive=False)
-        # Main application section
         with gr.Group(visible=False) as main_section:
-            # Session info
-            with gr.Row():
-                with gr.Column(scale=3):
-                    gr.Markdown("### 📊 Session Information")
-                with gr.Column(scale=1):
-                    query_counter = gr.Textbox(
-                        label="Query Usage",
-                        value="प्रश्न: 0/5",
-                        interactive=False
-                    )
-            # Document selection/upload section
-            gr.Markdown("### 📁 Step 1: Choose Your Book / अपनी पुस्तक चुनें")
-            # Book selection section
-            with gr.Tab("📚 Select from Library / पुस्तकालय से चुनें"):
-                gr.Markdown("**Choose from available books / उपलब्ध पुस्तकों में से चुनें**")
-                # Initialize book gallery and dropdown
-                available_books = get_available_books()
-                gallery_data, book_options = create_book_gallery()
-                if available_books:
-                    book_gallery = gr.Gallery(
-                        value=gallery_data,
-                        label="Available Books / उपलब्ध पुस्तकें",
-                        show_label=True,
-                        elem_id="book_gallery",
-                        columns=3,
-                        rows=2,
-                        height="auto",
-                        allow_preview=True
-                    )
-                    book_dropdown = gr.Dropdown(
-                        choices=book_options,
-                        label="Select Book / पुस्तक चुनें",
-                        value="None",
-                        interactive=True
-                    )
-                    select_book_btn = gr.Button("📖 Load Selected Book / चुनी गई पुस्तक लोड करें", variant="primary")
-                else:
-                    gr.Markdown("⚠️ No books available in library / पुस्तकालय में कोई पुस्तक उपलब्ध नहीं है")
-                    book_dropdown = gr.Dropdown(choices=["None"], value="None", visible=False)
-                    select_book_btn = gr.Button("No books available", interactive=False)
-            # PDF upload section
-            with gr.Tab("📄 Upload PDF / PDF अपलोड करें"):
-                gr.Markdown("**Upload your own PDF / अपनी PDF अपलोड करें**")
-                gr.Markdown("**Note:** Please ensure your PDF contains selectable text (not scanned images)")
                 pdf_upload = gr.File(
-                    label="Upload PDF / PDF अपलोड करें",
                     file_types=[".pdf"],
                     type="filepath"
                 )
-                process_pdf_btn = gr.Button("📖 Process PDF / PDF प्रसंस्करित करें", variant="primary")
-            doc_status = gr.Textbox(label="Processing Status / प्रसंस्करण स्थिति", interactive=False)
-            with gr.Row():
-                book_title_display = gr.Textbox(label="Book Title / पुस्तक शीर्षक", interactive=False)
-                author_display = gr.Textbox(label="Author / लेखक", interactive=False)
-            # Query section
-            with gr.Group(visible=False) as query_section:
-                gr.Markdown("### 🎤 Step 2: Ask Questions / प्रश्न पूछें")
-                gr.Markdown("**Note:** Audio recordings are limited to first 10 seconds for transcription")
                 with gr.Row():
-                    with gr.Column():
-                        audio_input = gr.Audio(
-                            label="🎙️ Record Voice Question / आवाज़ प्रश्न रिकॉर्ड करें",
-                            sources=["microphone"],
-                            type="filepath"
-                        )
-                    with gr.Column():
-                        text_input = gr.Textbox(
-                            label="💬 Or Type Question / या प्रश्न टाइप करें",
-                            placeholder="उदाहरण: इस पुस्तक में मुख्य विषय क्या है?",
-                            lines=3
-                        )
-                ask_button = gr.Button("🔍 Get Answer / उत्तर पाएं", variant="primary", size="lg")
-                # Response section
-                with gr.Column():
-                    response_text = gr.Textbox(
-                        label="📝 Response / उत्तर",
-                        lines=8,
-                        interactive=False
                     )
-                    response_audio = gr.Audio(
-                        label="🔊 Audio Response / आवाज़ उत्तर",
-                        interactive=False
                     )
-            # Reset section
-            gr.Markdown("---")
-            with gr.Row():
-                reset_btn = gr.Button("🔄 Start New Session / नया सत्र शुरू करें", variant="secondary")
-                with gr.Column():
-                    gr.Markdown("""
-                    **Requirements & Limits / आवश्यकताएं और सीमा:**
-                    - PDF with selectable text (no scanned images)
-                    - Max file size: 10MB
-                    - Max queries: 5 per session
-                    - Audio transcription: First 10 seconds only
-                    - Supported: Hindi & English text
-                    - Requires: Groq API key and ffmpeg
-                    """)
-        # Event handlers
         auth_button.click(
             authenticate,
             inputs=[passcode_input],
             outputs=[auth_section, main_section, auth_status]
         )
-        # Book selection event handler
-        if 'select_book_btn' in locals():
-            select_book_btn.click(
-                process_selected_book,
-                inputs=[book_dropdown],
-                outputs=[doc_status, book_title_display, author_display, query_section]
-            )
-        # Gallery selection event handler
-        if 'book_gallery' in locals():
-            book_gallery.select(
-                handle_gallery_selection,
-                outputs=[book_dropdown]
-            )
-        # PDF upload event handler
-        if 'process_pdf_btn' in locals():
-            process_pdf_btn.click(
-                process_document,
-                inputs=[pdf_upload],
-                outputs=[doc_status, book_title_display, author_display, query_section]
-            )
-        ask_button.click(
             process_query,
-            inputs=[audio_input, text_input],
-            outputs=[response_text, response_audio, query_counter]
         )
         reset_btn.click(
             reset_session,
-            outputs=[doc_status, book_title_display, author_display, query_section, query_counter]
         )
-        # Load models on startup
         demo.load(load_models)
     return demo
-# Main function
 def main():
-    """Main function to launch the application"""
-    print("🚀 Starting Hindi RAG Voice Demo (Groq Whisper API Version)...")
-    print("📋 Loading AI models (this may take a moment)...")
-    # Pre-load models
     load_models()
-    # Create and launch interface
     demo = create_interface()
-    print("✅ Models loaded successfully!")
-    print(f"🔑 Demo passcode: {CONFIG['PASSCODE']}")
-    print("🌐 Launching web interface...")
     demo.launch(
         share=True,

 import gradio as gr
 import os
 import tempfile
 import time
 import uuid
 from datetime import datetime
+import fitz
 import requests
 import json
 import numpy as np
 import warnings
 warnings.filterwarnings("ignore")
 CONFIG = {
     'PASSCODE': os.getenv('PASSCODE'),
+    'MAX_FILE_SIZE': 10 * 1024 * 1024,
+    'MAX_QUERIES_PER_SESSION': 10,
+    'MAX_AUDIO_DURATION': 120,
     'GROQ_API_KEY': os.getenv('GAPI'),
+    'AUDIO_CLIP_DURATION': 10,
     'BOOK_THUMBNAILS_DIR': './book_thumbnails',
     'OCR_BOOKS_DIR': './ocr_books',
 }
 SESSION_DATA = {
     'authenticated': False,
     'session_id': str(uuid.uuid4()),
     'groq_client': None
 }
+# Predefined questions for books
+PREDEFINED_QUESTIONS = {
+    'general': [
+        "इस पुस्तक का मुख्य विषय क्या है?",
+        "लेखक ने इस पुस्तक में क्या संदेश दिया है?",
+        "इस पुस्तक में कौन से मुख्य पात्र हैं?"
+    ],
+    'analysis': [
+        "इस पुस्तक की मुख्य शिक्षा क्या है?",
+        "लेखक की लेखन शैली कैसी है?",
+        "इस पुस्तक में कौन सा मुख्य संघर्ष है?"
+    ],
+    'content': [
+        "इस कहानी का क्या अंत है?",
+        "पुस्तक में कौन सी मुख्य घटनाएं हैं?",
+        "मुख्य पात्र का चरित्र कैसा है?"
+    ]
+}
 def load_models():
     if SESSION_DATA['embedding_model'] is None:
         print("Loading embedding model...")
         SESSION_DATA['embedding_model'] = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
     if SESSION_DATA['groq_client'] is None:
         if CONFIG['GROQ_API_KEY']:
             print("Initializing Groq client...")
             SESSION_DATA['groq_client'] = Groq(api_key=CONFIG['GROQ_API_KEY'])
         else:
             print("Warning: GROQ_API_KEY not found")
     return SESSION_DATA['embedding_model'], SESSION_DATA['groq_client']
 def trim_audio_to_duration(input_path, output_path, duration=10):
     """Write the first `duration` seconds of `input_path` to `output_path` using ffmpeg.

     Args:
         input_path: Path of the source audio file.
         output_path: Path the trimmed audio is written to.
         duration: Value handed to ffmpeg's ``-t`` option.

     Returns:
         True when ffmpeg exits with status 0, False on a non-zero exit or when
         launching ffmpeg raises (the error is printed, never propagated).
     """
     try:
         cmd = [
             'ffmpeg', '-i', input_path,
             '-t', str(duration),
             # Stream-copy the audio instead of re-encoding it.
             '-acodec', 'copy',
+            '-y',  # overwrite output_path without prompting
             output_path
         ]
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode == 0:
             return True
         else:
             print(f"FFmpeg error: {result.stderr}")
             return False
     except Exception as e:
         print(f"Error trimming audio: {str(e)}")
         return False
 def transcribe_audio(audio_file):
     if audio_file is None:
         return ""
     if not CONFIG['GROQ_API_KEY'] or SESSION_DATA['groq_client'] is None:
         return "Error: Groq API key not configured"
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             trimmed_audio_path = tmp_file.name
         if not trim_audio_to_duration(audio_file, trimmed_audio_path, CONFIG['AUDIO_CLIP_DURATION']):
             print("Warning: Could not trim audio, using full duration")
             trimmed_audio_path = audio_file
         with open(trimmed_audio_path, "rb") as file:
             transcription = SESSION_DATA['groq_client'].audio.transcriptions.create(
                 file=(os.path.basename(trimmed_audio_path), file.read()),
                 model="whisper-large-v3",
                 response_format="verbose_json",
+                language="hi"
             )
         if trimmed_audio_path != audio_file:
             try:
                 os.unlink(trimmed_audio_path)
             except:
                 pass
         return transcription.text
     except Exception as e:
         try:
             if 'trimmed_audio_path' in locals() and trimmed_audio_path != audio_file:
                 os.unlink(trimmed_audio_path)
         except:
             pass
         return f"Transcription error: {str(e)}"
 def text_to_speech(text):
     if not text or len(text.strip()) == 0:
         return None
     try:
         tts = gTTS(text=text, lang='hi', slow=False)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
             tts.save(tmp_file.name)
             return tmp_file.name
         print(f"TTS Error: {str(e)}")
         return None
 def extract_text_from_pdf(pdf_path):
     """Return the selectable text of every page of a PDF.

     Args:
         pdf_path: Path of the PDF file to read.

     Returns:
         The text of all non-blank pages, each followed by a newline. On
         failure a message starting with "Error" is returned instead of
         raising: either the PDF has no selectable text, or opening/reading
         it raised an exception.
     """
     try:
         # The context manager closes the document even when a page fails to load.
         with fitz.open(pdf_path) as pdf_document:
             total_pages = len(pdf_document)
             print(f"Processing PDF with {total_pages} pages...")
             page_texts = []
             for page_num in range(total_pages):
                 page_text = pdf_document.load_page(page_num).get_text()
                 if page_text.strip():
                     page_texts.append(page_text + "\n")
     except Exception as e:
         return f"Error extracting text: {str(e)}"
     # Join once instead of growing a string page by page.
     text_content = "".join(page_texts)
     if not text_content.strip():
         return "Error: No selectable text found in PDF. Please ensure the PDF contains selectable text, not just images."
     return text_content
 def extract_metadata(text):
     """Guess the author line and the book title from the start of a document.

     Only the first 25 raw lines are inspected (blank ones are ignored). A line
     containing an author keyword becomes the author (the last such line wins).
     The first other line that is 11-99 characters long and has no digit in its
     first 20 characters becomes the title.

     Returns:
         ``(author_name, book_title)``; Hindi placeholders are returned for
         whichever value could not be found.
     """
     author_markers = ('लेखक', 'author', 'by', 'द्वारा', 'रचयिता')
     placeholder_title = "अनाम पुस्तक"
     author_name = "अज्ञात लेखक"
     book_title = placeholder_title

     for raw_line in text.split('\n')[:25]:
         candidate = raw_line.strip()
         if not candidate:
             continue
         lowered = candidate.lower()
         if any(marker in lowered for marker in author_markers):
             author_name = candidate
             continue
         if book_title != placeholder_title:
             continue
         if not (10 < len(candidate) < 100):
             continue
         if any(ch.isdigit() for ch in candidate[:20]):
             continue
         book_title = candidate

     return author_name, book_title
 def chunk_text(text, chunk_size=400, overlap=50):
     """Split text into overlapping chunks of whitespace-separated words.

     Args:
         text: The text to split.
         chunk_size: Maximum number of words per chunk; must be positive.
         overlap: Number of words shared by consecutive chunks; must be
             smaller than `chunk_size`.

     Returns:
         A list of chunks, each a single-space-joined run of words. Empty or
         whitespace-only text yields an empty list.

     Raises:
         ValueError: If `chunk_size` is not positive or `overlap` is not
             smaller than `chunk_size`. Such values previously either
             produced no chunks at all or an opaque range() error.
     """
     if chunk_size <= 0:
         raise ValueError("chunk_size must be positive")
     if overlap >= chunk_size:
         raise ValueError("overlap must be smaller than chunk_size")
     words = text.split()
     step = chunk_size - overlap
     return [' '.join(words[start:start + chunk_size]) for start in range(0, len(words), step)]
 def create_embeddings(chunks):
     """Embed the chunks and return a FAISS inner-product index over them.

     The vectors are L2-normalised before being added, so inner-product
     scores from the index are cosine similarities.
     """
     encoder = load_models()[0]
     vectors = encoder.encode(chunks, show_progress_bar=False)
     vector_index = faiss.IndexFlatIP(vectors.shape[1])
     # normalize_L2 works in place on the array.
     faiss.normalize_L2(vectors)
     vector_index.add(vectors.astype('float32'))
     return vector_index
 def search_similar_chunks(query, top_k=3):
     if SESSION_DATA['faiss_index'] is None or not SESSION_DATA['document_chunks']:
         return []
     embedding_model, _ = load_models()
     query_embedding = embedding_model.encode([query], show_progress_bar=False)
     faiss.normalize_L2(query_embedding)
     scores, indices = SESSION_DATA['faiss_index'].search(query_embedding.astype('float32'), top_k)
     results = []
     for i, idx in enumerate(indices[0]):
         if idx >= 0 and idx < len(SESSION_DATA['document_chunks']):
                 'text': SESSION_DATA['document_chunks'][idx],
                 'score': float(scores[0][i])
             })
     return results
 def call_groq_api(prompt, model="llama-3.1-8b-instant"):
     if not CONFIG['GROQ_API_KEY'] or CONFIG['GROQ_API_KEY'] == 'your_groq_api_key_here':
         return "⚠️ Groq API key not configured. Please set GROQ_API_KEY environment variable."
     url = "https://api.groq.com/openai/v1/chat/completions"
     headers = {
         "Authorization": f"Bearer {CONFIG['GROQ_API_KEY']}",
         "Content-Type": "application/json"
     }
     data = {
         "model": model,
         "messages": [{"role": "user", "content": prompt}],
         "temperature": 0.7,
+        "max_tokens": 600
     }
     try:
         response = requests.post(url, headers=headers, json=data, timeout=30)
         response.raise_for_status()
         return f"Error calling LLM: {str(e)}"
 def generate_rag_response(query, context_chunks):
     # Build a Hindi prompt from the retrieved chunks and return the LLM reply
     # from call_groq_api. When no chunks were retrieved, a fixed Hindi
     # "not enough information" message is returned without calling the LLM.
     # context_chunks: list of dicts with a 'text' key.
     # NOTE(review): in this view the prompt never interpolates `context` or
     # `query`; the diff appears to elide those prompt lines - confirm against
     # the full file.
     if not context_chunks:
         return "मुझे इस प्रश्न का उत्तर देने के लिए पर्याप्त जानकारी नहीं मिली।"
     context = "\n\n".join([chunk['text'] for chunk in context_chunks])
     prompt = f"""आप एक हिंदी पुस्तक सहायक हैं। निम्नलिखित जानकारी के आधार पर प्रश्न का उत्तर दें:
 पुस्तक: {SESSION_DATA['book_title']}
 - उत्तर की शुरुआत में पुस्तक और लेखक का संदर्भ शामिल करें
 - केवल दिए गए संदर्भ के आधार पर ही उत्तर दें
 """
     response = call_groq_api(prompt)
     return response
 def authenticate(passcode):
     if passcode == CONFIG['PASSCODE']:
         SESSION_DATA['authenticated'] = True
+        return gr.update(visible=False), gr.update(visible=True), "✅ Welcome!"
     else:
+        return gr.update(visible=True), gr.update(visible=False), "❌ Invalid passcode"
 def process_document(pdf_file):
     if pdf_file is None:
+        return "Please upload a PDF file", "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])
     try:
         file_size = os.path.getsize(pdf_file.name)
         if file_size > CONFIG['MAX_FILE_SIZE']:
+            return f"File too large! Max size: {CONFIG['MAX_FILE_SIZE'] // (1024*1024)}MB", "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])
         text_content = extract_text_from_pdf(pdf_file.name)
         if not text_content.strip() or "Error" in text_content:
+            return text_content, "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])
         author_name, book_title = extract_metadata(text_content)
         SESSION_DATA['author_name'] = author_name
         SESSION_DATA['book_title'] = book_title
         chunks = chunk_text(text_content)
         SESSION_DATA['document_chunks'] = chunks
         SESSION_DATA['faiss_index'] = create_embeddings(chunks)
         SESSION_DATA['query_count'] = 0
+        # Generate predefined questions
+        questions = []
+        for category in PREDEFINED_QUESTIONS.values():
+            questions.extend(category)
+        success_msg = f"✅ Document processed successfully!"
+        return success_msg, book_title, author_name, gr.update(visible=False), gr.update(visible=True), gr.update(choices=questions[:6])
     except Exception as e:
+        return f"Error processing document: {str(e)}", "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])
+def show_questions():
+    """Show the questions section"""
+    return gr.update(visible=False), gr.update(visible=True)
+def process_query(audio_input, text_input, predefined_question):
     if SESSION_DATA['query_count'] >= CONFIG['MAX_QUERIES_PER_SESSION']:
+        return "⚠️ Query limit reached", None
     if not SESSION_DATA['document_chunks']:
+        return "Please upload a document first", None
     query_text = ""
+    # Priority: Predefined > Audio > Text
+    if predefined_question and predefined_question != "Select a question...":
+        query_text = predefined_question
+    elif audio_input:
         query_text = transcribe_audio(audio_input)
         if "error" in query_text.lower():
             query_text = ""
     if not query_text.strip() and text_input.strip():
         query_text = text_input.strip()
     if not query_text.strip():
+        return "Please ask a question", None
     try:
         similar_chunks = search_similar_chunks(query_text)
         response_text = generate_rag_response(query_text, similar_chunks)
         audio_response = text_to_speech(response_text)
         SESSION_DATA['query_count'] += 1
+        formatted_response = f"**प्रश्न:** {query_text}\n\n**उत्तर:** {response_text}"
+        return formatted_response, audio_response
     except Exception as e:
+        return f"Error processing query: {str(e)}", None
 def reset_session():
     """Clear the per-document state and return the UI to the upload step.

     Resets the query counter, the stored chunks and the book title, and
     issues a fresh session id.
     NOTE(review): 'faiss_index' and 'author_name' are not reset in the lines
     visible here - confirm against the full file.

     Returns:
         Seven values for (doc_status, book_title_display, author_display,
         upload_section, book_info_section, query_section, predefined_dropdown).
     """
     SESSION_DATA.update({
         'query_count': 0,
         'document_chunks': [],
         'book_title': '',
         'session_id': str(uuid.uuid4())
     })
+    return "✅ New session started!", "", "", gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(choices=[])
 def create_interface():
     """Build the Gradio Blocks UI and wire its event handlers.

     Layout: a passcode panel (visible first) and a main panel holding three
     steps - upload a PDF, review the detected title/author, ask questions.
     Questions can be asked from a dropdown, by voice, or by typing; each tab
     has its own button, and all three call process_query with placeholder
     gr.State values for the inputs that tab does not use.

     Returns:
         The configured gr.Blocks instance (not yet launched).
     """
     with gr.Blocks(
+        title="Hindi Book Assistant",
         theme=gr.themes.Soft(),
         css="""
+        .main-container { max-width: 1200px; margin: 0 auto; }
+        .section-header { font-size: 1.2em; font-weight: bold; margin: 1em 0; }
+        .upload-area { border: 2px dashed #ccc; padding: 2em; text-align: center; margin: 1em 0; }
         """
     ) as demo:
         gr.HTML("""
+        <div style="text-align: center; padding: 2em;">
+            <h1>📚 Hindi Book Assistant</h1>
+            <p>AI-powered assistant for Hindi books with voice support</p>
         </div>
         """)
+        # Authentication Section
         with gr.Group(visible=True) as auth_section:
+            gr.Markdown("### 🔐 Enter Passcode")
             passcode_input = gr.Textbox(
+                label="Passcode",
                 type="password",
+                placeholder="Enter access code..."
             )
+            auth_button = gr.Button("🔓 Access", variant="primary")
             auth_status = gr.Textbox(label="Status", interactive=False)
+        # Main Interface
         with gr.Group(visible=False) as main_section:
+            # Step 1: Upload Document
+            with gr.Group(visible=True) as upload_section:
+                gr.Markdown("### 📄 Upload Your Book")
                 pdf_upload = gr.File(
+                    label="Choose PDF file",
                     file_types=[".pdf"],
                     type="filepath"
                 )
+                process_btn = gr.Button("📖 Process Book", variant="primary", size="lg")
+                doc_status = gr.Textbox(label="Status", interactive=False)
+            # Step 2: Book Info (shown after processing)
+            with gr.Group(visible=False) as book_info_section:
+                gr.Markdown("### 📚 Book Information")
                 with gr.Row():
+                    book_title_display = gr.Textbox(label="Book Title", interactive=False)
+                    author_display = gr.Textbox(label="Author", interactive=False)
+                continue_btn = gr.Button("➡️ Continue to Questions", variant="primary", size="lg")
+            # Step 3: Ask Questions (shown after continue)
+            with gr.Group(visible=False) as query_section:
+                gr.Markdown("### 💬 Ask Questions About Your Book")
+                with gr.Tab("🎯 Quick Questions"):
+                    predefined_dropdown = gr.Dropdown(
+                        label="Choose a question",
+                        choices=[],
+                        value=None,
+                        interactive=True
+                    )
+                    ask_predefined_btn = gr.Button("🔍 Ask This Question", variant="primary")
+                with gr.Tab("🎤 Voice Question"):
+                    audio_input = gr.Audio(
+                        label="Record your question (Hindi/English)",
+                        sources=["microphone"],
+                        type="filepath"
                     )
+                    ask_voice_btn = gr.Button("🔍 Ask Voice Question", variant="primary")
+                with gr.Tab("⌨️ Type Question"):
+                    text_input = gr.Textbox(
+                        label="Type your question",
+                        placeholder="Example: इस पुस्तक का मुख्य विषय क्या है?",
+                        lines=2
                     )
+                    ask_text_btn = gr.Button("🔍 Ask Text Question", variant="primary")
+                # Response Section
+                gr.Markdown("### 📝 Answer")
+                response_text = gr.Textbox(
+                    label="Response",
+                    lines=6,
+                    interactive=False
+                )
+                response_audio = gr.Audio(
+                    label="🔊 Audio Response",
+                    interactive=False
+                )
+                # Reset Button
+                gr.Markdown("---")
+                reset_btn = gr.Button("🔄 Start New Session", variant="secondary")
+        # Event Handlers
         auth_button.click(
             authenticate,
             inputs=[passcode_input],
             outputs=[auth_section, main_section, auth_status]
         )
+        process_btn.click(
+            process_document,
+            inputs=[pdf_upload],
+            outputs=[doc_status, book_title_display, author_display, upload_section, book_info_section, predefined_dropdown]
+        )
+        continue_btn.click(
+            show_questions,
+            outputs=[book_info_section, query_section]
+        )
+        ask_predefined_btn.click(
             process_query,
+            inputs=[gr.State(None), gr.State(""), predefined_dropdown],
+            outputs=[response_text, response_audio]
         )
+        ask_voice_btn.click(
+            process_query,
+            inputs=[audio_input, gr.State(""), gr.State("")],
+            outputs=[response_text, response_audio]
+        )
+        ask_text_btn.click(
+            process_query,
+            inputs=[gr.State(None), text_input, gr.State("")],
+            outputs=[response_text, response_audio]
+        )
         reset_btn.click(
             reset_session,
+            outputs=[doc_status, book_title_display, author_display, upload_section, book_info_section, query_section, predefined_dropdown]
         )
         # Make sure the models are loaded when the page is opened.
         demo.load(load_models)
     return demo
 def main():
+    print("🚀 Starting Hindi Book Assistant...")
+    print("📋 Loading AI models...")
     load_models()
     demo = create_interface()
+    print("✅ Ready!")
+    print(f"🔑 Passcode: {CONFIG['PASSCODE']}")
     demo.launch(
         share=True,