Spaces:
Running
Running
| import gradio as gr | |
| import os | |
| import tempfile | |
| import time | |
| import uuid | |
| from datetime import datetime | |
| import fitz | |
| import requests | |
| import json | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| from groq import Groq | |
| from gtts import gTTS | |
| import subprocess | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
# Application-wide settings. Secrets are read from the environment once, at
# import time.
CONFIG = {
    # Access code compared against the value typed into the login box.
    'PASSCODE': os.getenv('PASSCODE'),
    # Upload limit in bytes (compared against os.path.getsize()).
    'MAX_FILE_SIZE': 10 * 1024 * 1024,
    'MAX_QUERIES_PER_SESSION': 10,
    # Not referenced by the code visible in this file; presumably seconds.
    'MAX_AUDIO_DURATION': 120,
    # 'GAPI' is the historical variable name and keeps precedence; the
    # user-facing error message tells operators to set GROQ_API_KEY, so that
    # name is honoured as well.
    'GROQ_API_KEY': os.getenv('GAPI') or os.getenv('GROQ_API_KEY'),
    # Passed to ffmpeg's -t option when trimming recorded questions.
    'AUDIO_CLIP_DURATION': 10,
    # Not referenced by the code visible in this file.
    'BOOK_THUMBNAILS_DIR': './book_thumbnails',
    'OCR_BOOKS_DIR': './ocr_books',
}
# Mutable, module-level state shared by every handler in this file: login
# flag, per-session query counter, the processed document (chunks + FAISS
# index + detected metadata) and the lazily created model/client objects.
SESSION_DATA = dict(
    authenticated=False,
    session_id=str(uuid.uuid4()),
    query_count=0,
    document_chunks=[],
    faiss_index=None,
    author_name='',
    book_title='',
    embedding_model=None,
    groq_client=None,
)
# Ready-made Hindi questions, grouped by theme, offered in the
# "Quick Questions" dropdown once a book has been processed.
PREDEFINED_QUESTIONS = dict(
    general=[
        "इस पुस्तक का मुख्य विषय क्या है?",
        "लेखक ने इस पुस्तक में क्या संदेश दिया है?",
        "इस पुस्तक में कौन से मुख्य पात्र हैं?",
    ],
    analysis=[
        "इस पुस्तक की मुख्य शिक्षा क्या है?",
        "लेखक की लेखन शैली कैसी है?",
        "इस पुस्तक में कौन सा मुख्य संघर्ष है?",
    ],
    content=[
        "इस कहानी का क्या अंत है?",
        "पुस्तक में कौन सी मुख्य घटनाएं हैं?",
        "मुख्य पात्र का चरित्र कैसा है?",
    ],
)
def load_models():
    """Create the embedding model and Groq client on first use and cache them.

    Both objects are stored in SESSION_DATA, so later calls reuse them.
    The Groq client is only created when CONFIG['GROQ_API_KEY'] is set.

    Returns:
        Tuple ``(embedding_model, groq_client)``; ``groq_client`` is None
        when no API key is configured.
    """
    model = SESSION_DATA['embedding_model']
    if model is None:
        print("Loading embedding model...")
        model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
        SESSION_DATA['embedding_model'] = model

    client = SESSION_DATA['groq_client']
    if client is None:
        api_key = CONFIG['GROQ_API_KEY']
        if not api_key:
            print("Warning: GROQ_API_KEY not found")
        else:
            print("Initializing Groq client...")
            client = Groq(api_key=api_key)
            SESSION_DATA['groq_client'] = client

    return model, client
def trim_audio_to_duration(input_path, output_path, duration=10):
    """Copy at most the first *duration* seconds of an audio file using ffmpeg.

    The audio stream is copied without re-encoding (``-acodec copy``) and an
    existing *output_path* is overwritten (``-y``).

    Returns:
        True when ffmpeg exits with status 0, False when it reports an error
        or cannot be run at all. Failures are printed, never raised.
    """
    try:
        completed = subprocess.run(
            ['ffmpeg', '-i', input_path, '-t', str(duration), '-acodec', 'copy', '-y', output_path],
            capture_output=True,
            text=True,
        )
        if completed.returncode != 0:
            print(f"FFmpeg error: {completed.stderr}")
            return False
        return True
    except Exception as exc:
        print(f"Error trimming audio: {str(exc)}")
        return False
def transcribe_audio(audio_file):
    """Transcribe a recorded question to Hindi text with Groq Whisper.

    The recording is first trimmed to CONFIG['AUDIO_CLIP_DURATION'] seconds;
    if trimming fails, the untrimmed recording is sent instead.

    Args:
        audio_file: Path of the recorded audio file, or None.

    Returns:
        The transcribed text; "" when *audio_file* is None; or a message
        starting with "Error:" / "Transcription error:" on failure.
        Failures are reported through the return value, never raised.
    """
    if audio_file is None:
        return ""
    if not CONFIG['GROQ_API_KEY'] or SESSION_DATA['groq_client'] is None:
        return "Error: Groq API key not configured"

    tmp_path = None
    try:
        # Only a unique path is needed here; ffmpeg writes the file itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            tmp_path = tmp_file.name

        source_path = tmp_path
        if not trim_audio_to_duration(audio_file, tmp_path, CONFIG['AUDIO_CLIP_DURATION']):
            print("Warning: Could not trim audio, using full duration")
            source_path = audio_file

        with open(source_path, "rb") as file:
            transcription = SESSION_DATA['groq_client'].audio.transcriptions.create(
                file=(os.path.basename(source_path), file.read()),
                model="whisper-large-v3",
                response_format="verbose_json",
                language="hi"
            )
        return transcription.text
    except Exception as e:
        return f"Transcription error: {str(e)}"
    finally:
        # Always remove the temp file, including when trimming failed and the
        # original recording was used (previously that file was leaked).
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
def text_to_speech(text):
    """Synthesize Hindi speech for *text* with gTTS.

    Args:
        text: The text to speak.

    Returns:
        Path of a newly created ``.mp3`` temp file (the caller owns it), or
        None when *text* is empty/whitespace or synthesis fails.
    """
    if not text or not text.strip():
        return None

    tmp_path = None
    try:
        tts = gTTS(text=text, lang='hi', slow=False)
        # Reserve a unique path and close the handle before gTTS writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        tts.save(tmp_path)
        return tmp_path
    except Exception as e:
        print(f"TTS Error: {str(e)}")
        # Do not leave an empty/partial file behind when synthesis fails.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return None
def extract_text_from_pdf(pdf_path):
    """Extract the selectable text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all non-blank pages, each followed by a newline. On
        failure a message is returned instead of raising: it starts with
        "Error: " when the PDF has no selectable text and with
        "Error extracting text:" when reading fails.
    """
    try:
        pdf_document = fitz.open(pdf_path)
        try:
            total_pages = len(pdf_document)
            print(f"Processing PDF with {total_pages} pages...")
            page_texts = []
            for page_num in range(total_pages):
                page_text = pdf_document.load_page(page_num).get_text()
                if page_text.strip():
                    page_texts.append(page_text + "\n")
        finally:
            # Release the document even when reading a page fails.
            pdf_document.close()
    except Exception as e:
        return f"Error extracting text: {str(e)}"

    text_content = "".join(page_texts)
    if not text_content.strip():
        return "Error: No selectable text found in PDF. Please ensure the PDF contains selectable text, not just images."
    return text_content
def extract_metadata(text):
    """Guess the author line and the book title from the start of *text*.

    Only the non-blank lines among the first 25 lines are inspected.
    A line counts as the author line when it contains one of the author
    markers; the last such line wins. The title is the first other line that
    is 11-99 characters long and has no digit in its first 20 characters.

    Args:
        text: Full text of the book.

    Returns:
        Tuple ``(author_name, book_title)``; Hindi placeholder strings are
        returned for whichever value could not be detected.
    """
    # Markers that are safe to match anywhere inside the line.
    substring_markers = ('लेखक', 'author', 'द्वारा', 'रचयिता')
    author_name = "अज्ञात लेखक"
    book_title = "अनाम पुस्तक"

    lines = [line.strip() for line in text.split('\n')[:25] if line.strip()]
    for line in lines:
        lowered = line.lower()
        # "by" must be a whole word: as a substring it also matches words such
        # as "Ruby" or "hobbyists" and would turn title lines into author lines.
        words = {word.strip('.,:;!?()[]"\'-') for word in lowered.split()}
        if 'by' in words or any(marker in lowered for marker in substring_markers):
            author_name = line
        elif 10 < len(line) < 100 and not any(char.isdigit() for char in line[:20]):
            if book_title == "अनाम पुस्तक":
                book_title = line
    return author_name, book_title
def chunk_text(text, chunk_size=400, overlap=50):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by consecutive chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        List of chunk strings (words joined by single spaces); empty when
        *text* contains no words.

    Raises:
        ValueError: If *chunk_size* or *overlap* is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must be non-negative and smaller than chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))
        # Stop once a chunk reaches the end of the text; a further chunk would
        # only repeat words that are already inside this one.
        if start + chunk_size >= len(words):
            break
    return chunks
def create_embeddings(chunks):
    """Embed *chunks* and build a FAISS inner-product index over them.

    Vectors are L2-normalised before indexing, so inner-product scores
    returned by the index are cosine similarities.

    Args:
        chunks: List of text chunks to index.

    Returns:
        A ``faiss.IndexFlatIP`` holding one vector per chunk, in chunk order.
    """
    embedding_model, _ = load_models()
    embeddings = embedding_model.encode(chunks, show_progress_bar=False)
    # FAISS works on contiguous float32 data, and normalize_L2 modifies the
    # array in place, so the cast has to happen before normalising.
    embeddings = np.ascontiguousarray(embeddings, dtype='float32')
    faiss.normalize_L2(embeddings)
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    return index
def search_similar_chunks(query, top_k=3):
    """Return the stored document chunks most similar to *query*.

    Args:
        query: The question text.
        top_k: Maximum number of chunks to return.

    Returns:
        List of ``{'text': str, 'score': float}`` dicts in the order FAISS
        returns them; empty when no document is indexed or *top_k* is not
        positive.
    """
    chunks = SESSION_DATA['document_chunks']
    if SESSION_DATA['faiss_index'] is None or not chunks:
        return []
    if top_k <= 0:
        return []

    embedding_model, _ = load_models()
    query_embedding = embedding_model.encode([query], show_progress_bar=False)
    # Cast before normalising: normalize_L2 works in place on float32 data.
    query_embedding = np.ascontiguousarray(query_embedding, dtype='float32')
    faiss.normalize_L2(query_embedding)
    scores, indices = SESSION_DATA['faiss_index'].search(query_embedding, top_k)

    # FAISS pads the result with -1 when fewer than top_k vectors exist.
    return [
        {'text': chunks[idx], 'score': float(score)}
        for score, idx in zip(scores[0], indices[0])
        if 0 <= idx < len(chunks)
    ]
def call_groq_api(prompt, model="llama-3.1-8b-instant"):
    """Send *prompt* as a single user message to Groq's chat-completions API.

    Args:
        prompt: The full prompt text.
        model: Groq model identifier.

    Returns:
        The text of the first completion choice, or a human-readable message
        when the API key is missing or the request fails (nothing is raised).
    """
    api_key = CONFIG['GROQ_API_KEY']
    if not api_key or api_key == 'your_groq_api_key_here':
        return "⚠️ Groq API key not configured. Please set GROQ_API_KEY environment variable."

    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 600
    }
    try:
        reply = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=request_headers,
            json=payload,
            timeout=30,
        )
        reply.raise_for_status()
        body = reply.json()
        return body['choices'][0]['message']['content']
    except Exception as exc:
        return f"Error calling LLM: {str(exc)}"
def generate_rag_response(query, context_chunks):
    """Answer *query* with the LLM, grounded in the retrieved chunks.

    Args:
        query: The user's question.
        context_chunks: List of dicts with a ``'text'`` key, as produced by
            the chunk search.

    Returns:
        The LLM's reply, or a fixed Hindi "not enough information" message
        when *context_chunks* is empty.
    """
    if not context_chunks:
        return "मुझे इस प्रश्न का उत्तर देने के लिए पर्याप्त जानकारी नहीं मिली।"

    context = "\n\n".join(chunk['text'] for chunk in context_chunks)
    # The prompt is kept flush-left so no indentation leaks into the text.
    prompt = f"""आप एक हिंदी पुस्तक सहायक हैं। निम्नलिखित जानकारी के आधार पर प्रश्न का उत्तर दें:
पुस्तक: {SESSION_DATA['book_title']}
लेखक: {SESSION_DATA['author_name']}
संदर्भ:
{context}
प्रश्न: {query}
निर्देश:
- हिंदी में संक्षिप्त और सटीक उत्तर दें
- उत्तर की शुरुआत में पुस्तक और लेखक का संदर्भ शामिल करें
- केवल दिए गए संदर्भ के आधार पर ही उत्तर दें
"""
    return call_groq_api(prompt)
def authenticate(passcode):
    """Check *passcode* against CONFIG['PASSCODE'] and toggle the UI sections.

    On success SESSION_DATA['authenticated'] is set to True.

    Returns:
        Tuple ``(auth_section_update, main_section_update, status_message)``.
    """
    if passcode != CONFIG['PASSCODE']:
        return gr.update(visible=True), gr.update(visible=False), "❌ Invalid passcode"

    SESSION_DATA['authenticated'] = True
    return gr.update(visible=False), gr.update(visible=True), "✅ Welcome!"
def process_document(pdf_file):
    """Extract, chunk and index an uploaded PDF and store it in SESSION_DATA.

    Args:
        pdf_file: The uploaded file: either a path string or an object whose
            ``name`` attribute is the path. None when nothing was uploaded.

    Returns:
        Tuple ``(status, book_title, author_name, upload_section_update,
        book_info_section_update, question_dropdown_update)``. On any failure
        the title and author are "" and the upload section stays visible.
    """
    def failure(message):
        # Common result for every unsuccessful outcome.
        return (
            message, "", "",
            gr.update(visible=True), gr.update(visible=False), gr.update(choices=[]),
        )

    if pdf_file is None:
        return failure("Please upload a PDF file")

    try:
        pdf_path = getattr(pdf_file, 'name', pdf_file)
        file_size = os.path.getsize(pdf_path)
        if file_size > CONFIG['MAX_FILE_SIZE']:
            return failure(f"File too large! Max size: {CONFIG['MAX_FILE_SIZE'] // (1024*1024)}MB")

        text_content = extract_text_from_pdf(pdf_path)
        # extract_text_from_pdf reports failures as messages with these
        # prefixes. Testing for the word "Error" anywhere in the text would
        # reject every book that merely contains that word.
        if not text_content.strip() or text_content.startswith(("Error: ", "Error extracting text:")):
            return failure(text_content)

        author_name, book_title = extract_metadata(text_content)
        chunks = chunk_text(text_content)
        index = create_embeddings(chunks)

        # Commit only after every step succeeded, so a failure above cannot
        # leave new chunks paired with a stale or missing index.
        SESSION_DATA.update({
            'author_name': author_name,
            'book_title': book_title,
            'document_chunks': chunks,
            'faiss_index': index,
            'query_count': 0,
        })

        # Offer the first six predefined questions in the dropdown.
        questions = []
        for category in PREDEFINED_QUESTIONS.values():
            questions.extend(category)

        return (
            "✅ Document processed successfully!", book_title, author_name,
            gr.update(visible=False), gr.update(visible=True), gr.update(choices=questions[:6]),
        )
    except Exception as e:
        return failure(f"Error processing document: {str(e)}")
def show_questions():
    """Hide the book-info section and reveal the question section."""
    hide_book_info = gr.update(visible=False)
    reveal_questions = gr.update(visible=True)
    return hide_book_info, reveal_questions
def process_query(audio_input, text_input, predefined_question):
    """Answer one question about the processed book, as text and as speech.

    The question is taken from the predefined choice if one is given,
    otherwise from the transcribed recording; typed text is used when
    neither yields a question.

    Args:
        audio_input: Path of a recorded question, or None.
        text_input: Typed question text (may be empty or None).
        predefined_question: Selected predefined question (may be empty/None).

    Returns:
        Tuple ``(response_markdown, audio_path)``; ``audio_path`` is None when
        no answer was produced or speech synthesis failed.
    """
    if SESSION_DATA['query_count'] >= CONFIG['MAX_QUERIES_PER_SESSION']:
        return "⚠️ Query limit reached", None
    if not SESSION_DATA['document_chunks']:
        return "Please upload a document first", None

    query_text = ""
    # Priority: Predefined > Audio > Text
    if predefined_question and predefined_question != "Select a question...":
        query_text = predefined_question
    elif audio_input:
        transcript = transcribe_audio(audio_input) or ""
        # transcribe_audio reports failures with these prefixes. Searching
        # for "error" anywhere would throw away a genuine spoken question
        # that happens to contain the word.
        if not transcript.startswith(("Error:", "Transcription error:")):
            query_text = transcript

    typed_text = (text_input or "").strip()
    if not query_text.strip() and typed_text:
        query_text = typed_text
    if not query_text.strip():
        return "Please ask a question", None

    try:
        similar_chunks = search_similar_chunks(query_text)
        response_text = generate_rag_response(query_text, similar_chunks)
        audio_response = text_to_speech(response_text)
        SESSION_DATA['query_count'] += 1
        formatted_response = f"**प्रश्न:** {query_text}\n\n**उत्तर:** {response_text}"
        return formatted_response, audio_response
    except Exception as e:
        return f"Error processing query: {str(e)}", None
def reset_session():
    """Forget the current document and start a fresh session.

    Clears the query counter, chunks, index and detected metadata and issues
    a new session id. The login flag and the loaded models are left alone.

    Returns:
        Tuple ``(status, book_title, author_name, upload_section_update,
        book_info_section_update, query_section_update, dropdown_update)``.
    """
    SESSION_DATA['query_count'] = 0
    SESSION_DATA['document_chunks'] = []
    SESSION_DATA['faiss_index'] = None
    SESSION_DATA['author_name'] = ''
    SESSION_DATA['book_title'] = ''
    SESSION_DATA['session_id'] = str(uuid.uuid4())

    return (
        "✅ New session started!",
        "",
        "",
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(choices=[]),
    )
def create_interface():
    """Build the Gradio Blocks UI and wire its buttons to the handlers.

    The page has a passcode gate and a main area with three steps: upload a
    PDF, review the detected title/author, and ask questions (predefined,
    voice or typed). The answer is shown as text and audio.

    NOTE(review): the nesting of the layout was reconstructed from a
    flattened listing; confirm which group the status box, the answer
    widgets and the reset button belong to.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    page_css = """
        .main-container { max-width: 1200px; margin: 0 auto; }
        .section-header { font-size: 1.2em; font-weight: bold; margin: 1em 0; }
        .upload-area { border: 2px dashed #ccc; padding: 2em; text-align: center; margin: 1em 0; }
        """
    banner_html = """
        <div style="text-align: center; padding: 2em;">
        <h1>📚 Hindi Book Assistant</h1>
        <p>AI-powered assistant for Hindi books with voice support</p>
        </div>
        """

    with gr.Blocks(title="Hindi Book Assistant", theme=gr.themes.Soft(), css=page_css) as demo:
        gr.HTML(banner_html)

        # Authentication Section
        with gr.Group(visible=True) as auth_section:
            gr.Markdown("### 🔐 Enter Passcode")
            passcode_input = gr.Textbox(
                label="Passcode",
                type="password",
                placeholder="Enter access code...",
            )
            auth_button = gr.Button("🔓 Access", variant="primary")
            auth_status = gr.Textbox(label="Status", interactive=False)

        # Main Interface
        with gr.Group(visible=False) as main_section:
            # Step 1: Upload Document
            with gr.Group(visible=True) as upload_section:
                gr.Markdown("### 📄 Upload Your Book")
                pdf_upload = gr.File(
                    label="Choose PDF file",
                    file_types=[".pdf"],
                    type="filepath",
                )
                process_btn = gr.Button("📖 Process Book", variant="primary", size="lg")
                doc_status = gr.Textbox(label="Status", interactive=False)

            # Step 2: Book Info (shown after processing)
            with gr.Group(visible=False) as book_info_section:
                gr.Markdown("### 📚 Book Information")
                with gr.Row():
                    book_title_display = gr.Textbox(label="Book Title", interactive=False)
                    author_display = gr.Textbox(label="Author", interactive=False)
                continue_btn = gr.Button("➡️ Continue to Questions", variant="primary", size="lg")

            # Step 3: Ask Questions (shown after continue)
            with gr.Group(visible=False) as query_section:
                gr.Markdown("### 💬 Ask Questions About Your Book")
                with gr.Tab("🎯 Quick Questions"):
                    predefined_dropdown = gr.Dropdown(
                        label="Choose a question",
                        choices=[],
                        value=None,
                        interactive=True,
                    )
                    ask_predefined_btn = gr.Button("🔍 Ask This Question", variant="primary")
                with gr.Tab("🎤 Voice Question"):
                    audio_input = gr.Audio(
                        label="Record your question (Hindi/English)",
                        sources=["microphone"],
                        type="filepath",
                    )
                    ask_voice_btn = gr.Button("🔍 Ask Voice Question", variant="primary")
                with gr.Tab("⌨️ Type Question"):
                    text_input = gr.Textbox(
                        label="Type your question",
                        placeholder="Example: इस पुस्तक का मुख्य विषय क्या है?",
                        lines=2,
                    )
                    ask_text_btn = gr.Button("🔍 Ask Text Question", variant="primary")

                # Response Section
                gr.Markdown("### 📝 Answer")
                response_text = gr.Textbox(
                    label="Response",
                    lines=6,
                    interactive=False,
                )
                response_audio = gr.Audio(
                    label="🔊 Audio Response",
                    interactive=False,
                )

            # Reset Button
            gr.Markdown("---")
            reset_btn = gr.Button("🔄 Start New Session", variant="secondary")

        # Event Handlers
        answer_outputs = [response_text, response_audio]

        auth_button.click(
            authenticate,
            inputs=[passcode_input],
            outputs=[auth_section, main_section, auth_status],
        )
        process_btn.click(
            process_document,
            inputs=[pdf_upload],
            outputs=[
                doc_status, book_title_display, author_display,
                upload_section, book_info_section, predefined_dropdown,
            ],
        )
        continue_btn.click(
            show_questions,
            outputs=[book_info_section, query_section],
        )
        # Each ask button feeds process_query only its own input; the other
        # two arguments are constant placeholders held in gr.State.
        ask_predefined_btn.click(
            process_query,
            inputs=[gr.State(None), gr.State(""), predefined_dropdown],
            outputs=answer_outputs,
        )
        ask_voice_btn.click(
            process_query,
            inputs=[audio_input, gr.State(""), gr.State("")],
            outputs=answer_outputs,
        )
        ask_text_btn.click(
            process_query,
            inputs=[gr.State(None), text_input, gr.State("")],
            outputs=answer_outputs,
        )
        reset_btn.click(
            reset_session,
            outputs=[
                doc_status, book_title_display, author_display,
                upload_section, book_info_section, query_section, predefined_dropdown,
            ],
        )
        demo.load(load_models)

    return demo
def main():
    """Load the models, build the interface and launch the Gradio app."""
    print("🚀 Starting Hindi Book Assistant...")
    print("📋 Loading AI models...")
    load_models()
    demo = create_interface()
    print("✅ Ready!")
    # Never write the passcode itself to stdout: it would end up in logs.
    if CONFIG['PASSCODE']:
        print("🔑 Passcode: configured")
    else:
        print("⚠️ PASSCODE environment variable is not set")
    demo.launch(
        share=True,
        show_error=True,
    )


if __name__ == "__main__":
    main()