Files changed (1) hide show
  1. app.py +194 -539
app.py CHANGED
@@ -1,17 +1,10 @@
1
- #!/usr/bin/env python3
2
- """
3
- Hindi RAG Voice Demo - Gradio Implementation (Groq Whisper API Version)
4
- A streamlined voice-enabled RAG system for Hindi content using Gradio
5
- Uses Groq Whisper API for transcription and assumes PDFs have selectable text
6
- """
7
-
8
  import gradio as gr
9
  import os
10
  import tempfile
11
  import time
12
  import uuid
13
  from datetime import datetime
14
- import fitz # PyMuPDF
15
  import requests
16
  import json
17
  import numpy as np
@@ -23,19 +16,17 @@ import subprocess
23
  import warnings
24
  warnings.filterwarnings("ignore")
25
 
26
- # Global configuration
# Application-wide settings. The passcode and the Groq API key are read from
# the environment once, at import time (the Groq key comes from ``GAPI``).
CONFIG = dict(
    PASSCODE=os.environ.get('PASSCODE'),
    MAX_FILE_SIZE=10 * 1024 * 1024,  # upload size limit in bytes (10 MB)
    MAX_QUERIES_PER_SESSION=5,
    MAX_AUDIO_DURATION=120,  # 2 minutes
    GROQ_API_KEY=os.environ.get('GAPI'),
    AUDIO_CLIP_DURATION=10,  # only the first 10 seconds are transcribed
    BOOK_THUMBNAILS_DIR='./book_thumbnails',
    OCR_BOOKS_DIR='./ocr_books',
)
37
 
38
- # Global session storage
39
  SESSION_DATA = {
40
  'authenticated': False,
41
  'session_id': str(uuid.uuid4()),
@@ -48,103 +39,103 @@ SESSION_DATA = {
48
  'groq_client': None
49
  }
50
 
51
- # Initialize models and clients (cached)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def load_models():
    """Lazily create the embedding model and Groq client, caching both.

    Each object is built at most once and stored in ``SESSION_DATA``; later
    calls return the cached instances. The Groq client is only created when
    ``CONFIG['GROQ_API_KEY']`` is set, otherwise a warning is printed and the
    cached client stays ``None``.

    Returns:
        tuple: ``(embedding_model, groq_client)`` as currently cached.
    """
    if SESSION_DATA['embedding_model'] is None:
        print("Loading embedding model...")
        model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
        SESSION_DATA['embedding_model'] = model

    if SESSION_DATA['groq_client'] is None:
        api_key = CONFIG['GROQ_API_KEY']
        if not api_key:
            print("Warning: GROQ_API_KEY not found")
        else:
            print("Initializing Groq client...")
            SESSION_DATA['groq_client'] = Groq(api_key=api_key)

    return SESSION_DATA['embedding_model'], SESSION_DATA['groq_client']
66
 
67
- # Audio processing functions
def trim_audio_to_duration(input_path, output_path, duration=10, timeout=60):
    """Copy the first ``duration`` seconds of an audio file using ffmpeg.

    The audio stream is copied without re-encoding (``-acodec copy``) and an
    existing output file is overwritten (``-y``).

    Args:
        input_path: Path of the source audio file.
        output_path: Path the trimmed audio is written to.
        duration: Number of seconds to keep from the start of the input.
        timeout: Maximum number of seconds to wait for ffmpeg before giving
            up, so a stalled process cannot block the caller forever.

    Returns:
        bool: True when ffmpeg exits with status 0; False on a non-zero exit,
        a timeout, or any error while launching the process (for example when
        ffmpeg is not installed).
    """
    cmd = [
        'ffmpeg', '-i', input_path,
        '-t', str(duration),
        '-acodec', 'copy',
        '-y',  # overwrite the output file without prompting
        output_path,
    ]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            stdin=subprocess.DEVNULL,  # never let ffmpeg wait on our stdin
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f"Error trimming audio: ffmpeg timed out after {timeout} seconds")
        return False
    except Exception as e:
        # Best-effort helper: callers fall back to the untrimmed file.
        print(f"Error trimming audio: {str(e)}")
        return False

    if result.returncode != 0:
        print(f"FFmpeg error: {result.stderr}")
        return False
    return True
91
 
def transcribe_audio(audio_file):
    """Transcribe the start of an audio file as Hindi via the Groq Whisper API.

    The input is first trimmed to ``CONFIG['AUDIO_CLIP_DURATION']`` seconds
    into a temporary ``.wav`` file. If trimming fails, the original file is
    sent instead. The temporary file is always removed before returning.

    Args:
        audio_file: Path of the recorded audio, or ``None``.

    Returns:
        str: The transcription text; ``""`` when ``audio_file`` is ``None``;
        or a message starting with ``"Error:"`` / ``"Transcription error:"``
        when the client is not configured or the request fails.
    """
    if audio_file is None:
        return ""

    if not CONFIG['GROQ_API_KEY'] or SESSION_DATA['groq_client'] is None:
        return "Error: Groq API key not configured"

    temp_path = None
    try:
        # Reserve a path for the trimmed clip; ffmpeg overwrites it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            temp_path = tmp_file.name

        if trim_audio_to_duration(audio_file, temp_path, CONFIG['AUDIO_CLIP_DURATION']):
            source_path = temp_path
        else:
            print("Warning: Could not trim audio, using full duration")
            source_path = audio_file

        with open(source_path, "rb") as file:
            transcription = SESSION_DATA['groq_client'].audio.transcriptions.create(
                file=(os.path.basename(source_path), file.read()),
                model="whisper-large-v3",
                response_format="verbose_json",
                language="hi",
            )

        return transcription.text

    except Exception as e:
        return f"Transcription error: {str(e)}"

    finally:
        # Remove the temporary clip on every path, including the fallback
        # where the original file was transcribed instead.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
138
 
139
  def text_to_speech(text):
140
- """Convert text to speech in Hindi"""
141
  if not text or len(text.strip()) == 0:
142
  return None
143
-
144
  try:
145
  tts = gTTS(text=text, lang='hi', slow=False)
146
-
147
- # Save to temporary file
148
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
149
  tts.save(tmp_file.name)
150
  return tmp_file.name
@@ -152,96 +143,70 @@ def text_to_speech(text):
152
  print(f"TTS Error: {str(e)}")
153
  return None
154
 
155
- # Text extraction functions
def extract_text_from_pdf(pdf_path):
    """Extract the selectable text of every page of a PDF with PyMuPDF.

    Pages without any non-whitespace text are skipped with a printed warning.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: The text of all non-empty pages, each followed by a newline.
        On failure the return value is a message starting with ``"Error"``
        (either no selectable text was found or an exception was raised).
    """
    try:
        pdf_document = fitz.open(pdf_path)
        try:
            total_pages = len(pdf_document)
            print(f"Processing PDF with {total_pages} pages...")

            page_texts = []
            for page_num in range(total_pages):
                page_text = pdf_document.load_page(page_num).get_text()
                if page_text.strip():
                    page_texts.append(page_text + "\n")
                else:
                    print(f"Warning: Page {page_num + 1} appears to have no selectable text")
        finally:
            # Close the document even when a page fails to load.
            pdf_document.close()

        text_content = "".join(page_texts)
        if not text_content.strip():
            return "Error: No selectable text found in PDF. Please ensure the PDF contains selectable text, not just images."

        return text_content

    except Exception as e:
        return f"Error extracting text: {str(e)}"
186
 
def extract_metadata(text):
    """Guess the author line and book title from the start of a text.

    Only the first 25 lines of ``text`` are inspected; blank lines among them
    are ignored. A line mentioning an author keyword becomes the author (the
    last such line wins). The first other line that is 11-99 characters long
    and has no digit in its first 20 characters becomes the title.

    Args:
        text: Full text of the book.

    Returns:
        tuple: ``(author_name, book_title)``; each falls back to a Hindi
        placeholder ("अज्ञात लेखक" / "अनाम पुस्तक") when nothing matches.
    """
    import re  # local import: the file's import block is not fully visible

    default_title = "अनाम पुस्तक"
    hindi_markers = ('लेखक', 'द्वारा', 'रचयिता')
    # Latin keywords must match as whole words; a plain substring test for
    # "by" would also fire on words such as "Nearby" or "baby".
    latin_marker = re.compile(r'\b(?:authors?|by)\b', re.IGNORECASE)

    author_name = "अज्ञात लेखक"
    book_title = default_title

    lines = [line.strip() for line in text.split('\n')[:25] if line.strip()]
    for line in lines:
        if any(marker in line for marker in hindi_markers) or latin_marker.search(line):
            author_name = line
        elif (
            book_title == default_title
            and 10 < len(line) < 100
            and not any(char.isdigit() for char in line[:20])
        ):
            book_title = line

    return author_name, book_title
205
 
def chunk_text(text, chunk_size=400, overlap=50):
    """Split text into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk repeats the last ``overlap`` words of the previous one. Chunking
    stops as soon as a chunk reaches the end of the text, so no trailing
    chunk is produced that contains only words already in the previous one.

    Args:
        text: Text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by neighbouring chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        list[str]: The chunks, with words joined by single spaces. Empty for
        text without any words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []

    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))
        if start + chunk_size >= len(words):
            # This chunk already reaches the last word.
            break

    return chunks
217
 
218
- # Vector search functions
def create_embeddings(chunks):
    """Embed text chunks and build a FAISS inner-product index over them.

    The vectors are L2-normalised before being added, so inner-product
    scores returned by the index are cosine similarities.

    Args:
        chunks: Non-empty sequence of text chunks to index.

    Returns:
        A ``faiss.IndexFlatIP`` holding one vector per chunk, in input order.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    if len(chunks) == 0:
        raise ValueError("Cannot build a search index from an empty list of chunks")

    embedding_model, _ = load_models()
    # Cast once up front: normalisation and indexing then share one float32
    # array instead of normalising one array and adding a converted copy.
    vectors = embedding_model.encode(chunks, show_progress_bar=False).astype('float32')
    faiss.normalize_L2(vectors)

    index = faiss.IndexFlatIP(vectors.shape[1])
    index.add(vectors)
    return index
233
 
234
  def search_similar_chunks(query, top_k=3):
235
- """Search for similar chunks"""
236
  if SESSION_DATA['faiss_index'] is None or not SESSION_DATA['document_chunks']:
237
  return []
238
-
239
  embedding_model, _ = load_models()
240
  query_embedding = embedding_model.encode([query], show_progress_bar=False)
241
  faiss.normalize_L2(query_embedding)
242
-
243
  scores, indices = SESSION_DATA['faiss_index'].search(query_embedding.astype('float32'), top_k)
244
-
245
  results = []
246
  for i, idx in enumerate(indices[0]):
247
  if idx >= 0 and idx < len(SESSION_DATA['document_chunks']):
@@ -249,28 +214,25 @@ def search_similar_chunks(query, top_k=3):
249
  'text': SESSION_DATA['document_chunks'][idx],
250
  'score': float(scores[0][i])
251
  })
252
-
253
  return results
254
 
255
- # LLM functions
256
  def call_groq_api(prompt, model="llama-3.1-8b-instant"):
257
- """Call Groq API for LLM inference"""
258
  if not CONFIG['GROQ_API_KEY'] or CONFIG['GROQ_API_KEY'] == 'your_groq_api_key_here':
259
  return "⚠️ Groq API key not configured. Please set GROQ_API_KEY environment variable."
260
-
261
  url = "https://api.groq.com/openai/v1/chat/completions"
262
  headers = {
263
  "Authorization": f"Bearer {CONFIG['GROQ_API_KEY']}",
264
  "Content-Type": "application/json"
265
  }
266
-
267
  data = {
268
  "model": model,
269
  "messages": [{"role": "user", "content": prompt}],
270
  "temperature": 0.7,
271
- "max_tokens": 800
272
  }
273
-
274
  try:
275
  response = requests.post(url, headers=headers, json=data, timeout=30)
276
  response.raise_for_status()
@@ -279,12 +241,11 @@ def call_groq_api(prompt, model="llama-3.1-8b-instant"):
279
  return f"Error calling LLM: {str(e)}"
280
 
281
  def generate_rag_response(query, context_chunks):
282
- """Generate response using RAG"""
283
  if not context_chunks:
284
  return "मुझे इस प्रश्न का उत्तर देने के लिए पर्याप्त जानकारी नहीं मिली।"
285
-
286
  context = "\n\n".join([chunk['text'] for chunk in context_chunks])
287
-
288
  prompt = f"""आप एक हिंदी पुस्तक सहायक हैं। निम्नलिखित जानकारी के आधार पर प्रश्न का उत्तर दें:
289
 
290
  पुस्तक: {SESSION_DATA['book_title']}
@@ -300,127 +261,91 @@ def generate_rag_response(query, context_chunks):
300
  - उत्तर की शुरुआत में पुस्तक और लेखक का संदर्भ शामिल करें
301
  - केवल दिए गए संदर्भ के आधार पर ही उत्तर दें
302
  """
303
-
304
  response = call_groq_api(prompt)
305
  return response
306
 
307
- # Authentication function
def authenticate(passcode):
    """Check the entered passcode and switch between login and main views.

    Access is granted only when a passcode is configured in
    ``CONFIG['PASSCODE']`` and the entered value matches it. When no passcode
    is configured, every attempt is rejected; otherwise a missing (``None``)
    input would compare equal to the unset configuration value.

    Args:
        passcode: Value entered in the passcode field.

    Returns:
        tuple: ``(auth_section_update, main_section_update, status_message)``.
    """
    import hmac  # local import: the file's import block is not fully visible

    expected = CONFIG['PASSCODE']
    granted = (
        isinstance(expected, str)
        and isinstance(passcode, str)
        # Constant-time comparison of the encoded values.
        and hmac.compare_digest(passcode.encode('utf-8'), expected.encode('utf-8'))
    )

    if granted:
        SESSION_DATA['authenticated'] = True
        return gr.update(visible=False), gr.update(visible=True), "✅ Access granted! / पहुंच मिली!"
    return gr.update(visible=True), gr.update(visible=False), "❌ Invalid passcode / गलत पासकोड"
315
 
316
- # Document processing function
def process_document(pdf_file):
    """Index an uploaded PDF so that questions can be asked about it.

    Extracts the text, guesses author and title, splits the text into chunks,
    builds the search index, stores everything in ``SESSION_DATA`` and resets
    the session's query counter.

    Args:
        pdf_file: The uploaded file. The upload widget in this file is
            declared with ``type="filepath"``, so this is normally a path
            string; an object exposing the path as ``.name`` is also accepted.

    Returns:
        tuple: ``(status_message, book_title, author_name, query_section_update)``.
        On any failure the title and author are empty strings and the query
        section stays hidden.
    """
    def failure(message):
        return message, "", "", gr.update(visible=False)

    if pdf_file is None:
        return failure("कृपया एक PDF फ़ाइल अपलोड करें।")

    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = pdf_file
        else:
            pdf_path = pdf_file.name

        file_size = os.path.getsize(pdf_path)
        if file_size > CONFIG['MAX_FILE_SIZE']:
            return failure(f"फ़ाइल बहुत बड़ी है! अधिकतम आकार: {CONFIG['MAX_FILE_SIZE'] // (1024*1024)}MB")

        text_content = extract_text_from_pdf(pdf_path)

        # extract_text_from_pdf reports failures as messages that start with
        # "Error"; a substring test would also reject books whose text merely
        # contains that word.
        if not text_content.strip() or text_content.startswith("Error"):
            return failure(text_content)

        author_name, book_title = extract_metadata(text_content)
        SESSION_DATA['author_name'] = author_name
        SESSION_DATA['book_title'] = book_title

        chunks = chunk_text(text_content)
        SESSION_DATA['document_chunks'] = chunks

        print("Creating embeddings and search index...")
        SESSION_DATA['faiss_index'] = create_embeddings(chunks)

        SESSION_DATA['query_count'] = 0

        word_count = len(text_content.split())
        char_count = len(text_content)

        success_msg = "\n".join([
            "✅ दस्तावेज़ सफलतापूर्वक प्रसंस्करित!",
            "",
            f"📖 पुस्तक: {book_title}",
            f"✍️ लेखक: {author_name}",
            f"📄 टेक्स्ट खंड: {len(chunks)}",
            f"📊 शब्द संख्या: {word_count:,}",
            f"📝 अक्षर संख्या: {char_count:,}",
            "",
            "अब आप प्रश्न पूछ सकते हैं।",
        ])

        return success_msg, book_title, author_name, gr.update(visible=True)

    except Exception as e:
        return failure(f"दस्तावेज़ प्रसंस्करण में त्रुटि: {str(e)}")
368
 
369
- # Query processing function
def process_query(audio_input, text_input):
    """Answer a question about the loaded document, given by voice or text.

    A voice recording takes precedence: it is transcribed and used as the
    question unless transcription fails or yields nothing, in which case the
    typed text is used. The answer is generated from the most similar chunks
    and also converted to speech. Each answered question increments the
    session's query counter.

    Args:
        audio_input: Path of the recorded question, or a falsy value.
        text_input: Typed question; may be empty or ``None``.

    Returns:
        tuple: ``(response_text, response_audio_path_or_None, usage_label)``.
    """
    def usage_label():
        return f"प्रश्न: {SESSION_DATA['query_count']}/{CONFIG['MAX_QUERIES_PER_SESSION']}"

    max_queries = CONFIG['MAX_QUERIES_PER_SESSION']
    if SESSION_DATA['query_count'] >= max_queries:
        # Report the configured limit rather than a hard-coded number.
        return f"⚠️ प्रश्न सीमा समाप्त ({max_queries} प्रश्न प्रति सत्र)", None, usage_label()

    if not SESSION_DATA['document_chunks']:
        return "कृपया पहले एक PDF दस्तावेज़ अपलोड करें।", None, usage_label()

    query_text = ""

    if audio_input:
        transcript = transcribe_audio(audio_input)
        # transcribe_audio reports failures as messages with these prefixes;
        # a substring test for "error" would also discard genuine questions
        # that happen to contain that word.
        if not transcript.startswith(("Error:", "Transcription error:")):
            query_text = transcript

    if not query_text.strip():
        query_text = (text_input or "").strip()

    if not query_text.strip():
        return "कृपया आवाज़ या टेक्स्ट के माध्यम से प्रश्न दें।", None, usage_label()

    try:
        similar_chunks = search_similar_chunks(query_text)
        response_text = generate_rag_response(query_text, similar_chunks)
        audio_response = text_to_speech(response_text)

        SESSION_DATA['query_count'] += 1

        formatted_response = (
            f"**प्रश्न:** {query_text}\n"
            "\n"
            "**उत्तर:**\n"
            f"{response_text}\n"
            "\n"
            "**संदर्भ स्रोत:**\n"
        )
        formatted_response += "".join(
            f"\n{i+1}. {chunk['text'][:150]}... (स्कोर: {chunk['score']:.3f})"
            for i, chunk in enumerate(similar_chunks)
        )

        return formatted_response, audio_response, usage_label()

    except Exception as e:
        return f"प्रश्न प्रसंस्करण में त्रुटि: {str(e)}", None, usage_label()
421
 
422
  def reset_session():
423
- """Reset the session"""
424
  SESSION_DATA.update({
425
  'query_count': 0,
426
  'document_chunks': [],
@@ -429,429 +354,159 @@ def reset_session():
429
  'book_title': '',
430
  'session_id': str(uuid.uuid4())
431
  })
432
- return "✅ नया सत्र शुरू किया गया!", "", "", gr.update(visible=False), "प्रश्न: 0/5"
433
-
434
- # Book management functions
def get_available_books():
    """List the library books found on disk.

    Every ``.txt`` file in ``CONFIG['OCR_BOOKS_DIR']`` is a book. Its
    thumbnail is the image in ``CONFIG['BOOK_THUMBNAILS_DIR']`` whose file
    name (without extension, compared case-insensitively) equals the book's;
    when there is none, a generated placeholder image is used instead.

    Books are returned sorted by file name so that repeated calls yield the
    same order; the gallery maps a clicked position back to a book by calling
    this function again.

    Returns:
        list[dict]: One entry per book with the keys ``name``,
        ``display_name``, ``text_file`` and ``thumbnail``. Empty when a
        directory is missing or an error occurs.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    try:
        thumbnail_dir = CONFIG['BOOK_THUMBNAILS_DIR']
        ocr_dir = CONFIG['OCR_BOOKS_DIR']

        # Lower-cased base name -> thumbnail path; the first file in sorted
        # order wins when several images share a base name.
        thumbnails = {}
        if os.path.exists(thumbnail_dir):
            for thumb_file in sorted(os.listdir(thumbnail_dir)):
                if thumb_file.lower().endswith(image_suffixes):
                    key = os.path.splitext(thumb_file)[0].lower()
                    thumbnails.setdefault(key, os.path.join(thumbnail_dir, thumb_file))

        if os.path.exists(ocr_dir):
            text_files = sorted(
                f for f in os.listdir(ocr_dir) if f.lower().endswith('.txt')
            )
        else:
            text_files = []

        books = []
        for text_file in text_files:
            book_name = os.path.splitext(text_file)[0]

            thumbnail_path = thumbnails.get(book_name.lower())
            if not thumbnail_path:
                thumbnail_path = create_text_placeholder(book_name)

            books.append({
                'name': book_name,
                'display_name': book_name.replace('_', ' ').title(),
                'text_file': os.path.join(ocr_dir, text_file),
                'thumbnail': thumbnail_path,
            })

        return books

    except Exception as e:
        print(f"Error getting available books: {str(e)}")
        return []
487
 
def create_text_placeholder(book_name):
    """Render a simple cover image showing the book name.

    Used for books that have no thumbnail. The image is written to the
    system temporary directory as ``<book_name>_placeholder.png``.

    Args:
        book_name: Book name; underscores are shown as line breaks.

    Returns:
        str | None: Path of the generated image, or ``None`` when it could
        not be created (for example when matplotlib is unavailable).
    """
    try:
        import matplotlib.pyplot as plt
        import matplotlib.patches as patches

        fig, ax = plt.subplots(1, 1, figsize=(3, 4))
        try:
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.axis('off')

            # Light background with a coloured border.
            rect = patches.Rectangle((0, 0), 1, 1, linewidth=2, edgecolor='#2E86AB', facecolor='#E8F4FD')
            ax.add_patch(rect)

            ax.text(0.5, 0.5, book_name.replace('_', '\n'),
                    ha='center', va='center', fontsize=10, weight='bold', color='#2E86AB')

            placeholder_path = os.path.join(tempfile.gettempdir(), f"{book_name}_placeholder.png")
            plt.savefig(placeholder_path, dpi=100, bbox_inches='tight')
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)

        return placeholder_path

    except Exception as e:
        print(f"Error creating placeholder: {str(e)}")
        return None
518
-
def load_book_text(book_info):
    """Read the text of a library book.

    The file is decoded as UTF-8; a leading byte-order mark, if present, is
    dropped so it does not end up in the first line of the text.

    Args:
        book_info: Book entry whose ``'text_file'`` key holds the file path.

    Returns:
        str: The file content, or a message starting with ``"Error"`` when
        the file is empty (or whitespace only) or cannot be read.
    """
    try:
        with open(book_info['text_file'], 'r', encoding='utf-8-sig') as file:
            content = file.read()
    except Exception as e:
        return f"Error loading book text: {str(e)}"

    if not content.strip():
        return "Error: Empty text file"

    return content
532
-
def process_selected_book(selected_book_name):
    """Load a library book and index it so that questions can be asked.

    Looks the book up by name, reads its text, guesses author and title
    (falling back to a generic author label and the book's display name),
    builds the search index, stores everything in ``SESSION_DATA`` and resets
    the session's query counter.

    Args:
        selected_book_name: Name of the chosen book; empty or ``"None"``
            means nothing is selected.

    Returns:
        tuple: ``(status_message, book_title, author_name, query_section_update)``.
        On any failure the title and author are empty strings and the query
        section stays hidden.
    """
    def failure(message):
        return message, "", "", gr.update(visible=False)

    if not selected_book_name or selected_book_name == "None":
        return failure("कृपया एक पुस्तक चुनें।")

    try:
        selected_book = next(
            (book for book in get_available_books() if book['name'] == selected_book_name),
            None,
        )
        if not selected_book:
            return failure("चुनी गई पुस्तक नहीं मिली।")

        text_content = load_book_text(selected_book)

        # load_book_text reports failures as messages that start with
        # "Error"; a substring test would also reject books whose text merely
        # contains that word.
        if not text_content.strip() or text_content.startswith("Error"):
            return failure(text_content)

        author_name, book_title = extract_metadata(text_content)

        # Replace the extractor's placeholders with library-specific defaults.
        if author_name == "अज्ञात लेखक":
            author_name = "संग्रहित पुस्तक"
        if book_title == "अनाम पुस्तक":
            book_title = selected_book['display_name']

        SESSION_DATA['author_name'] = author_name
        SESSION_DATA['book_title'] = book_title

        chunks = chunk_text(text_content)
        SESSION_DATA['document_chunks'] = chunks

        print("Creating embeddings and search index for selected book...")
        SESSION_DATA['faiss_index'] = create_embeddings(chunks)

        SESSION_DATA['query_count'] = 0

        word_count = len(text_content.split())
        char_count = len(text_content)

        success_msg = "\n".join([
            "✅ पुस्तक सफलतापूर्वक लोड की गई!",
            "",
            f"📖 पुस्तक: {book_title}",
            f"✍️ लेखक: {author_name}",
            f"📄 टेक्स्ट खंड: {len(chunks)}",
            f"📊 शब्द संख्या: {word_count:,}",
            f"📝 अक्षर संख्या: {char_count:,}",
            "",
            "अब आप प्रश्न पूछ सकते हैं।",
        ])

        return success_msg, book_title, author_name, gr.update(visible=True)

    except Exception as e:
        return failure(f"पुस्तक लोड करने में त्रुटि: {str(e)}")
599
-
def create_book_gallery():
    """Build the gallery items and dropdown choices for the library books.

    Only books whose thumbnail file exists on disk are included.

    Returns:
        tuple: ``(gallery_items, book_names)`` where ``gallery_items`` is a
        list of ``(thumbnail_path, display_name)`` pairs and ``book_names``
        is ``["None"]`` followed by the names of the included books. When no
        books are available at all, the second element is a Hindi message
        string instead of a list.
    """
    books = get_available_books()
    if not books:
        return [], "कोई पुस्तक उपलब्ध नहीं है।"

    shown = [
        book for book in books
        if book['thumbnail'] and os.path.exists(book['thumbnail'])
    ]
    gallery_items = [(book['thumbnail'], book['display_name']) for book in shown]
    names = ["None"] + [book['name'] for book in shown]  # "None" = no selection

    return gallery_items, names
617
-
def handle_gallery_selection(evt: gr.SelectData):
    """Translate a click on the book gallery into a book name.

    The clicked position is resolved against the books that have an existing
    thumbnail, which is the same filter the gallery itself is built with.

    Args:
        evt: Gradio selection event; ``evt.index`` is the clicked position.

    Returns:
        str: The name of the clicked book, or ``"None"`` when the event has
        no index or the index is out of range.
    """
    position = evt.index
    if position is None:
        return "None"

    candidates = [
        book for book in get_available_books()
        if book['thumbnail'] and os.path.exists(book['thumbnail'])
    ]

    if 0 <= position < len(candidates):
        return candidates[position]['name']
    return "None"
638
-
639
- # Create Gradio interface
640
  def create_interface():
641
- """Create the Gradio interface"""
642
-
643
  with gr.Blocks(
644
- title="Hindi RAG Voice Demo - Groq Whisper",
645
  theme=gr.themes.Soft(),
646
  css="""
647
- .main-header { text-align: center; color: #2E86AB; margin-bottom: 2rem; }
648
- .section-header { color: #A23B72; font-weight: bold; margin: 1rem 0; }
649
- .info-box { background: #F18F01; color: white; padding: 1rem; border-radius: 8px; margin: 1rem 0; }
650
  """
651
  ) as demo:
652
-
653
  gr.HTML("""
654
- <div class="main-header">
655
- <h1>📚 Hindi RAG Voice Demo - Groq Whisper</h1>
656
- <h3>हिंदी पुस्तक आवाज़ सहायक</h3>
657
- <p>AI-powered interactive book assistant with Groq Whisper API</p>
658
- <p><em>Audio transcription limited to first 10 seconds</em></p>
659
  </div>
660
  """)
661
-
662
- # Authentication section
663
  with gr.Group(visible=True) as auth_section:
664
- gr.Markdown("### 🔐 Access Control / पहुंच नियंत्रण")
665
- gr.Markdown("Please enter the passcode to access the demo / कृपया डेमो एक्सेस करने के लिए पासकोड दर्ज करें")
666
-
667
  passcode_input = gr.Textbox(
668
- label="Passcode / पासकोड",
669
  type="password",
670
- placeholder="Enter passcode here..."
671
  )
672
- auth_button = gr.Button("🔓 Access Demo / डेमो एक्सेस करें", variant="primary")
673
  auth_status = gr.Textbox(label="Status", interactive=False)
674
-
675
- # Main application section
676
  with gr.Group(visible=False) as main_section:
677
 
678
- # Session info
679
- with gr.Row():
680
- with gr.Column(scale=3):
681
- gr.Markdown("### 📊 Session Information")
682
- with gr.Column(scale=1):
683
- query_counter = gr.Textbox(
684
- label="Query Usage",
685
- value="प्रश्न: 0/5",
686
- interactive=False
687
- )
688
-
689
- # Document selection/upload section
690
- gr.Markdown("### 📁 Step 1: Choose Your Book / अपनी पुस्तक चुनें")
691
-
692
- # Book selection section
693
- with gr.Tab("📚 Select from Library / पुस्तकालय से चुनें"):
694
- gr.Markdown("**Choose from available books / उपलब्ध पुस्तकों में से चुनें**")
695
-
696
- # Initialize book gallery and dropdown
697
- available_books = get_available_books()
698
- gallery_data, book_options = create_book_gallery()
699
-
700
- if available_books:
701
- book_gallery = gr.Gallery(
702
- value=gallery_data,
703
- label="Available Books / उपलब्ध पुस्तकें",
704
- show_label=True,
705
- elem_id="book_gallery",
706
- columns=3,
707
- rows=2,
708
- height="auto",
709
- allow_preview=True
710
- )
711
-
712
- book_dropdown = gr.Dropdown(
713
- choices=book_options,
714
- label="Select Book / पुस्तक चुनें",
715
- value="None",
716
- interactive=True
717
- )
718
-
719
- select_book_btn = gr.Button("📖 Load Selected Book / चुनी गई पुस्तक लोड करें", variant="primary")
720
- else:
721
- gr.Markdown("⚠️ No books available in library / पुस्तकालय में कोई पुस्तक उपलब्ध नहीं है")
722
- book_dropdown = gr.Dropdown(choices=["None"], value="None", visible=False)
723
- select_book_btn = gr.Button("No books available", interactive=False)
724
-
725
- # PDF upload section
726
- with gr.Tab("📄 Upload PDF / PDF अपलोड करें"):
727
- gr.Markdown("**Upload your own PDF / अपनी PDF अपलोड करें**")
728
- gr.Markdown("**Note:** Please ensure your PDF contains selectable text (not scanned images)")
729
-
730
  pdf_upload = gr.File(
731
- label="Upload PDF / PDF अपलोड करें",
732
  file_types=[".pdf"],
733
  type="filepath"
734
  )
735
- process_pdf_btn = gr.Button("📖 Process PDF / PDF प्रसंस्करित करें", variant="primary")
736
-
737
- doc_status = gr.Textbox(label="Processing Status / प्रसंस्करण स्थिति", interactive=False)
738
-
739
- with gr.Row():
740
- book_title_display = gr.Textbox(label="Book Title / पुस्तक शीर्षक", interactive=False)
741
- author_display = gr.Textbox(label="Author / लेखक", interactive=False)
742
-
743
- # Query section
744
- with gr.Group(visible=False) as query_section:
745
- gr.Markdown("### 🎤 Step 2: Ask Questions / प्रश्न पूछें")
746
- gr.Markdown("**Note:** Audio recordings are limited to first 10 seconds for transcription")
747
-
748
  with gr.Row():
749
- with gr.Column():
750
- audio_input = gr.Audio(
751
- label="🎙️ Record Voice Question / आवाज़ प्रश्न रिकॉर्ड करें",
752
- sources=["microphone"],
753
- type="filepath"
754
- )
755
-
756
- with gr.Column():
757
- text_input = gr.Textbox(
758
- label="💬 Or Type Question / या प्रश्न टाइप करें",
759
- placeholder="उदाहरण: इस पुस्तक में मुख्य विषय क्या है?",
760
- lines=3
761
- )
762
 
763
- ask_button = gr.Button("🔍 Get Answer / उत्तर पाएं", variant="primary", size="lg")
 
 
 
 
 
 
 
764
 
765
- # Response section
766
- with gr.Column():
767
- response_text = gr.Textbox(
768
- label="📝 Response / उत्तर",
769
- lines=8,
770
- interactive=False
771
  )
772
-
773
- response_audio = gr.Audio(
774
- label="🔊 Audio Response / आवाज़ उत्तर",
775
- interactive=False
 
 
 
776
  )
777
-
778
- # Reset section
779
- gr.Markdown("---")
780
- with gr.Row():
781
- reset_btn = gr.Button("🔄 Start New Session / नया सत्र शुरू करें", variant="secondary")
 
 
 
 
782
 
783
- with gr.Column():
784
- gr.Markdown("""
785
- **Requirements & Limits / आवश्यकताएं और सीमा:**
786
- - PDF with selectable text (no scanned images)
787
- - Max file size: 10MB
788
- - Max queries: 5 per session
789
- - Audio transcription: First 10 seconds only
790
- - Supported: Hindi & English text
791
- - Requires: Groq API key and ffmpeg
792
- """)
793
-
794
- # Event handlers
795
  auth_button.click(
796
  authenticate,
797
  inputs=[passcode_input],
798
  outputs=[auth_section, main_section, auth_status]
799
  )
800
-
801
- # Book selection event handler
802
- if 'select_book_btn' in locals():
803
- select_book_btn.click(
804
- process_selected_book,
805
- inputs=[book_dropdown],
806
- outputs=[doc_status, book_title_display, author_display, query_section]
807
- )
808
-
809
- # Gallery selection event handler
810
- if 'book_gallery' in locals():
811
- book_gallery.select(
812
- handle_gallery_selection,
813
- outputs=[book_dropdown]
814
- )
815
-
816
- # PDF upload event handler
817
- if 'process_pdf_btn' in locals():
818
- process_pdf_btn.click(
819
- process_document,
820
- inputs=[pdf_upload],
821
- outputs=[doc_status, book_title_display, author_display, query_section]
822
- )
823
-
824
- ask_button.click(
825
  process_query,
826
- inputs=[audio_input, text_input],
827
- outputs=[response_text, response_audio, query_counter]
828
  )
829
-
 
 
 
 
 
 
 
 
 
 
 
 
830
  reset_btn.click(
831
  reset_session,
832
- outputs=[doc_status, book_title_display, author_display, query_section, query_counter]
833
  )
834
-
835
- # Load models on startup
836
  demo.load(load_models)
837
-
838
  return demo
839
 
840
- # Main function
841
  def main():
842
- """Main function to launch the application"""
843
- print("🚀 Starting Hindi RAG Voice Demo (Groq Whisper API Version)...")
844
- print("📋 Loading AI models (this may take a moment)...")
845
 
846
- # Pre-load models
847
  load_models()
848
-
849
- # Create and launch interface
850
  demo = create_interface()
851
 
852
- print("✅ Models loaded successfully!")
853
- print(f"🔑 Demo passcode: {CONFIG['PASSCODE']}")
854
- print("🌐 Launching web interface...")
855
 
856
  demo.launch(
857
  share=True,
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import os
3
  import tempfile
4
  import time
5
  import uuid
6
  from datetime import datetime
7
+ import fitz
8
  import requests
9
  import json
10
  import numpy as np
 
16
  import warnings
17
  warnings.filterwarnings("ignore")
18
 
 
# Application-wide settings. The passcode and the Groq API key are read from
# the environment once, at import time (the Groq key comes from ``GAPI``).
CONFIG = dict(
    PASSCODE=os.environ.get('PASSCODE'),
    MAX_FILE_SIZE=10 * 1024 * 1024,  # upload size limit, compared to the file size in bytes
    MAX_QUERIES_PER_SESSION=10,
    MAX_AUDIO_DURATION=120,
    GROQ_API_KEY=os.environ.get('GAPI'),
    AUDIO_CLIP_DURATION=10,  # clip length passed to the audio trimmer before transcription
    BOOK_THUMBNAILS_DIR='./book_thumbnails',
    OCR_BOOKS_DIR='./ocr_books',
)
29
 
 
30
  SESSION_DATA = {
31
  'authenticated': False,
32
  'session_id': str(uuid.uuid4()),
 
39
  'groq_client': None
40
  }
41
 
# Ready-made Hindi questions about a book, grouped under three category keys.
PREDEFINED_QUESTIONS = dict(
    general=[
        "इस पुस्तक का मुख्य विषय क्या है?",
        "लेखक ने इस पुस्तक में क्या संदेश दिया है?",
        "इस पुस्तक में कौन से मुख्य पात्र हैं?",
    ],
    analysis=[
        "इस पुस्तक की मुख्य शिक्षा क्या है?",
        "लेखक की लेखन शैली कैसी है?",
        "इस पुस्तक में कौन सा मुख्य संघर्ष है?",
    ],
    content=[
        "इस कहानी का क्या अंत है?",
        "पुस्तक में कौन सी मुख्य घटनाएं हैं?",
        "मुख्य पात्र का चरित्र कैसा है?",
    ],
)
60
+
def load_models():
    """Lazily create the embedding model and Groq client, caching both.

    Each object is built at most once and stored in ``SESSION_DATA``; later
    calls return the cached instances. The Groq client is only created when
    ``CONFIG['GROQ_API_KEY']`` is set, otherwise a warning is printed and the
    cached client stays ``None``.

    Returns:
        tuple: ``(embedding_model, groq_client)`` as currently cached.
    """
    if SESSION_DATA['embedding_model'] is None:
        print("Loading embedding model...")
        model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
        SESSION_DATA['embedding_model'] = model

    if SESSION_DATA['groq_client'] is None:
        api_key = CONFIG['GROQ_API_KEY']
        if not api_key:
            print("Warning: GROQ_API_KEY not found")
        else:
            print("Initializing Groq client...")
            SESSION_DATA['groq_client'] = Groq(api_key=api_key)

    return SESSION_DATA['embedding_model'], SESSION_DATA['groq_client']
74
 
 
def trim_audio_to_duration(input_path, output_path, duration=10, timeout=60):
    """Copy the first ``duration`` seconds of an audio file using ffmpeg.

    The audio stream is copied without re-encoding (``-acodec copy``) and an
    existing output file is overwritten (``-y``).

    Args:
        input_path: Path of the source audio file.
        output_path: Path the trimmed audio is written to.
        duration: Number of seconds to keep from the start of the input.
        timeout: Maximum number of seconds to wait for ffmpeg before giving
            up, so a stalled process cannot block the caller forever.

    Returns:
        bool: True when ffmpeg exits with status 0; False on a non-zero exit,
        a timeout, or any error while launching the process (for example when
        ffmpeg is not installed).
    """
    cmd = [
        'ffmpeg', '-i', input_path,
        '-t', str(duration),
        '-acodec', 'copy',
        '-y',  # overwrite the output file without prompting
        output_path,
    ]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            stdin=subprocess.DEVNULL,  # never let ffmpeg wait on our stdin
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f"Error trimming audio: ffmpeg timed out after {timeout} seconds")
        return False
    except Exception as e:
        # Best-effort helper: callers fall back to the untrimmed file.
        print(f"Error trimming audio: {str(e)}")
        return False

    if result.returncode != 0:
        print(f"FFmpeg error: {result.stderr}")
        return False
    return True
93
 
def transcribe_audio(audio_file):
    """Transcribe the start of an audio file as Hindi via the Groq Whisper API.

    The input is first trimmed to ``CONFIG['AUDIO_CLIP_DURATION']`` seconds
    into a temporary ``.wav`` file. If trimming fails, the original file is
    sent instead. The temporary file is always removed before returning.

    Args:
        audio_file: Path of the recorded audio, or ``None``.

    Returns:
        str: The transcription text; ``""`` when ``audio_file`` is ``None``;
        or a message starting with ``"Error:"`` / ``"Transcription error:"``
        when the client is not configured or the request fails.
    """
    if audio_file is None:
        return ""

    if not CONFIG['GROQ_API_KEY'] or SESSION_DATA['groq_client'] is None:
        return "Error: Groq API key not configured"

    temp_path = None
    try:
        # Reserve a path for the trimmed clip; ffmpeg overwrites it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            temp_path = tmp_file.name

        if trim_audio_to_duration(audio_file, temp_path, CONFIG['AUDIO_CLIP_DURATION']):
            source_path = temp_path
        else:
            print("Warning: Could not trim audio, using full duration")
            source_path = audio_file

        with open(source_path, "rb") as file:
            transcription = SESSION_DATA['groq_client'].audio.transcriptions.create(
                file=(os.path.basename(source_path), file.read()),
                model="whisper-large-v3",
                response_format="verbose_json",
                language="hi",
            )

        return transcription.text

    except Exception as e:
        return f"Transcription error: {str(e)}"

    finally:
        # Remove the temporary clip on every path, including the fallback
        # where the original file was transcribed instead.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
132
 
133
  def text_to_speech(text):
 
134
  if not text or len(text.strip()) == 0:
135
  return None
136
+
137
  try:
138
  tts = gTTS(text=text, lang='hi', slow=False)
 
 
139
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
140
  tts.save(tmp_file.name)
141
  return tmp_file.name
 
143
  print(f"TTS Error: {str(e)}")
144
  return None
145
 
 
146
  def extract_text_from_pdf(pdf_path):
 
147
  text_content = ""
 
148
  try:
149
  pdf_document = fitz.open(pdf_path)
150
  total_pages = len(pdf_document)
 
151
  print(f"Processing PDF with {total_pages} pages...")
152
+
 
153
  for page_num in range(total_pages):
154
  page = pdf_document.load_page(page_num)
155
  page_text = page.get_text()
 
 
156
  if page_text.strip():
157
  text_content += page_text + "\n"
158
+
 
 
159
  pdf_document.close()
160
 
161
  if not text_content.strip():
162
  return "Error: No selectable text found in PDF. Please ensure the PDF contains selectable text, not just images."
163
 
164
  return text_content
165
+
166
  except Exception as e:
167
  return f"Error extracting text: {str(e)}"
168
 
169
  def extract_metadata(text):
 
170
  lines = [line.strip() for line in text.split('\n')[:25] if line.strip()]
 
171
  author_name = "अज्ञात लेखक"
172
  book_title = "अनाम पुस्तक"
173
+
 
174
  for i, line in enumerate(lines):
 
175
  if any(word in line.lower() for word in ['लेखक', 'author', 'by', 'द्वारा', 'रचयिता']):
176
  author_name = line
 
177
  elif 10 < len(line) < 100 and not any(char.isdigit() for char in line[:20]):
178
  if book_title == "अनाम पुस्तक":
179
  book_title = line
180
+
181
  return author_name, book_title
182
 
183
  def chunk_text(text, chunk_size=400, overlap=50):
 
184
  words = text.split()
185
  chunks = []
 
186
  for i in range(0, len(words), chunk_size - overlap):
187
  chunk = ' '.join(words[i:i + chunk_size])
188
  if chunk.strip():
189
  chunks.append(chunk)
 
190
  return chunks
191
 
 
192
  def create_embeddings(chunks):
 
193
  embedding_model, _ = load_models()
194
  embeddings = embedding_model.encode(chunks, show_progress_bar=False)
 
 
195
  dimension = embeddings.shape[1]
196
  index = faiss.IndexFlatIP(dimension)
 
 
197
  faiss.normalize_L2(embeddings)
198
  index.add(embeddings.astype('float32'))
 
199
  return index
200
 
201
  def search_similar_chunks(query, top_k=3):
 
202
  if SESSION_DATA['faiss_index'] is None or not SESSION_DATA['document_chunks']:
203
  return []
204
+
205
  embedding_model, _ = load_models()
206
  query_embedding = embedding_model.encode([query], show_progress_bar=False)
207
  faiss.normalize_L2(query_embedding)
 
208
  scores, indices = SESSION_DATA['faiss_index'].search(query_embedding.astype('float32'), top_k)
209
+
210
  results = []
211
  for i, idx in enumerate(indices[0]):
212
  if idx >= 0 and idx < len(SESSION_DATA['document_chunks']):
 
214
  'text': SESSION_DATA['document_chunks'][idx],
215
  'score': float(scores[0][i])
216
  })
 
217
  return results
218
 
 
219
  def call_groq_api(prompt, model="llama-3.1-8b-instant"):
 
220
  if not CONFIG['GROQ_API_KEY'] or CONFIG['GROQ_API_KEY'] == 'your_groq_api_key_here':
221
  return "⚠️ Groq API key not configured. Please set GROQ_API_KEY environment variable."
222
+
223
  url = "https://api.groq.com/openai/v1/chat/completions"
224
  headers = {
225
  "Authorization": f"Bearer {CONFIG['GROQ_API_KEY']}",
226
  "Content-Type": "application/json"
227
  }
228
+
229
  data = {
230
  "model": model,
231
  "messages": [{"role": "user", "content": prompt}],
232
  "temperature": 0.7,
233
+ "max_tokens": 600
234
  }
235
+
236
  try:
237
  response = requests.post(url, headers=headers, json=data, timeout=30)
238
  response.raise_for_status()
 
241
  return f"Error calling LLM: {str(e)}"
242
 
243
  def generate_rag_response(query, context_chunks):
 
244
  if not context_chunks:
245
  return "मुझे इस प्रश्न का उत्तर देने के लिए पर्याप्त जानकारी नहीं मिली।"
246
+
247
  context = "\n\n".join([chunk['text'] for chunk in context_chunks])
248
+
249
  prompt = f"""आप एक हिंदी पुस्तक सहायक हैं। निम्नलिखित जानकारी के आधार पर प्रश्न का उत्तर दें:
250
 
251
  पुस्तक: {SESSION_DATA['book_title']}
 
261
  - उत्तर की शुरुआत में पुस्तक और लेखक का संदर्भ शामिल करें
262
  - केवल दिए गए संदर्भ के आधार पर ही उत्तर दें
263
  """
264
+
265
  response = call_groq_api(prompt)
266
  return response
267
 
 
268
  def authenticate(passcode):
 
269
  if passcode == CONFIG['PASSCODE']:
270
  SESSION_DATA['authenticated'] = True
271
+ return gr.update(visible=False), gr.update(visible=True), "✅ Welcome!"
272
  else:
273
+ return gr.update(visible=True), gr.update(visible=False), "❌ Invalid passcode"
274
 
 
275
  def process_document(pdf_file):
 
276
  if pdf_file is None:
277
+ return "Please upload a PDF file", "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])
278
+
279
  try:
 
280
  file_size = os.path.getsize(pdf_file.name)
281
  if file_size > CONFIG['MAX_FILE_SIZE']:
282
+ return f"File too large! Max size: {CONFIG['MAX_FILE_SIZE'] // (1024*1024)}MB", "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])
283
+
 
284
  text_content = extract_text_from_pdf(pdf_file.name)
 
285
  if not text_content.strip() or "Error" in text_content:
286
+ return text_content, "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])
287
+
 
288
  author_name, book_title = extract_metadata(text_content)
289
  SESSION_DATA['author_name'] = author_name
290
  SESSION_DATA['book_title'] = book_title
291
+
 
292
  chunks = chunk_text(text_content)
293
  SESSION_DATA['document_chunks'] = chunks
 
 
 
294
  SESSION_DATA['faiss_index'] = create_embeddings(chunks)
 
 
295
  SESSION_DATA['query_count'] = 0
 
 
 
 
 
 
296
 
297
+ # Generate predefined questions
298
+ questions = []
299
+ for category in PREDEFINED_QUESTIONS.values():
300
+ questions.extend(category)
 
301
 
302
+ success_msg = f"✅ Document processed successfully!"
 
 
303
 
304
+ return success_msg, book_title, author_name, gr.update(visible=False), gr.update(visible=True), gr.update(choices=questions[:6])
305
+
306
  except Exception as e:
307
+ return f"Error processing document: {str(e)}", "", "", gr.update(visible=True), gr.update(visible=False), gr.update(choices=[])
308
 
309
def show_questions():
    """Hide the book-information step and reveal the question step.

    Returns:
        A pair of component updates: the first hides its target, the second
        makes its target visible.
    """
    hide_update = gr.update(visible=False)
    show_update = gr.update(visible=True)
    return hide_update, show_update
312
+
313
def process_query(audio_input, text_input, predefined_question):
    """Answer one question about the indexed book and speak the answer.

    The question is taken from the first available source in this order:
    predefined question, transcribed audio, typed text. A failed
    transcription falls through to the typed text.

    Args:
        audio_input: Path of a recorded question, or None/empty.
        text_input: Typed question text; None is treated as empty.
        predefined_question: Question chosen from the dropdown, or
            None/empty/the "Select a question..." placeholder.

    Returns:
        A ``(response_text, audio_path)`` pair. ``audio_path`` is None when a
        limit, validation or processing error is reported, or when speech
        synthesis produced no file.
    """
    if SESSION_DATA['query_count'] >= CONFIG['MAX_QUERIES_PER_SESSION']:
        return "⚠️ Query limit reached", None

    if not SESSION_DATA['document_chunks']:
        return "Please upload a document first", None

    query_text = ""

    # Priority: Predefined > Audio > Text
    if predefined_question and predefined_question != "Select a question...":
        query_text = predefined_question
    elif audio_input:
        transcript = transcribe_audio(audio_input)
        # transcribe_audio signals failure with these exact prefixes. A
        # substring test for "error" would also throw away genuine questions
        # that happen to contain that word.
        if not transcript.startswith(("Error:", "Transcription error:")):
            query_text = transcript

    typed_text = (text_input or "").strip()
    if not query_text.strip() and typed_text:
        query_text = typed_text

    if not query_text.strip():
        return "Please ask a question", None

    try:
        similar_chunks = search_similar_chunks(query_text)
        response_text = generate_rag_response(query_text, similar_chunks)
        audio_response = text_to_speech(response_text)

        SESSION_DATA['query_count'] += 1

        formatted_response = f"**प्रश्न:** {query_text}\n\n**उत्तर:** {response_text}"
        return formatted_response, audio_response

    except Exception as e:
        return f"Error processing query: {str(e)}", None
347
 
348
  def reset_session():
 
349
  SESSION_DATA.update({
350
  'query_count': 0,
351
  'document_chunks': [],
 
354
  'book_title': '',
355
  'session_id': str(uuid.uuid4())
356
  })
357
+ return "✅ New session started!", "", "", gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(choices=[])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  def create_interface():
 
 
360
  with gr.Blocks(
361
+ title="Hindi Book Assistant",
362
  theme=gr.themes.Soft(),
363
  css="""
364
+ .main-container { max-width: 1200px; margin: 0 auto; }
365
+ .section-header { font-size: 1.2em; font-weight: bold; margin: 1em 0; }
366
+ .upload-area { border: 2px dashed #ccc; padding: 2em; text-align: center; margin: 1em 0; }
367
  """
368
  ) as demo:
369
+
370
  gr.HTML("""
371
+ <div style="text-align: center; padding: 2em;">
372
+ <h1>📚 Hindi Book Assistant</h1>
373
+ <p>AI-powered assistant for Hindi books with voice support</p>
 
 
374
  </div>
375
  """)
376
+
377
+ # Authentication Section
378
  with gr.Group(visible=True) as auth_section:
379
+ gr.Markdown("### 🔐 Enter Passcode")
 
 
380
  passcode_input = gr.Textbox(
381
+ label="Passcode",
382
  type="password",
383
+ placeholder="Enter access code..."
384
  )
385
+ auth_button = gr.Button("🔓 Access", variant="primary")
386
  auth_status = gr.Textbox(label="Status", interactive=False)
387
+
388
+ # Main Interface
389
  with gr.Group(visible=False) as main_section:
390
 
391
+ # Step 1: Upload Document
392
+ with gr.Group(visible=True) as upload_section:
393
+ gr.Markdown("### 📄 Upload Your Book")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  pdf_upload = gr.File(
395
+ label="Choose PDF file",
396
  file_types=[".pdf"],
397
  type="filepath"
398
  )
399
+ process_btn = gr.Button("📖 Process Book", variant="primary", size="lg")
400
+ doc_status = gr.Textbox(label="Status", interactive=False)
401
+
402
+ # Step 2: Book Info (shown after processing)
403
+ with gr.Group(visible=False) as book_info_section:
404
+ gr.Markdown("### 📚 Book Information")
 
 
 
 
 
 
 
405
  with gr.Row():
406
+ book_title_display = gr.Textbox(label="Book Title", interactive=False)
407
+ author_display = gr.Textbox(label="Author", interactive=False)
408
+ continue_btn = gr.Button("➡️ Continue to Questions", variant="primary", size="lg")
409
+
410
+ # Step 3: Ask Questions (shown after continue)
411
+ with gr.Group(visible=False) as query_section:
412
+ gr.Markdown("### 💬 Ask Questions About Your Book")
 
 
 
 
 
 
413
 
414
+ with gr.Tab("🎯 Quick Questions"):
415
+ predefined_dropdown = gr.Dropdown(
416
+ label="Choose a question",
417
+ choices=[],
418
+ value=None,
419
+ interactive=True
420
+ )
421
+ ask_predefined_btn = gr.Button("🔍 Ask This Question", variant="primary")
422
 
423
+ with gr.Tab("🎤 Voice Question"):
424
+ audio_input = gr.Audio(
425
+ label="Record your question (Hindi/English)",
426
+ sources=["microphone"],
427
+ type="filepath"
 
428
  )
429
+ ask_voice_btn = gr.Button("🔍 Ask Voice Question", variant="primary")
430
+
431
+ with gr.Tab("⌨️ Type Question"):
432
+ text_input = gr.Textbox(
433
+ label="Type your question",
434
+ placeholder="Example: इस पुस्तक का मुख्य विषय क्या है?",
435
+ lines=2
436
  )
437
+ ask_text_btn = gr.Button("🔍 Ask Text Question", variant="primary")
438
+
439
+ # Response Section
440
+ gr.Markdown("### 📝 Answer")
441
+ response_text = gr.Textbox(
442
+ label="Response",
443
+ lines=6,
444
+ interactive=False
445
+ )
446
 
447
+ response_audio = gr.Audio(
448
+ label="🔊 Audio Response",
449
+ interactive=False
450
+ )
451
+
452
+ # Reset Button
453
+ gr.Markdown("---")
454
+ reset_btn = gr.Button("🔄 Start New Session", variant="secondary")
455
+
456
+ # Event Handlers
 
 
457
  auth_button.click(
458
  authenticate,
459
  inputs=[passcode_input],
460
  outputs=[auth_section, main_section, auth_status]
461
  )
462
+
463
+ process_btn.click(
464
+ process_document,
465
+ inputs=[pdf_upload],
466
+ outputs=[doc_status, book_title_display, author_display, upload_section, book_info_section, predefined_dropdown]
467
+ )
468
+
469
+ continue_btn.click(
470
+ show_questions,
471
+ outputs=[book_info_section, query_section]
472
+ )
473
+
474
+ ask_predefined_btn.click(
 
 
 
 
 
 
 
 
 
 
 
 
475
  process_query,
476
+ inputs=[gr.State(None), gr.State(""), predefined_dropdown],
477
+ outputs=[response_text, response_audio]
478
  )
479
+
480
+ ask_voice_btn.click(
481
+ process_query,
482
+ inputs=[audio_input, gr.State(""), gr.State("")],
483
+ outputs=[response_text, response_audio]
484
+ )
485
+
486
+ ask_text_btn.click(
487
+ process_query,
488
+ inputs=[gr.State(None), text_input, gr.State("")],
489
+ outputs=[response_text, response_audio]
490
+ )
491
+
492
  reset_btn.click(
493
  reset_session,
494
+ outputs=[doc_status, book_title_display, author_display, upload_section, book_info_section, query_section, predefined_dropdown]
495
  )
496
+
 
497
  demo.load(load_models)
498
+
499
  return demo
500
 
 
501
  def main():
502
+ print("🚀 Starting Hindi Book Assistant...")
503
+ print("📋 Loading AI models...")
 
504
 
 
505
  load_models()
 
 
506
  demo = create_interface()
507
 
508
+ print("✅ Ready!")
509
+ print(f"🔑 Passcode: {CONFIG['PASSCODE']}")
 
510
 
511
  demo.launch(
512
  share=True,