Natwar committed on
Commit
0a2dccb
·
verified ·
1 Parent(s): 9b1bb16

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +558 -0
app.py ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Omni-RAG Analyst v10 (Stable).ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1U8IVDRfGNbCZ-1UgIv9Zn0sdRL73zKH9
8
+ """
9
+
10
+ # --- 1. Dependency Installation ---
11
+ # This block checks for and installs all required libraries,
12
+ # removing the need for a requirements.txt file.
13
+
14
+ import os
15
+ import subprocess
16
+ import sys
17
+ import time
18
+
19
def install_dependencies():
    """
    Install every third-party package the application needs via pip.

    Packages are installed one at a time with the current interpreter
    (``sys.executable -m pip``) in quiet mode. A failing package is reported
    and skipped so the remaining packages are still attempted; a summary
    line is printed at the end.
    """
    print("Starting dependency installation...")
    started_at = time.time()

    required_packages = (
        "gradio>=4.0.0",
        "transformers[torch]",
        "sentence-transformers",
        "scikit-learn",
        "faiss-cpu",
        "pypdf",
        "tavily-python",  # Tavily Search
        "google-search-results",  # SerpApi Search
        "openai",
        "google-generativeai",
        "gTTS",
        "soundfile",
    )
    pip_command = [sys.executable, "-m", "pip", "install", "-q", "--disable-pip-version-check"]

    failure_count = 0
    for package in required_packages:
        print(f"Installing {package}...")
        try:
            subprocess.check_call(pip_command + [package])
        except subprocess.CalledProcessError as pip_error:
            print(f"!!! CRITICAL: Failed to install {package}. Error: {pip_error}")
            failure_count += 1

    elapsed = time.time() - started_at
    if failure_count:
        print("!!! WARNING: One or more dependencies failed to install. The app may not run.")
    else:
        print(f"All dependencies installed in {elapsed:.2f} seconds.")
56
+
57
+ # --- Run the installation ---
58
print("Checking for required dependencies...")
import importlib

# Probe every third-party module that the import section of this file loads
# unconditionally. Probing only a subset would let a partially provisioned
# environment pass this check and then crash on the real imports.
_REQUIRED_MODULES = (
    "gradio",
    "pypdf",
    "faiss",
    "numpy",
    "transformers",
    "sentence_transformers",
    "sklearn",
    "torch",
    "openai",
    "google.generativeai",
    "tavily",
    "serpapi",
    "gtts",
)

try:
    for _module_name in _REQUIRED_MODULES:
        importlib.import_module(_module_name)
    print("All key dependencies seem to be satisfied.")
except ImportError:
    print("Missing one or more dependencies. Running installer...")
    install_dependencies()
    print("\n" + "=" * 50)
    print("INSTALLATION COMPLETE. If in a notebook, please RESTART THE KERNEL now.")
    print("=" * 50 + "\n")
73
+
74
+
75
+ # --- 2. All Imports (Now that we know they are installed) ---
76
+ import gradio as gr
77
+ import pypdf
78
+ import faiss
79
+ import numpy as np
80
+ from transformers import pipeline
81
+ from sentence_transformers import SentenceTransformer
82
+ from sklearn.feature_extraction.text import TfidfVectorizer
83
+ from sklearn.metrics.pairwise import cosine_similarity
84
+ import torch
85
+ import openai
86
+ import google.generativeai as genai
87
+ from tavily import TavilyClient
88
+ from serpapi import GoogleSearch
89
+ from gtts import gTTS
90
+ import logging
91
+
92
+ # Set up basic logging
93
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- 3. COLAB-SPECIFIC: Mount Google Drive for Model Caching ---
# Cache locations: a local folder by default, a Drive folder when running in
# Colab so downloaded models can survive across sessions.
_LOCAL_CACHE_DIR = "./hf_cache"
_DRIVE_CACHE_DIR = "/content/drive/MyDrive/colab_hf_cache"

IN_COLAB = 'google.colab' in sys.modules
MODEL_CACHE_DIR = _LOCAL_CACHE_DIR
DRIVE_MOUNT_FAILED = False

if IN_COLAB:
    print("Running in Google Colab. Mounting Google Drive for model cache...")
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        MODEL_CACHE_DIR = _DRIVE_CACHE_DIR
        os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
        print(f"✅ Google Drive mounted. Hugging Face models will be cached in: {MODEL_CACHE_DIR}")
    except Exception as e:
        # Best effort: fall back to the local cache and remember the failure
        # so the UI can show a warning banner.
        print(f"⚠️ WARNING: Failed to mount Google Drive. Models will be re-downloaded. Error: {e}")
        MODEL_CACHE_DIR = _LOCAL_CACHE_DIR
        DRIVE_MOUNT_FAILED = True
else:
    print("Not running in Colab. Using local cache directory.")
115
+
116
+
117
+ # --- 4. Economic Model Loading (with Graceful Degradation & Caching) ---
118
+
119
logger.info("Loading local AI models (this may take a moment)...")
logger.info(f"Using cache directory: {MODEL_CACHE_DIR}")

# --- Summarizer & Vectorizers (Essential) ---
# Pre-bind the essential models so a failed load leaves them as None instead
# of leaving the names undefined for the functions that use them later.
dense_model = None
sparse_vectorizer = None
summarizer = None
try:
    logger.info("Loading vectorization models...")
    # cache_folder only affects this SentenceTransformer download; the
    # transformers pipelines below receive the cache directory explicitly.
    dense_model = SentenceTransformer(
        'all-MiniLM-L6-v2',
        cache_folder=MODEL_CACHE_DIR
    )
    sparse_vectorizer = TfidfVectorizer()

    logger.info("Loading summarizer agent...")
    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        min_length=25,
        max_length=150,
        model_kwargs={"cache_dir": MODEL_CACHE_DIR}
    )
except Exception as e:
    logger.error(f"CRITICAL: Failed to load essential models. The app may not work. Error: {e}")

# --- Speech-to-Text (Optional) ---
# Voice input is optional: on any failure the app keeps running with
# stt_enabled left False and stt_pipeline left None.
stt_enabled = False
stt_pipeline = None
try:
    logger.info("Loading Speech-to-Text (Whisper) agent...")
    stt_pipeline = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base.en",
        model_kwargs={"cache_dir": MODEL_CACHE_DIR}
    )
    stt_enabled = True
    logger.info("✅ Local STT (Whisper) model loaded successfully. Voice input enabled.")
except Exception as e:
    logger.warning(f"⚠️ WARNING: Failed to load local STT model. Voice input will be disabled. Error: {e}")
155
+
156
+ # --- 5. ETL & Vectorization Functions (Organic/Document Flow) ---
157
+
158
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        pdf_file: Uploaded file object exposing its on-disk path as ``.name``,
            or a plain path string. ``None`` means nothing was uploaded.

    Returns:
        A ``(text, error)`` tuple. On success ``error`` is ``None``; on
        failure ``text`` is ``""`` and ``error`` is a human-readable message.
    """
    if pdf_file is None:
        return "", "Please upload a PDF file."
    # Accept both file wrappers (path stored in .name) and bare path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    try:
        pdf_reader = pypdf.PdfReader(pdf_path)
        # Separate pages with a newline so the last word of one page is not
        # fused with the first word of the next. Pages without extractable
        # text contribute an empty string.
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        return text, None
    except Exception as e:
        return "", f"Error reading PDF: {str(e)}"
166
+
167
def chunk_text(text, chunk_size=500, overlap=50):
    """Split text into overlapping chunks of whitespace-separated tokens.

    Args:
        text: The text to split.
        chunk_size: Maximum number of tokens per chunk.
        overlap: Number of tokens shared between consecutive chunks.

    Returns:
        A list of chunk strings; empty when ``text`` contains no tokens.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than
            ``chunk_size``. Such values would make the window step zero or
            negative, or would skip tokens between chunks.
    """
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size "
            f"(got chunk_size={chunk_size}, overlap={overlap})"
        )
    tokens = text.split()
    step = chunk_size - overlap
    # Every window starts inside the token list, so no chunk is ever empty.
    return [" ".join(tokens[start:start + chunk_size]) for start in range(0, len(tokens), step)]
171
+
172
def build_vector_stores(chunks):
    """Index text chunks in a dense FAISS store and a sparse TF-IDF store.

    The module-level ``sparse_vectorizer`` is fitted on ``chunks`` as a side
    effect, so later sparse queries share the same vocabulary.

    Args:
        chunks: List of text chunks to index.

    Returns:
        ``(index_dense, embeddings_sparse, error)``. On success ``error`` is
        ``None``; on failure both stores are ``None`` and ``error`` holds a
        message.
    """
    if not chunks:
        return None, None, "No text chunks to index."

    try:
        logger.info(f"Building vector stores for {len(chunks)} chunks...")

        # Dense store: sentence embeddings in an exact L2 FAISS index.
        dense_vectors = dense_model.encode(chunks)
        dimension = dense_vectors.shape[1]
        dense_index = faiss.IndexFlatL2(dimension)
        dense_index.add(np.array(dense_vectors).astype('float32'))

        # Sparse store: TF-IDF matrix from the shared vectorizer.
        sparse_matrix = sparse_vectorizer.fit_transform(chunks)

        logger.info("Vector stores built successfully.")
        return dense_index, sparse_matrix, None
    except Exception as build_error:
        logger.error(f"Error building vector stores: {build_error}")
        return None, None, f"Error building vector stores: {str(build_error)}"
186
+
187
+ # --- 6. RAG & Analysis Functions (Organic/Document Flow) ---
188
+
189
def search_dense(query, index_dense, chunks, k=3):
    """Return the chunks nearest to the query in the dense (semantic) index.

    Args:
        query: The user's question.
        index_dense: FAISS index built over the chunk embeddings.
        chunks: Chunk texts, in the same order they were added to the index.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` chunk strings, in the order returned by the index search.
    """
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, and chunks[-1] would silently return the last chunk.
    k = min(k, len(chunks))
    if k <= 0:
        return []
    query_embedding = dense_model.encode([query])
    _, indices = index_dense.search(np.array(query_embedding).astype('float32'), k)
    # Defensive filter in case the index holds fewer vectors than chunks.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
193
+
194
def search_sparse(query, embeddings_sparse, chunks, k=3):
    """Return the chunks most similar to the query by TF-IDF cosine similarity.

    Args:
        query: The user's question.
        embeddings_sparse: TF-IDF matrix of the chunks, produced by the
            module-level ``sparse_vectorizer``.
        chunks: Chunk texts, row-aligned with ``embeddings_sparse``.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` chunk strings, most similar first.
    """
    # A slice of [-0:] selects everything, so k <= 0 must be handled first.
    if k <= 0:
        return []
    query_embedding = sparse_vectorizer.transform([query])
    similarities = cosine_similarity(query_embedding, embeddings_sparse).flatten()
    # argsort is ascending: take the last k entries and reverse them.
    top_k_indices = similarities.argsort()[-k:][::-1]
    return [chunks[i] for i in top_k_indices]
199
+
200
def search_hybrid(query, index_dense, embeddings_sparse, chunks, k=3):
    """Combine dense and sparse retrieval into one de-duplicated result list.

    Dense hits come first, followed by sparse hits that were not already
    returned by the dense search. Up to ``2 * k`` chunks may be returned.
    """
    merged = {}
    for hit in search_dense(query, index_dense, chunks, k):
        merged.setdefault(hit, None)
    for hit in search_sparse(query, embeddings_sparse, chunks, k):
        merged.setdefault(hit, None)
    # Dict keys keep first-insertion order, which gives the de-duplication.
    return list(merged)
204
+
205
def run_analysis_agent(retrieved_chunks):
    """Extract the top keywords from the retrieved chunks.

    The chunks are joined into a single document and scored with a TF-IDF
    vectorizer limited to 10 English non-stop-word features; the five
    highest-scoring terms are reported.

    Args:
        retrieved_chunks: List of chunk strings returned by retrieval.

    Returns:
        A dict with parallel lists under ``"Keyword"`` and
        ``"Importance Score"``, or a message string when there is no input
        or the analysis fails.
    """
    if not retrieved_chunks:
        return "No data for analysis."
    full_retrieved_text = " ".join(retrieved_chunks)
    try:
        analysis_vectorizer = TfidfVectorizer(stop_words='english', max_features=10)
        tfidf_matrix = analysis_vectorizer.fit_transform([full_retrieved_text])
        feature_names = analysis_vectorizer.get_feature_names_out()
        scores = tfidf_matrix.toarray().flatten()
        keyword_data = {"Keyword": [], "Importance Score": []}
        # argsort is ascending: take the last five entries, highest first.
        for i in scores.argsort()[-5:][::-1]:
            keyword_data["Keyword"].append(feature_names[i])
            keyword_data["Importance Score"].append(round(float(scores[i]), 3))
        return keyword_data
    except Exception as e:
        # Keep the best-effort behaviour, but record the cause.
        logger.warning(f"Analysis agent failed: {e}")
        return "Analysis failed (not enough unique content)."
220
+
221
def run_summary_agent(retrieved_chunks, query):
    """Answer the query by summarizing the retrieved chunks with the local model.

    Args:
        retrieved_chunks: List of chunk strings returned by retrieval.
        query: The user's question, appended to the prompt.

    Returns:
        The generated summary text, or a message string when there is no
        context or the summarizer raises.
    """
    if not retrieved_chunks:
        return "No relevant information found."

    prompt = "\n".join([
        "Based on the following information:",
        "---",
        " ".join(retrieved_chunks),
        "---",
        f"Please provide a concise answer to the query: \"{query}\"",
    ])
    try:
        # truncation=True cuts inputs that are too long for the
        # model (1024 tokens) instead of raising.
        outputs = summarizer(prompt, truncation=True)
        return outputs[0]['summary_text']
    except Exception as summary_error:
        logger.error(f"Summarization agent failed: {summary_error}")
        return f"Summarization agent failed: {str(summary_error)}"
234
+
235
+ # --- 7. Web Search Functions (Non-Organic/Web Flow) ---
236
+
237
def run_tavily_search_agent(query, tavily_api_key):
    """Search the web with Tavily and format the results as LLM context.

    Args:
        query: The user's question.
        tavily_api_key: Tavily API key supplied in the UI.

    Returns:
        A newline-joined string of ``Source: ...`` / ``Content: ...`` pairs,
        or a short message when Tavily returns no results.

    Raises:
        gr.Error: If the key is missing or the search fails.
    """
    if not tavily_api_key:
        raise gr.Error("Tavily API Key is required for this search provider.")
    try:
        client = TavilyClient(api_key=tavily_api_key)
        response = client.search(query=query, search_depth="basic")
        results = response.get('results') or []
        if not results:
            return "No results found by Tavily for this query."
        # Tolerate results that lack a field instead of failing the search.
        return "\n".join(
            f"Source: {res.get('url', 'unknown source')}\nContent: {res.get('content', '')}"
            for res in results
        )
    except Exception as e:
        logger.error(f"Tavily web search failed: {e}")
        raise gr.Error(f"Tavily web search failed: {str(e)}") from e
248
+
249
def run_serpapi_search_agent(query, serpapi_api_key):
    """Search Google through SerpApi and format the snippets as LLM context.

    Args:
        query: The user's question.
        serpapi_api_key: SerpApi key supplied in the UI.

    Returns:
        A newline-joined string of ``Source: ...`` / ``Content: ...`` pairs
        (the answer box, if present, then up to four organic results), or a
        short message when no snippets were found.

    Raises:
        gr.Error: If the key is missing or the search fails.
    """
    if not serpapi_api_key:
        raise gr.Error("SerpApi API Key is required for this search provider.")
    try:
        params = {
            "q": query,
            "api_key": serpapi_api_key,
            "engine": "google",
        }
        search = GoogleSearch(params)
        response = search.get_dict()

        # Record an error reported in the response body, so it is not hidden
        # behind the "no snippets" message below.
        if response.get("error"):
            logger.warning(f"SerpApi reported an error: {response['error']}")

        snippets = []
        answer_box = response.get("answer_box") or {}
        if "snippet" in answer_box:
            snippets.append(f"Source: Google Answer Box\nContent: {answer_box['snippet']}")
        for res in (response.get("organic_results") or [])[:4]:
            if "snippet" in res:
                # A result without a link should not fail the whole search.
                snippets.append(f"Source: {res.get('link', 'unknown source')}\nContent: {res['snippet']}")

        if not snippets:
            return "No snippets found by SerpApi for this query."

        return "\n".join(snippets)
    except Exception as e:
        logger.error(f"SerpApi web search failed: {e}")
        raise gr.Error(f"SerpApi web search failed: {str(e)}") from e
276
+
277
+
278
def run_llm_synthesis_agent(context, query, llm_provider, openai_key, gemini_key, openrouter_key):
    """Ask the selected hosted LLM to answer the query from web-search context.

    Args:
        context: Text gathered by the web search agent.
        query: The user's question.
        llm_provider: One of ``"OpenAI"``, ``"Gemini"`` or ``"OpenRouter"``.
        openai_key: API key used when ``llm_provider`` is ``"OpenAI"``.
        gemini_key: API key used when ``llm_provider`` is ``"Gemini"``.
        openrouter_key: API key used when ``llm_provider`` is ``"OpenRouter"``.

    Returns:
        The text of the model's answer.

    Raises:
        gr.Error: If the provider is unknown, its key is missing, or the
            provider call fails.
    """
    system_prompt = "You are a helpful assistant. Answer the user's query based *only* on the provided context from a web search."
    user_prompt = f"Here is the web search context:\n---\n{context}\n---\nNow, please answer this query: \"{query}\""

    # Validate outside the try block so these messages reach the user as
    # written instead of being re-wrapped as "LLM Synthesis failed".
    provider_keys = {"OpenAI": openai_key, "Gemini": gemini_key, "OpenRouter": openrouter_key}
    if llm_provider not in provider_keys:
        raise gr.Error(f"Unsupported LLM provider: {llm_provider}")
    if not provider_keys[llm_provider]:
        raise gr.Error(f"{llm_provider} API Key is required.")

    try:
        if llm_provider == "Gemini":
            genai.configure(api_key=gemini_key)
            model = genai.GenerativeModel('gemini-pro')
            # The system and user prompts are sent as one combined prompt.
            full_prompt = f"{system_prompt}\n\n{user_prompt}"
            response = model.generate_content(full_prompt)
            return response.text

        # OpenAI and OpenRouter share the OpenAI client; only the endpoint,
        # key and model name differ.
        if llm_provider == "OpenAI":
            client = openai.OpenAI(api_key=openai_key)
            model_name = "gpt-3.5-turbo"
        else:
            client = openai.OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=openrouter_key
            )
            model_name = "mistralai/mistral-7b-instruct:free"

        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
        )
        return response.choices[0].message.content

    except Exception as e:
        logger.error(f"LLM Synthesis failed for {llm_provider}: {e}")
        raise gr.Error(f"LLM Synthesis failed: {str(e)}") from e
315
+
316
+ # --- 8. Voice I/O Functions (Economic & Robust) ---
317
+
318
def transcribe_audio(audio_filepath):
    """Speech-to-Text: turn a recorded audio file into text with local Whisper.

    Returns an empty string, after a UI warning where relevant, when the
    model is unavailable, no audio was recorded, or transcription fails.
    """
    stt_ready = stt_enabled and stt_pipeline is not None
    if not stt_ready:
        gr.Warning("STT model is not loaded. Cannot transcribe audio.")
        return ""

    if audio_filepath is None:
        return ""

    try:
        transcription = stt_pipeline(audio_filepath)
        return transcription["text"]
    except Exception as stt_error:
        gr.Warning(f"STT failed during transcription: {str(stt_error)}")
        return ""
331
+
332
def synthesize_speech(text):
    """Text-to-Speech: render the answer with the gTTS API. Fails gracefully.

    Args:
        text: The answer text to speak.

    Returns:
        A 3-tuple ``(audio_path, speak_button_update, audio_update)``. On
        success ``audio_path`` is the generated MP3 and the audio component
        is shown with autoplay; with empty text or on failure ``audio_path``
        is ``None`` and both components are hidden.
    """
    if not text:
        return None, gr.Button(visible=False), gr.Audio(visible=False)
    import tempfile
    try:
        tts = gTTS(text)
        # Use a unique file per call: a single fixed filename would be
        # overwritten by a second request while the first is still playing.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
            audio_path = handle.name
        tts.save(audio_path)
        return audio_path, gr.Button(visible=False), gr.Audio(value=audio_path, autoplay=True, visible=True)
    except Exception as e:
        gr.Warning(f"TTS failed (e.g., no internet connection): {str(e)}")
        return None, gr.Button(visible=False), gr.Audio(visible=False)
343
+
344
+ # --- 9. Main Gradio Functions (Controller Logic) ---
345
+
346
# Module-level cache of the most recently indexed document.
document_cache = {"filename": None, "chunks": [], "index_dense": None, "embeddings_sparse": None}


def process_document(pdf_file, progress=gr.Progress()):
    """Extract, chunk and index an uploaded PDF, then update the UI.

    Args:
        pdf_file: The uploaded file object; its ``.name`` is used both as the
            path to read and as the cache key.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        A 5-tuple of updates for, in order: the upload status box, the query
        box, the analyze button, the result tabs and the query-source radio.
    """
    def _query_box(placeholder="Ask a question..."):
        # Update only the placeholder. Returning a plain string would set the
        # textbox value and overwrite a query the user has already typed.
        return gr.Textbox(placeholder=placeholder)

    def _failure(message):
        # NOTE(review): this disables the analyze button and hides the result
        # tabs while switching the source to "Web Search" - confirm that
        # blocking web queries after a failed upload is intended.
        return message, _query_box(), gr.Button(interactive=False), gr.Tabs(visible=False), "Web Search"

    if pdf_file is None:
        return "Please upload a PDF.", _query_box(), "Analyze Query", gr.Tabs(visible=False), "Web Search"
    if document_cache["filename"] == pdf_file.name:
        # Same file as last time: reuse the existing index.
        return f"✅ Document '{pdf_file.name}' is ready.", _query_box(), gr.Button(interactive=True), gr.Tabs(visible=True), "Document"

    progress(0, desc="Extracting text...")
    text, error = extract_text_from_pdf(pdf_file)
    if error:
        return _failure(f"Error: {error}")

    progress(0.3, desc="Chunking text...")
    chunks = chunk_text(text)
    if not chunks:
        return _failure("Error: No text chunks found.")

    progress(0.6, desc=f"Building vector stores for {len(chunks)} chunks...")
    index_dense, embeddings_sparse, error = build_vector_stores(chunks)
    if error:
        return _failure(f"Error: {error}")

    document_cache.update({"filename": pdf_file.name, "chunks": chunks, "index_dense": index_dense, "embeddings_sparse": embeddings_sparse})
    status = f"✅ Success: Indexed '{pdf_file.name}'. Ready to chat."

    return status, _query_box("Ask a question about the document..."), gr.Button(interactive=True), gr.Tabs(visible=True), "Document"
369
+
370
+
371
def run_main_query(query, search_type, query_source,
                   openai_key, gemini_key, openrouter_key,
                   search_provider, tavily_key, serpapi_key,
                   llm_provider):
    """Run a query against the indexed document or the web, streaming progress.

    This is a generator: each yielded 5-tuple updates, in order, the status
    box, the answer box, the keyword-analysis table, the speak button and the
    audio player.

    Args:
        query: The user's question.
        search_type: Document retrieval strategy; ``"Hybrid (Recommended)"``
            and ``"Dense (Semantic)"`` are matched explicitly, anything else
            uses sparse search.
        query_source: ``"Document"`` for the indexed PDF; any other value
            runs the web-search flow.
        openai_key, gemini_key, openrouter_key: LLM provider API keys.
        search_provider: ``"Tavily"`` or ``"SerpApi"``.
        tavily_key, serpapi_key: Search provider API keys.
        llm_provider: LLM used to synthesize the web-search answer.

    Raises:
        gr.Error: If the query is empty or only whitespace. Errors raised
            later are reported through the yielded status text instead.
    """
    if not query or not query.strip():
        raise gr.Error("Please enter a query.")

    def _frame(status, answer=None, analysis=None, can_speak=False):
        # One UI update: the speak button appears only once an answer is
        # final, and the audio player is always hidden until speech is made.
        speak = gr.Button(visible=True, interactive=True) if can_speak else gr.Button(visible=False)
        return status, answer, analysis, speak, gr.Audio(visible=False)

    yield _frame("Processing...")

    try:
        if query_source == "Document":
            # Compare with None explicitly instead of relying on the
            # truthiness of the index object.
            if document_cache["index_dense"] is None:
                raise gr.Error("Please upload and process a document first.")
            yield _frame("1. 💬 Running 'Research Agent' on document...")
            if search_type == "Hybrid (Recommended)":
                chunks = search_hybrid(query, document_cache["index_dense"], document_cache["embeddings_sparse"], document_cache["chunks"])
            elif search_type == "Dense (Semantic)":
                chunks = search_dense(query, document_cache["index_dense"], document_cache["chunks"])
            else:
                chunks = search_sparse(query, document_cache["embeddings_sparse"], document_cache["chunks"])

            yield _frame("2. 🧠 Running 'Summary Agent' (local)...")
            answer = run_summary_agent(chunks, query)
            yield _frame("3. 📊 Running 'Analysis Agent' (local)...", answer)
            analysis = run_analysis_agent(chunks)
            yield _frame("✅ Document query complete.", answer, analysis, can_speak=True)

        else:
            yield _frame(f"1. 💬 Running 'Web Search Agent' ({search_provider})...")

            if search_provider == "Tavily":
                web_context = run_tavily_search_agent(query, tavily_key)
            elif search_provider == "SerpApi":
                web_context = run_serpapi_search_agent(query, serpapi_key)
            else:
                raise gr.Error("Invalid search provider selected.")

            yield _frame(f"2. 🧠 Running 'Web Synthesis Agent' ({llm_provider})...")
            answer = run_llm_synthesis_agent(web_context, query, llm_provider, openai_key, gemini_key, openrouter_key)
            yield _frame("✅ Web query complete.", answer, can_speak=True)

    except gr.Error as e:
        # User-facing errors are shown in the status box.
        yield _frame(f"Error: {e}")
    except Exception as e:
        # Last-resort boundary: log with traceback and report in the UI.
        logger.exception(f"An unexpected error occurred: {e}")
        yield _frame(f"An unexpected error occurred: {str(e)}")
417
+
418
+
419
+ # --- 10. Gradio Interface Definition ---
420
+
421
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange")) as demo:

    # Header and introduction.
    gr.Markdown(
        """
        # 🚀 Omni-RAG Analyst v10 (Stable)
        *A multi-source, multi-modal demo by **Natwar Upadhyay***
        *OCI Data Science & AI Vector Search Certified Professional*

        ### What problem does this solve?
        Generic chatbots give generic answers. This tool gives you answers based on **specific information** from two sources:
        1. **Your Documents (Organic):** Upload a PDF to chat with your own data.
        2. **The Live Web (Non-Organic):** Connects to Google (via SerpApi) or Tavily to answer up-to-the-minute questions.

        It showcases a full **ETL -> Vector Search -> RAG** pipeline using economic, resource-friendly models.
        """
    )

    # Warning banner, shown only when the Colab Drive mount failed.
    if IN_COLAB and DRIVE_MOUNT_FAILED:
        gr.Markdown(
            """
            <div style="background-color: #FFF3CD; border: 1px solid #FFEEBA; padding: 10px; border-radius: 5px;">
            ⚠️ **Google Drive Mount Failed:** Your Colab session couldn't connect to Google Drive (you may need to grant permissions).
            The app will still work, but the large AI models (2GB+) will be **re-downloaded** for this session.
            </div>
            """
        )

    # Step 1: API keys for the web-search flow.
    with gr.Accordion("Step 1: API Key Configuration (Required for Web Search)", open=False):
        gr.Markdown(
            """
            To use the **Web Search** feature, you need API keys for **one** Search Provider and **one** LLM Synthesis provider.
            """
        )
        with gr.Row():
            with gr.Column():
                gr.Markdown("#### (A) Search Provider Keys")
                search_provider_dropdown = gr.Dropdown(
                    label="Choose Search Provider",
                    choices=["Tavily", "SerpApi"],
                    value="Tavily"
                )
                tavily_key_box = gr.Textbox(label="Tavily API Key", placeholder="tvly-...", type="password")
                serpapi_key_box = gr.Textbox(label="SerpApi API Key", placeholder="...", type="password")
            with gr.Column():
                gr.Markdown("#### (B) LLM Synthesis Keys")
                llm_provider_dropdown = gr.Dropdown(
                    label="Choose LLM Provider",
                    choices=["OpenAI", "Gemini", "OpenRouter"],
                    value="OpenAI"
                )
                openai_key_box = gr.Textbox(label="OpenAI API Key", placeholder="sk-...", type="password")
                gemini_key_box = gr.Textbox(label="Gemini API Key", placeholder="AIzaSy...", type="password")
                openrouter_key_box = gr.Textbox(label="OpenRouter API Key", placeholder="sk-or-...", type="password")

    with gr.Row():
        # Step 2: document upload and indexing status.
        with gr.Column(scale=1):
            gr.Markdown("### Step 2: Load Document (For 'Document' Source)")
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_status = gr.Textbox(label="Processing Status", interactive=False, lines=3)

        # Step 3: query input (voice or text) and retrieval options.
        with gr.Column(scale=2):
            gr.Markdown("### Step 3: Configure & Query")

            # The recorder is created even when STT is unavailable (only
            # hidden), because the wiring below references it.
            stt_audio = gr.Audio(
                label="🎙️ Record Query (or type below)",
                sources=["microphone"],
                type="filepath",
                visible=stt_enabled
            )

            if not stt_enabled:
                gr.Markdown("*(Local voice input (STT) failed to load. Please type your query.)*")

            query_box = gr.Textbox(label="Query", placeholder="Ask a question...", interactive=True)

            with gr.Row():
                query_source_radio = gr.Radio(
                    label="Query Source",
                    choices=["Document", "Web Search"],
                    value="Web Search",
                    interactive=True
                )
                search_type_dropdown = gr.Dropdown(
                    label="Document Search Strategy",
                    choices=["Hybrid (Recommended)", "Dense (Semantic)", "Sparse (Keyword)"],
                    value="Hybrid (Recommended)",
                    info=" (Only applies if 'Document' is selected)"
                )

            analyze_button = gr.Button("Analyze Query", variant="primary", interactive=True)

    # Results: synthesized answer (with optional speech) and keyword analysis.
    with gr.Tabs(visible=True) as result_tabs:
        with gr.TabItem("Synthesized Answer"):
            answer_output = gr.Textbox(label="Answer (from AI Agent)", lines=5)
            speak_button = gr.Button("🔊 Speak Answer", visible=False)
            audio_output = gr.Audio(label="AI Voice Output", autoplay=False, visible=False, type="filepath")

        with gr.TabItem("Document Context Analysis"):
            analysis_output = gr.Dataframe(label="Keyword Analysis (from 'Analysis Agent')")
            gr.Markdown("*This tab only populates when 'Document' is the query source.*")

    # --- 11. Wire up the components ---

    # When a recording stops, transcribe it into the query box.
    stt_audio.stop_recording(
        fn=transcribe_audio,
        inputs=[stt_audio],
        outputs=[query_box]
    )

    # Index the PDF on upload; the five outputs match process_document's
    # five return values.
    pdf_upload.upload(
        fn=process_document,
        inputs=[pdf_upload],
        outputs=[upload_status, query_box, analyze_button, result_tabs, query_source_radio],
        show_progress="full"
    )

    analyze_button.click(
        fn=run_main_query,
        inputs=[
            query_box, search_type_dropdown, query_source_radio,
            openai_key_box, gemini_key_box, openrouter_key_box,
            search_provider_dropdown, tavily_key_box, serpapi_key_box,
            llm_provider_dropdown
        ],
        # Five outputs, one for each value in the tuples run_main_query yields.
        outputs=[upload_status, answer_output, analysis_output, speak_button, audio_output]
    )

    # NOTE(review): audio_output is listed twice to match the 3-tuple that
    # synthesize_speech returns - confirm the duplicate output is intended.
    speak_button.click(
        fn=synthesize_speech,
        inputs=[answer_output],
        outputs=[audio_output, speak_button, audio_output]
    )

if __name__ == "__main__":
    demo.launch(debug=True)
558
+