Zahid0123 committed on
Commit
c5ef678
·
verified ·
1 Parent(s): e97ee06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +720 -149
app.py CHANGED
@@ -1,38 +1,64 @@
1
  # ===================================================================
2
- # AI Research Agent - MINIMAL STABLE VERSION
3
  # ===================================================================
4
  import os
5
  import re
6
  import json
 
 
7
  import logging
8
  import requests
9
  import tempfile
10
  import time
 
11
  from pathlib import Path
12
  from typing import List, Dict, Any, Optional
13
  from datetime import datetime
 
14
 
 
15
  import numpy as np
16
  import pandas as pd
17
  from tqdm import tqdm
 
 
18
  import PyPDF2
19
  from sentence_transformers import SentenceTransformer
20
  import faiss
 
 
 
21
  from groq import Groq
 
 
22
  import gradio as gr
23
  from gtts import gTTS
 
 
 
 
 
24
 
 
 
 
 
 
25
  logging.basicConfig(level=logging.INFO)
26
  logger = logging.getLogger(__name__)
27
 
28
  # ===================================================================
29
- # WEB SEARCH
30
  # ===================================================================
 
31
  class WebSearchTool:
32
- def __init__(self):
 
 
33
  self.base_url = "https://api.duckduckgo.com/"
34
 
35
- def search(self, query: str) -> Dict[str, Any]:
 
36
  try:
37
  params = {
38
  'q': query,
@@ -41,47 +67,94 @@ class WebSearchTool:
41
  'no_html': '1',
42
  'skip_disambig': '1'
43
  }
44
- response = requests.get(self.base_url, params=params, timeout=10,
45
- headers={'User-Agent': 'Research Agent'})
46
  response.raise_for_status()
47
  data = response.json()
48
- return {
 
49
  'query': query,
50
  'abstract': data.get('Abstract', ''),
 
51
  'answer': data.get('Answer', ''),
 
52
  'results_found': bool(any([data.get('Abstract'), data.get('Answer')]))
53
  }
 
 
 
 
 
 
 
 
 
54
  except Exception as e:
55
  logger.error(f"Web search failed: {e}")
56
  return {'query': query, 'error': str(e), 'results_found': False}
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  # ===================================================================
59
  # DOCUMENT PROCESSING
60
  # ===================================================================
 
61
class DocumentProcessor:
    """Loads PDF files from a directory tree into plain-text document dicts."""

    def load_documents(self, data_directory: str) -> List[Dict[str, Any]]:
        """Return [{doc_id, content, file_path}] for every readable, non-empty PDF.

        Unreadable files are logged and skipped rather than raised; a missing
        directory yields an empty list.
        """
        documents = []
        root = Path(data_directory)
        if not root.exists():
            return documents

        for pdf_path in root.rglob('*.pdf'):
            try:
                with open(pdf_path, 'rb') as handle:
                    reader = PyPDF2.PdfReader(handle)
                    # Each page's text plus a trailing newline, concatenated.
                    text = "".join(page.extract_text() + "\n" for page in reader.pages)
                if text.strip():
                    documents.append({
                        'doc_id': pdf_path.name,
                        'content': text,
                        'file_path': str(pdf_path)
                    })
            except Exception as e:
                logger.error(f"Error loading {pdf_path}: {e}")
        return documents
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  class DocumentChunker:
86
  def __init__(self, chunk_size: int = 512, chunk_overlap: int = 50):
87
  self.chunk_size = chunk_size
@@ -89,59 +162,107 @@ class DocumentChunker:
89
 
90
  def chunk_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
91
  chunks = []
92
- for doc in documents:
93
  doc_chunks = self._split_text(doc['content'])
94
  for i, chunk_text in enumerate(doc_chunks):
95
- chunks.append({
96
  'chunk_id': f"{doc['doc_id']}_chunk_{i}",
97
  'content': chunk_text,
98
- 'doc_id': doc['doc_id']
99
- })
 
 
 
 
100
  return chunks
101
 
102
  def _split_text(self, text: str) -> List[str]:
103
  text = re.sub(r'\s+', ' ', text.strip())
104
  if len(text) <= self.chunk_size:
105
  return [text]
106
-
107
  chunks = []
108
  start = 0
109
  while start < len(text):
110
- end = min(start + self.chunk_size, len(text))
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  chunks.append(text[start:end].strip())
112
  start = end - self.chunk_overlap
113
-
114
- return [c for c in chunks if len(c.strip()) > 10]
 
115
 
116
class EmbeddingGenerator:
    """Encodes chunk text with the all-MiniLM-L6-v2 sentence-transformer."""

    def __init__(self):
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

    def generate_embeddings(self, chunks: List[Dict[str, Any]]) -> np.ndarray:
        """Batch-encode every chunk's 'content'; returns an (n, dim) float array."""
        corpus = [chunk['content'] for chunk in chunks]
        return self.model.encode(corpus, batch_size=32, show_progress_bar=False, convert_to_numpy=True)

    def get_query_embedding(self, query: str) -> np.ndarray:
        """Encode a single query string into a 1-D vector."""
        return self.model.encode([query], convert_to_numpy=True)[0]
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  # ===================================================================
129
  # RETRIEVER
130
  # ===================================================================
 
131
class DocumentRetriever:
    """Cosine-similarity retrieval via a FAISS inner-product index over
    L2-normalized embeddings."""

    def __init__(self):
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.index = None   # faiss.IndexFlatIP once build_index() runs
        self.chunks = []    # chunk dicts aligned with index rows

    def build_index(self, chunks: List[Dict[str, Any]], embeddings: np.ndarray):
        """Normalize embeddings and load them into a flat inner-product index."""
        self.chunks = chunks
        dim = embeddings.shape[1]
        self.index = faiss.IndexFlatIP(dim)
        self.index.add(self._normalize(embeddings).astype(np.float32))

    def _normalize(self, embeddings: np.ndarray) -> np.ndarray:
        """Row-wise L2 normalization; all-zero rows are left untouched."""
        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        norms[norms == 0] = 1
        return embeddings / norms

    def search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
        """Return up to k chunk dicts, each annotated with a float 'score'."""
        if not self.index:
            return []

        query_vec = self.model.encode([query], convert_to_numpy=True)[0]
        query_norm = self._normalize(query_vec.reshape(1, -1))
        scores, indices = self.index.search(query_norm.astype(np.float32), k)

        hits = []
        for score, idx in zip(scores[0], indices[0]):
            # FAISS pads with -1 when fewer than k neighbors exist.
            if 0 <= idx < len(self.chunks):
                hit = self.chunks[idx].copy()
                hit['score'] = float(score)
                hits.append(hit)
        return hits
164
 
 
165
  # ===================================================================
166
- # MAIN AGENT
167
  # ===================================================================
168
class ResearchAgent:
    """Minimal RAG agent: retrieves chunks from uploaded PDFs and answers via Groq."""

    def __init__(self):
        self.retriever = None
        self.groq_client = None
        self.web_search = WebSearchTool()
        self.conversation = []

        groq_key = os.getenv("GROQ_API_KEY")
        if groq_key:
            try:
                self.groq_client = Groq(api_key=groq_key)
                print("✅ Groq API connected")
            except Exception as e:
                print(f"⚠️ Groq not available: {e}")
                self.groq_client = None

    def upload_docs(self, files):
        """Persist uploaded files, rebuild the vector index, return a status string."""
        if not files:
            return "❌ No files uploaded"
        try:
            os.makedirs("sample_data", exist_ok=True)
            for uploaded in files:
                if hasattr(uploaded, 'name'):
                    target = os.path.join("sample_data", os.path.basename(uploaded.name))
                    with open(target, "wb") as out:
                        out.write(uploaded.read())

            documents = DocumentProcessor().load_documents("sample_data")
            if not documents:
                return "❌ No valid PDFs found"

            chunks = DocumentChunker().chunk_documents(documents)
            embeddings = EmbeddingGenerator().generate_embeddings(chunks)

            self.retriever = DocumentRetriever()
            self.retriever.build_index(chunks, embeddings)
            return f"✅ Loaded {len(documents)} documents with {len(chunks)} chunks"
        except Exception as e:
            return f"❌ Error: {str(e)}"

    def process_query(self, query: str, history):
        """Append the user turn and a generated answer to `history`; return it.

        Blank queries return the history untouched; any failure is reported as
        an assistant "Error: ..." message instead of raising.
        """
        if not query.strip():
            return history
        history = history or []
        history.append({"role": "user", "content": query})
        try:
            history.append({"role": "assistant", "content": self._answer(query)})
        except Exception as e:
            history.append({"role": "assistant", "content": f"Error: {str(e)}"})
        return history

    def _answer(self, query: str) -> str:
        # Helper keeps process_query flat; logic identical to the inline version.
        if not self.retriever:
            return "📄 Please upload PDF documents first"
        docs = self.retriever.search(query, k=5)
        if not docs:
            return "No relevant documents found"
        context = "\n".join(d['content'][:200] for d in docs)
        if not self.groq_client:
            return f"Based on documents: {context[:300]}..."
        response = self.groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": "You are a helpful research assistant."},
                {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
            ],
            temperature=0.3,
            max_tokens=500
        )
        return response.choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
  # ===================================================================
258
- # GRADIO INTERFACE
259
  # ===================================================================
260
def main():
    """Assemble the Gradio UI (chat + PDF upload) and return the Blocks app."""
    agent = ResearchAgent()

    with gr.Blocks(title="Research Agent") as demo:
        gr.Markdown("# 🤖 AI Research Agent")

        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot(height=400)
                msg = gr.Textbox(label="Question", placeholder="Ask something...", lines=1)
                btn_send = gr.Button("Send", variant="primary")
                btn_clear = gr.Button("Clear")

            with gr.Column():
                gr.Markdown("### Upload Documents")
                file_input = gr.Files(label="PDF Files", file_types=[".pdf"])
                upload_status = gr.Textbox(label="Status", interactive=False, lines=4)

        def respond(user_message, chat_history):
            # Clears the textbox (second output) after each send.
            return agent.process_query(user_message, chat_history), ""

        msg.submit(respond, [msg, chatbot], [chatbot, msg])
        btn_send.click(respond, [msg, chatbot], [chatbot, msg])
        btn_clear.click(lambda: [], outputs=[chatbot])
        file_input.change(agent.upload_docs, inputs=[file_input], outputs=[upload_status])

    return demo


if __name__ == "__main__":
    app = main()
    app.launch(share=False)
 
 
 
 
 
 
 
 
 
 
1
  # ===================================================================
2
+ # AI Research Agent - Agentic RAG System for Hugging Face Spaces
3
  # ===================================================================
4
  import os
5
  import re
6
  import json
7
+ import ast
8
+ import operator
9
  import logging
10
  import requests
11
  import tempfile
12
  import time
13
+ import asyncio
14
  from pathlib import Path
15
  from typing import List, Dict, Any, Optional
16
  from datetime import datetime
17
+ from urllib.parse import quote_plus
18
 
19
+ # Core Libraries
20
  import numpy as np
21
  import pandas as pd
22
  from tqdm import tqdm
23
+
24
+ # ML & Embedding
25
  import PyPDF2
26
  from sentence_transformers import SentenceTransformer
27
  import faiss
28
+
29
+ # LLM & Web
30
+ import groq
31
  from groq import Groq
32
+
33
+ # UI & Voice
34
  import gradio as gr
35
  from gtts import gTTS
36
+ try:
37
+ import speech_recognition as sr
38
+ STT_AVAILABLE = True
39
+ except ImportError:
40
+ STT_AVAILABLE = False
41
 
42
+ GTTS_AVAILABLE = True
43
+
44
+ # ===================================================================
45
+ # CONFIGURATION & LOGGING
46
+ # ===================================================================
47
  logging.basicConfig(level=logging.INFO)
48
  logger = logging.getLogger(__name__)
49
 
50
  # ===================================================================
51
+ # UTILITY CLASSES
52
  # ===================================================================
53
+
54
class WebSearchTool:
    """Queries the DuckDuckGo Instant Answer API for abstracts and related topics."""

    def __init__(self, max_results: int = 5, timeout: int = 10):
        self.max_results = max_results
        self.timeout = timeout
        self.base_url = "https://api.duckduckgo.com/"

    def search(self, query: str, num_results: Optional[int] = None) -> Dict[str, Any]:
        """Return a result dict; on any failure return {'error': ..., 'results_found': False}."""
        limit = num_results or self.max_results
        try:
            params = {
                'q': query,
                'format': 'json',  # NOTE(review): reconstructed from the DDG API spec — confirm against the original file
                'no_html': '1',
                'skip_disambig': '1'
            }
            response = requests.get(self.base_url, params=params, timeout=self.timeout,
                                    headers={'User-Agent': 'AI Research Agent 1.0'})
            response.raise_for_status()
            data = response.json()

            payload = {
                'query': query,
                'abstract': data.get('Abstract', ''),
                'abstract_source': data.get('AbstractSource', ''),
                'answer': data.get('Answer', ''),
                'related_topics': [],
                'results_found': bool(any([data.get('Abstract'), data.get('Answer')]))
            }

            for topic in data.get('RelatedTopics', [])[:limit]:
                if isinstance(topic, dict) and 'Text' in topic:
                    payload['related_topics'].append({
                        'text': topic.get('Text', ''),
                        'url': topic.get('FirstURL', '')
                    })
            return payload
        except Exception as e:
            logger.error(f"Web search failed: {e}")
            return {'query': query, 'error': str(e), 'results_found': False}
95
 
96
+
97
class ConfigManager:
    """Central defaults for the agent pipeline (models, chunking, retrieval)."""

    DEFAULT_CONFIG = {
        'embedding_model': 'all-MiniLM-L6-v2',
        'groq_model': 'llama-3.1-8b-instant',
        'max_iterations': 5,
        'confidence_threshold': 0.7,
        'retrieval_k': 5,
        'chunk_size': 512,
        'chunk_overlap': 50
    }

    @staticmethod
    def load_config():
        """Return a fresh shallow copy so callers can mutate it safely."""
        return dict(ConfigManager.DEFAULT_CONFIG)
111
+
112
+
113
  # ===================================================================
114
  # DOCUMENT PROCESSING
115
  # ===================================================================
116
+
117
class DocumentProcessor:
    """Load text from files under a directory tree (.txt, .md, .pdf)."""

    def __init__(self):
        # Extensions accepted by load_documents; must stay in sync with _extract_text.
        self.supported_extensions = {'.txt', '.md', '.pdf'}

    def load_documents(self, data_directory: str) -> List[Dict[str, Any]]:
        """Return [{doc_id, content, file_path, file_type}] for every readable,
        non-empty supported file.

        A missing directory yields an empty list; per-file read errors are
        logged and the file is skipped.
        """
        documents = []
        data_path = Path(data_directory)
        if not data_path.exists():
            return documents

        files = [f for f in data_path.rglob('*') if f.suffix.lower() in self.supported_extensions]
        for file_path in tqdm(files, desc="Loading documents"):
            try:
                content = self._extract_text(file_path)
                if content.strip():
                    documents.append({
                        'doc_id': str(file_path.relative_to(data_path)),
                        'content': content,
                        'file_path': str(file_path),
                        'file_type': file_path.suffix.lower()
                    })
            except Exception as e:
                logger.error(f"Error loading {file_path}: {e}")
        return documents

    def _extract_text(self, file_path: Path) -> str:
        """Extract raw text from one file; returns '' for unsupported types."""
        extension = file_path.suffix.lower()
        # BUG FIX: '.md' was advertised in supported_extensions but fell through
        # to the final `return ""`, silently dropping every Markdown file.
        if extension in ('.txt', '.md'):
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        elif extension == '.pdf':
            text = ""
            with open(file_path, 'rb') as f:
                pdf_reader = PyPDF2.PdfReader(f)
                for page in pdf_reader.pages:
                    text += page.extract_text() + "\n"
            return text
        return ""
156
+
157
+
158
class DocumentChunker:
    """Split documents into overlapping chunks, snapping to sentence or word
    boundaries when one falls in the second half of the window."""

    def __init__(self, chunk_size: int = 512, chunk_overlap: int = 50):
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def chunk_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Flatten all documents into chunk dicts carrying provenance metadata."""
        chunks = []
        for doc in tqdm(documents, desc="Chunking documents"):
            for i, chunk_text in enumerate(self._split_text(doc['content'])):
                chunks.append({
                    'chunk_id': f"{doc['doc_id']}_chunk_{i}",
                    'content': chunk_text,
                    'doc_id': doc['doc_id'],
                    'chunk_index': i,
                    'source_file': doc['file_path'],
                    'file_type': doc['file_type']
                })
        return chunks

    def _split_text(self, text: str) -> List[str]:
        """Whitespace-normalize and split into <= chunk_size pieces; fragments
        of 10 characters or fewer are dropped."""
        text = re.sub(r'\s+', ' ', text.strip())
        if len(text) <= self.chunk_size:
            return [text]

        chunks = []
        start = 0
        while start < len(text):
            end = start + self.chunk_size
            if end >= len(text):
                chunks.append(text[start:])
                break

            window = text[start:end]
            # BUG FIX: rfind() returns offsets relative to `window`, but the old
            # code compared them against absolute positions (start + chunk_size//2),
            # so boundary snapping silently stopped working after the first chunk.
            sentence_break = max(window.rfind('.'), window.rfind('!'), window.rfind('?'))
            if sentence_break > self.chunk_size // 2:
                end = start + sentence_break + 1
            else:
                space_break = window.rfind(' ')
                if space_break > self.chunk_size // 2:
                    end = start + space_break

            chunks.append(text[start:end].strip())
            # BUG FIX: guarantee forward progress even if chunk_overlap is ever
            # configured >= the step size (the old code could loop forever).
            start = max(end - self.chunk_overlap, start + 1)

        return [chunk for chunk in chunks if len(chunk.strip()) > 10]
205
+
206
 
207
class EmbeddingGenerator:
    """Thin wrapper around SentenceTransformer for chunk and query encoding."""

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)

    def generate_embeddings(self, chunks: List[Dict[str, Any]]) -> np.ndarray:
        """Encode every chunk's 'content' field; returns an (n_chunks, dim) array."""
        corpus = [chunk['content'] for chunk in chunks]
        return self.model.encode(corpus, batch_size=32, show_progress_bar=True, convert_to_numpy=True)

    def get_query_embedding(self, query: str) -> np.ndarray:
        """Encode one query string into a 1-D vector."""
        return self.model.encode([query], convert_to_numpy=True)[0]
219
 
220
+
221
def build_embeddings_from_directory(data_directory: str, output_directory: str,
                                    chunk_size: int = 512, chunk_overlap: int = 50) -> Dict[str, Any]:
    """One-shot pipeline: load documents, chunk them, embed the chunks.

    Returns {} when the directory yields no documents; otherwise a dict with
    'chunks', 'embeddings' and summary 'metadata'. `output_directory` is
    created up front (components are instantiated eagerly, matching the
    original side-effect order).
    """
    os.makedirs(output_directory, exist_ok=True)
    doc_processor = DocumentProcessor()
    chunker = DocumentChunker(chunk_size, chunk_overlap)
    embedder = EmbeddingGenerator()

    documents = doc_processor.load_documents(data_directory)
    if not documents:
        return {}

    chunks = chunker.chunk_documents(documents)
    embeddings = embedder.generate_embeddings(chunks)

    return {
        'chunks': chunks,
        'embeddings': embeddings,
        'metadata': {
            'num_documents': len(documents),
            'num_chunks': len(chunks),
            'embedding_dim': embeddings.shape[1]
        }
    }
244
+
245
+
246
  # ===================================================================
247
  # RETRIEVER
248
  # ===================================================================
249
+
250
class DocumentRetriever:
    """FAISS inner-product index over L2-normalized chunk embeddings
    (equivalent to cosine-similarity search)."""

    def __init__(self, embedding_model_name: str = 'all-MiniLM-L6-v2'):
        self.embedding_generator = EmbeddingGenerator(embedding_model_name)
        self.index = None        # faiss.IndexFlatIP once build_index() runs
        self.chunks = []         # chunk dicts aligned with index rows
        self.embeddings = None   # raw (unnormalized) embedding matrix

    def build_index(self, chunks: List[Dict[str, Any]], embeddings: np.ndarray) -> None:
        """Normalize embeddings and load them into a flat inner-product index."""
        self.chunks = chunks
        self.embeddings = embeddings
        embedding_dim = embeddings.shape[1]
        self.index = faiss.IndexFlatIP(embedding_dim)
        embeddings_normalized = self._normalize_embeddings(embeddings)
        self.index.add(embeddings_normalized.astype(np.float32))

    def _normalize_embeddings(self, embeddings: np.ndarray) -> np.ndarray:
        """Row-wise L2 normalization; all-zero rows are left unchanged."""
        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        norms[norms == 0] = 1
        return embeddings / norms

    def search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
        """Return up to k chunk dicts annotated with 'similarity_score' and 'rank'."""
        if not self.index:
            return []

        query_embedding = self.embedding_generator.get_query_embedding(query)
        query_embedding_normalized = self._normalize_embeddings(query_embedding.reshape(1, -1))
        scores, indices = self.index.search(query_embedding_normalized.astype(np.float32), k)

        results = []
        for i, (score, idx) in enumerate(zip(scores[0], indices[0])):
            # BUG FIX: restore the upper-bound check the previous version had.
            # FAISS pads with -1 when k exceeds the index size, and a rebuilt
            # chunk list shorter than a stale index would otherwise IndexError.
            if 0 <= idx < len(self.chunks):
                chunk = self.chunks[idx].copy()
                chunk.update({'similarity_score': float(score), 'rank': i + 1})
                results.append(chunk)
        return results
285
 
286
+
287
  # ===================================================================
288
+ # AGENTIC TOOLS
289
  # ===================================================================
290
+
291
class AgenticTools:
    """Registry of callable tools the agent can invoke by name."""

    def __init__(self):
        self.tools = {
            "calculator": self.calculator_tool,
            "web_search": self.web_search_tool,
            "fact_checker": self.fact_checker_tool,
            "document_analyzer": self.document_analyzer_tool
        }
        self.web_search_instance = WebSearchTool()

    def calculator_tool(self, expression: str) -> Dict[str, Any]:
        """Safely evaluate a basic arithmetic expression via the AST (no eval())."""
        try:
            # Whitelist arithmetic characters before parsing.
            clean_expr = re.sub(r'[^0-9+\-*/().\s]', '', expression)
            node = ast.parse(clean_expr, mode='eval')
            result = self._eval_expr(node.body)
            return {
                "tool": "calculator",
                "input": expression,
                "result": result,
                "success": True,
                "explanation": f"Calculated {clean_expr} = {result}"
            }
        except Exception as e:
            return {"tool": "calculator", "input": expression, "result": None, "success": False, "error": str(e)}

    def _eval_expr(self, node):
        """Recursively evaluate a restricted arithmetic AST node; raises
        TypeError for anything outside numbers and +,-,*,/,**,unary minus."""
        ops = {
            ast.Add: operator.add, ast.Sub: operator.sub,
            ast.Mult: operator.mul, ast.Div: operator.truediv,
            ast.Pow: operator.pow, ast.USub: operator.neg
        }
        # BUG FIX: ast.Num was deprecated in Python 3.8 and removed in 3.12;
        # numeric literals now parse as ast.Constant.
        if isinstance(node, ast.Constant):
            if isinstance(node.value, (int, float)):
                return node.value
            raise TypeError(node)
        elif isinstance(node, ast.BinOp):
            return ops[type(node.op)](self._eval_expr(node.left), self._eval_expr(node.right))
        elif isinstance(node, ast.UnaryOp):
            return ops[type(node.op)](self._eval_expr(node.operand))
        raise TypeError(node)

    def web_search_tool(self, query: str) -> Dict[str, Any]:
        """Wrap WebSearchTool.search in the standard tool-result envelope."""
        try:
            result = self.web_search_instance.search(query)
            return {
                "tool": "web_search",
                "input": query,
                "result": result,
                "success": result.get('results_found', False),
                "explanation": f"Found web information about: {query}"
            }
        except Exception as e:
            return {"tool": "web_search", "input": query, "result": None, "success": False, "error": str(e)}

    def fact_checker_tool(self, claim: str) -> Dict[str, Any]:
        """Heuristic fact check: claims containing digits are flagged as
        requiring calculation; everything else is 'partial' at medium confidence."""
        confidence = "medium"
        verification = "partial"
        if re.search(r'\d+', claim):
            verification = "requires_calculation"
        return {
            "tool": "fact_checker",
            "input": claim,
            "result": {"verification": verification, "confidence": confidence},
            "success": True
        }

    def document_analyzer_tool(self, text: str, analysis_type: str = "summary") -> Dict[str, Any]:
        """Crude extractive summary: first three sentence fragments, joined."""
        sentences = re.split(r'[.!?]+', text)[:3]
        summary = '. '.join([s.strip() for s in sentences if s.strip()])
        return {
            "tool": "document_analyzer",
            "input": f"{analysis_type} analysis",
            "result": summary,
            "success": True
        }
364
+
365
+
366
class AgentPlanner:
    """Keyword-driven planner that maps a query to an ordered tool pipeline."""

    def __init__(self):
        # Capability -> trigger keywords (case-insensitive substring match).
        self.planning_patterns = {
            "calculation": ["calculate", "compute", "math", "percentage", "total"],
            "current_info": ["latest", "recent", "current", "rate", "price", "exchange", "dollar", "currency"],
            "analysis": ["analyze", "insights", "patterns", "summary"],
            "fact_check": ["verify", "confirm", "accurate"]
        }

    def create_execution_plan(self, query: str) -> Dict[str, Any]:
        """Return {query, detected_needs, steps, total_steps}.

        Step 1 is always a document search and the final step is always
        synthesis; calculator / web search / analysis steps are inserted in
        between when their trigger keywords appear in the query.
        """
        lowered = query.lower()
        needed = [
            capability
            for capability, keywords in self.planning_patterns.items()
            if any(kw in lowered for kw in keywords)
        ]

        steps = [{"step": 1, "tool": "document_search", "description": "Search documents", "query": query}]
        next_step = 2

        if "calculation" in needed:
            steps.append({"step": next_step, "tool": "calculator", "description": "Perform calculations", "depends_on": [1]})
            next_step += 1
        if "current_info" in needed:
            steps.append({"step": next_step, "tool": "web_search", "description": "Search web", "query": query, "depends_on": [1]})
            next_step += 1
        if "analysis" in needed:
            steps.append({"step": next_step, "tool": "document_analyzer", "description": "Analyze content", "depends_on": [1]})
            next_step += 1

        steps.append({"step": next_step, "tool": "synthesizer", "description": "Synthesize results", "depends_on": list(range(1, next_step))})

        return {"query": query, "detected_needs": needed, "steps": steps, "total_steps": len(steps)}
398
+
399
+
400
class ResultSynthesizer:
    """Merges tool outputs into a single LLM-written answer (Groq-backed)."""

    def __init__(self, groq_client):
        self.groq_client = groq_client

    def synthesize_results(self, query: str, results: Dict[str, Any], temperature: float = 0.3, max_tokens: int = 500) -> str:
        """Build a context block from successful tools, then ask the LLM to answer.

        If the LLM call fails for any reason, fall back to the first 500
        characters of the raw context.
        """
        context_parts = []
        if "document_search" in results and results["document_search"]["success"]:
            context_parts.append(f"DOCUMENTS:\n{results['document_search']['result']}")
        if "web_search" in results and results["web_search"]["success"]:
            web_info = results["web_search"]["result"]
            web_text = f"{web_info.get('abstract', '')} {web_info.get('answer', '')}"
            context_parts.append(f"WEB INFO:\n{web_text}")
        if "calculator" in results and results["calculator"]["success"]:
            context_parts.append(f"CALCULATION:\n{results['calculator']['result']}")

        all_context = "\n\n".join(context_parts)
        prompt = f"""Based on the following information, provide a comprehensive answer.
QUESTION: {query}
INFORMATION:
{all_context}
Provide a clear, direct answer synthesizing all sources."""

        try:
            chat = self.groq_client.chat.completions.create(
                model="llama-3.1-8b-instant",
                messages=[
                    {"role": "system", "content": "You are an expert research assistant."},
                    {"role": "user", "content": prompt}
                ],
                temperature=temperature,
                max_tokens=max_tokens
            )
            return chat.choices[0].message.content.strip()
        except Exception:
            return f"Based on available information: {all_context[:500]}..."
435
+
436
+
437
class AgenticEvaluator:
    """Scores an agent response by tool success rate and source diversity."""

    def evaluate_response(self, query: str, response: str, tool_results: Dict[str, Any]) -> Dict[str, Any]:
        """Return confidence / completeness / source-diversity metrics.

        Confidence is the tool success ratio capped at 0.8 (0.0 when nothing
        succeeded); completeness is 'comprehensive' only when every tool
        succeeded.
        """
        ok_count = sum(1 for outcome in tool_results.values() if outcome.get("success", False))
        total = len(tool_results)

        confidence = min(0.8, ok_count / max(total, 1)) if ok_count > 0 else 0.0

        sources = []
        if "document_search" in tool_results and tool_results["document_search"]["success"]:
            sources.append("documents")
        if "web_search" in tool_results and tool_results["web_search"]["success"]:
            sources.append("web")

        return {
            "confidence_score": confidence,
            "completeness": "comprehensive" if ok_count >= total else "partial",
            "source_diversity": len(sources),
            "recommendations": []
        }
455
+
456
+
457
+ # ===================================================================
458
+ # MAIN AGENT CLASS
459
+ # ===================================================================
460
+
461
+ class AgenticRAGAgent:
462
+ def __init__(self):
463
+ self.config = ConfigManager.load_config()
464
  self.retriever = None
465
  self.groq_client = None
466
+ self.conversation_history = []
467
+
468
+ self.tools = AgenticTools()
469
+ self.planner = AgentPlanner()
470
+ self.synthesizer = None
471
+ self.evaluator = AgenticEvaluator()
472
+
473
+ self.temperature = 0.3
474
+ self.max_tokens = 500
475
+ self.chunk_size = 512
476
+ self.chunk_overlap = 50
477
+ self.retrieval_k = 8
478
+
479
+ self.enable_web_search = True
480
+ self.enable_calculations = True
481
+ self.enable_fact_checking = True
482
+ self.enable_analysis = True
483
+
484
+ # Initialize Groq
485
+ groq_api_key = os.getenv("GROQ_API_KEY")
486
+ if groq_api_key:
487
  try:
488
+ self.groq_client = Groq(api_key=groq_api_key)
489
+ self.synthesizer = ResultSynthesizer(self.groq_client)
490
+ print("โœ… Groq API configured")
491
  except Exception as e:
492
+ print(f"โŒ Error: {e}")
493
+
494
+ def clean_text_for_speech(self, text):
495
+ """Clean text for TTS"""
496
+ if not text:
497
+ return ""
498
+
499
+ # Remove markdown formatting
500
+ text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
501
+ text = re.sub(r'\*([^*]+)\*', r'\1', text)
502
+ text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
503
+ text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
504
+ text = re.sub(r'```[^`]*```', '', text, flags=re.DOTALL)
505
+ text = re.sub(r'`([^`]+)`', r'\1', text)
506
+ text = re.sub(r'^[\s]*[-*+โ€ข]\s+', '', text, flags=re.MULTILINE)
507
+ text = re.sub(r'^[\s]*\d+\.\s+', '', text, flags=re.MULTILINE)
508
+
509
+ # Remove emojis
510
+ emoji_pattern = re.compile(
511
+ "["
512
+ "\U0001F600-\U0001F64F"
513
+ "\U0001F300-\U0001F5FF"
514
+ "\U0001F680-\U0001F6FF"
515
+ "\U0001F1E0-\U0001F1FF"
516
+ "\U00002702-\U000027B0"
517
+ "\U000024C2-\U0001F251"
518
+ "\U0001F900-\U0001F9FF"
519
+ "\U00002600-\U000026FF"
520
+ "\U00002700-\U000027BF"
521
+ "]+"
522
+ )
523
+ text = emoji_pattern.sub('', text)
524
+ text = re.sub(r'\s+', ' ', text)
525
+ text = re.sub(r'\n+', '. ', text)
526
+ text = text.strip()
527
+ text = re.sub(r'\.+', '.', text)
528
+
529
+ return text
530
+
531
+ def generate_audio_response(self, text):
532
+ """Generate audio using gTTS"""
533
+ if not text or not GTTS_AVAILABLE:
534
+ return None
535
+
536
+ clean_text = self.clean_text_for_speech(text)
537
+ if not clean_text:
538
+ return None
539
 
 
 
 
 
540
  try:
541
+ temp_dir = tempfile.gettempdir()
542
+ timestamp = int(time.time())
543
+ audio_file = os.path.join(temp_dir, f"response_{timestamp}.mp3")
544
+
545
+ tts = gTTS(text=clean_text, lang='en', slow=False)
546
+ tts.save(audio_file)
547
+ return audio_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  except Exception as e:
549
+ logger.error(f"Audio generation failed: {e}")
550
+ return None
551
+
552
+ def is_greeting_or_casual(self, query):
553
+ query_lower = query.lower().strip()
554
+ greetings = ['hi', 'hello', 'hey', 'howdy']
555
+ return any(query_lower.startswith(g) for g in greetings) or query_lower in greetings
556
+
557
+ def get_greeting_response(self, query):
558
+ return "Hi there! ๐Ÿ‘‹ I'm AI Research Agent with agentic capabilities. Upload PDF documents and ask complex questions!"
559
+
560
+ def get_simple_answer(self, query, retrieved_docs):
561
+ if not self.groq_client:
562
+ return "Error: Groq API not configured"
563
+
564
+ context = "\n\n".join([doc.get('content', str(doc)) for doc in retrieved_docs[:5]])
565
+ prompt = f"""Based on this context, provide a clear answer.
566
+ Context: {context}
567
+ Question: {query}
568
+ Answer:"""
569
 
570
+ try:
571
+ response = self.groq_client.chat.completions.create(
572
+ model="llama-3.1-8b-instant",
573
+ messages=[
574
+ {"role": "system", "content": "You are a helpful research assistant."},
575
+ {"role": "user", "content": prompt}
576
+ ],
577
+ temperature=self.temperature,
578
+ max_tokens=self.max_tokens
579
+ )
580
+ return response.choices[0].message.content.strip()
581
+ except Exception as e:
582
+ return f"Error: {str(e)}"
583
+
584
async def process_agentic_query(self, query, chat_history, progress=gr.Progress()):
    """Run the full agentic pipeline for one user query.

    Flow: greeting short-circuit -> index presence check -> plan creation
    -> per-step tool execution -> synthesis -> evaluation -> TTS.

    Returns a 3-tuple for the Gradio outputs:
    (updated chat history, cleared textbox value "", audio file path or None).
    """
    if not query.strip():
        return chat_history, "", None

    if chat_history is None:
        chat_history = []

    # History uses OpenAI-style message dicts; the Chatbot component must
    # render this format (see the review note in create_interface).
    chat_history.append({"role": "user", "content": query})

    try:
        # Greetings bypass planning/retrieval entirely.
        if self.is_greeting_or_casual(query):
            progress(0.5, desc="Generating response...")
            response = self.get_greeting_response(query)
            chat_history.append({"role": "assistant", "content": response})

            progress(0.8, desc="๐Ÿ”Š Generating voice...")
            audio_file = self.generate_audio_response(response)

            return chat_history, "", audio_file

        progress(0.1, desc="๐Ÿง  Planning...")

        # A populated FAISS index is required before any research query.
        if not self.retriever or not hasattr(self.retriever, 'index') or not self.retriever.index:
            error = "๐Ÿ“„ Please upload a PDF document first!"
            chat_history.append({"role": "assistant", "content": error})
            audio_file = self.generate_audio_response(error)
            return chat_history, "", audio_file

        # NOTE(review): assumes plan['steps'] is non-empty — a zero-step
        # plan would divide by zero in the progress computation below;
        # confirm the planner guarantees at least one step.
        plan = self.planner.create_execution_plan(query)
        progress(0.2, desc=f"๐Ÿ“‹ Plan: {len(plan['steps'])} steps")

        results = {}
        current_step = 0

        # Execute each planned tool; progress advances from 0.2 to 0.8.
        for step in plan['steps']:
            current_step += 1
            progress_val = 0.2 + (current_step / len(plan['steps'])) * 0.6
            progress(progress_val, desc=f"๐Ÿ”ง Step {current_step}: {step['description']}")

            if step['tool'] == 'document_search':
                retrieved_docs = self.retriever.search(query, k=self.retrieval_k)
                if retrieved_docs:
                    doc_answer = self.get_simple_answer(query, retrieved_docs)
                    results['document_search'] = {"success": True, "result": doc_answer}
                else:
                    results['document_search'] = {"success": False, "result": "No relevant info"}

            elif step['tool'] == 'calculator' and self.enable_calculations:
                # Extract the first arithmetic-looking substring that
                # contains an operator and evaluate only that one.
                math_patterns = re.findall(r'[\d+\-*/().\s]+', query)
                for expr in math_patterns:
                    if any(op in expr for op in ['+', '-', '*', '/']):
                        results['calculator'] = self.tools.calculator_tool(expr.strip())
                        break

            elif step['tool'] == 'web_search' and self.enable_web_search:
                results['web_search'] = self.tools.web_search_tool(query)

            elif step['tool'] == 'document_analyzer' and self.enable_analysis:
                # Analyzer runs only on a successful prior document search.
                if 'document_search' in results and results['document_search']['success']:
                    doc_content = results['document_search']['result']
                    results['document_analyzer'] = self.tools.document_analyzer_tool(doc_content, "summary")

        progress(0.85, desc="๐Ÿ”ฌ Synthesizing...")

        if self.synthesizer:
            final_answer = self.synthesizer.synthesize_results(query, results, self.temperature, self.max_tokens)
        else:
            # Fallback: concatenate the raw successful tool outputs.
            successful = [r['result'] for r in results.values() if r.get('success')]
            final_answer = f"Based on available info: {' '.join(map(str, successful))}"

        progress(0.9, desc="๐Ÿ“Š Evaluating...")
        evaluation = self.evaluator.evaluate_response(query, final_answer, results)

        # Human-readable quality summary appended to the chat answer.
        eval_summary = f"\n\n๐Ÿ’ก **Analysis:**\n"
        eval_summary += f"โ€ข Confidence: {evaluation['confidence_score']:.1%}\n"
        eval_summary += f"โ€ข Sources: {evaluation['source_diversity']} types\n"
        eval_summary += f"โ€ข Completeness: {evaluation['completeness']}"

        complete_response = final_answer + eval_summary

        progress(0.95, desc="๐Ÿ”Š Generating voice response...")
        # Voice reads only the answer, not the evaluation summary.
        audio_file = self.generate_audio_response(final_answer)

        chat_history.append({"role": "assistant", "content": complete_response})

        # Persist the full turn for later inspection/export.
        self.conversation_history.append({
            'timestamp': datetime.now().isoformat(),
            'query': query,
            'response': complete_response,
            'plan': plan,
            'results': results,
            'evaluation': evaluation,
            'audio_file': audio_file
        })

        progress(1.0, desc="โœ… Complete!")
        return chat_history, "", audio_file

    except Exception as e:
        # Surface the failure in-chat rather than crashing the UI callback.
        error = f"โŒ Error: {str(e)}"
        chat_history.append({"role": "assistant", "content": error})
        return chat_history, "", None
686
+
687
def upload_documents(self, files, progress=gr.Progress()):
    """Copy uploaded PDFs into sample_data/ and rebuild the retrieval index.

    Accepts the value of a gr.Files component. Returns a human-readable
    status string for the UI.
    """
    import shutil  # local import: top-of-file import block is elsewhere

    if not files:
        return "No files uploaded"

    try:
        progress(0.1, desc="Processing files...")
        os.makedirs("sample_data", exist_ok=True)

        uploaded = []
        for file in files:
            # Gradio 4 delivers uploads as filepath strings (or tempfile
            # wrappers exposing .name). The previous file.read() call was
            # unreliable and, on text-mode wrappers, corrupted PDF bytes —
            # copy by path instead.
            src_path = file if isinstance(file, str) else getattr(file, 'name', None)
            if not src_path or not src_path.lower().endswith('.pdf'):
                continue  # also accepts .PDF etc. via lower()
            original = os.path.basename(src_path)
            dest = os.path.join("sample_data", original)
            shutil.copyfile(src_path, dest)
            uploaded.append(original)

        if not uploaded:
            return "โŒ No valid PDF files"

        progress(0.5, desc="Generating embeddings...")
        embeddings_data = build_embeddings_from_directory("sample_data", "temp_embeddings")

        if embeddings_data and 'embeddings' in embeddings_data:
            progress(0.8, desc="Building index...")
            self.retriever = DocumentRetriever()
            self.retriever.build_index(embeddings_data['chunks'], embeddings_data['embeddings'])

            doc_count = embeddings_data.get('metadata', {}).get('num_documents', 0)
            chunk_count = embeddings_data.get('metadata', {}).get('num_chunks', 0)

            progress(1.0, desc="Complete!")
            return f"""โœ… **Success!**
๐Ÿ“„ Files: {', '.join(uploaded)}
๐Ÿ“Š Documents: {doc_count} | Chunks: {chunk_count}
๐ŸŽฏ Ready for complex questions with voice support!"""
        else:
            return "โŒ Failed to process documents"

    except Exception as e:
        return f"โŒ Error: {str(e)}"
728
+
729
def update_settings(self, temp, tokens, chunk_size, overlap, k, web, calc, fact, analysis):
    """Apply the UI-selected configuration to the agent.

    Stores generation parameters, chunking parameters, and tool toggles
    on the instance, then returns a human-readable summary string.
    """
    # Generation parameters.
    self.temperature = temp
    self.max_tokens = tokens
    # Document processing parameters.
    self.chunk_size = chunk_size
    self.chunk_overlap = overlap
    self.retrieval_k = k
    # Agentic tool toggles.
    self.enable_web_search = web
    self.enable_calculations = calc
    self.enable_fact_checking = fact
    self.enable_analysis = analysis

    def mark(flag):
        return 'โœ…' if flag else 'โŒ'

    summary_lines = [
        "โš™๏ธ Settings Updated:",
        f"โ€ข Temperature: {temp}",
        f"โ€ข Max Tokens: {tokens}",
        f"โ€ข Chunk Size: {chunk_size}",
        f"โ€ข Retrieved: {k}",
        f"โ€ข Web: {mark(web)}",
        f"โ€ข Calc: {mark(calc)}",
        f"โ€ข Voice Output: {mark(GTTS_AVAILABLE)}",
    ]
    return "\n".join(summary_lines)
+ โ€ข Voice Output: {'โœ…' if GTTS_AVAILABLE else 'โŒ'}"""
748
+
749
 
750
  # ===================================================================
751
+ # GRADIO INTERFACE (COMPATIBLE WITH GRADIO 4.27)
752
  # ===================================================================
753
+
754
def create_interface():
    """Build and return the Gradio Blocks UI wired to a fresh AgenticRAGAgent."""
    agent = AgenticRAGAgent()

    with gr.Blocks(title="๐Ÿค– AI Research Agent", theme=gr.themes.Soft()) as interface:
        # Header banner.
        gr.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px;">
            <h1 style="color: white; margin: 0;">๐Ÿค– AI Research Agent - Agentic RAG</h1>
            <p style="color: white; margin: 10px 0;">Advanced Multi-Tool Research Assistant with Voice Support ๐Ÿ”Š</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # NOTE(review): the agent appends {"role", "content"} dicts to
                # the history, but this Chatbot is not created with
                # type="messages" — on Gradio 4.27 the component expects
                # [user, bot] pairs. Confirm the installed version renders
                # dict histories correctly.
                chatbot = gr.Chatbot(label="๐Ÿ’ฌ Chat", height=500)

                with gr.Row():
                    msg = gr.Textbox(label="", placeholder="Ask a complex research question...", scale=4)
                    submit_btn = gr.Button("๐Ÿš€ Send", variant="primary", scale=1)

                with gr.Row():
                    clear_btn = gr.Button("๐Ÿ—‘๏ธ Clear Chat", variant="secondary")

                # autoplay so the spoken answer starts with the text answer.
                audio_output = gr.Audio(label="๐Ÿ”Š Voice Response", autoplay=True, interactive=False)

            with gr.Column(scale=1):
                with gr.Group():
                    gr.HTML("<h3 style='text-align: center;'>๐Ÿ“„ Upload Documents</h3>")
                    file_upload = gr.Files(label="", file_types=[".pdf"], file_count="multiple")
                    upload_status = gr.Textbox(label="๐Ÿ“Š Status", interactive=False, max_lines=10)

                with gr.Accordion("โš™๏ธ Settings", open=False):
                    gr.HTML("<h4>๐Ÿง  AI Parameters</h4>")
                    temperature_slider = gr.Slider(0.0, 1.0, value=0.3, step=0.1, label="๐ŸŒก๏ธ Temperature")
                    max_tokens_slider = gr.Slider(100, 1000, value=500, step=50, label="๐Ÿ“ Max Tokens")

                    gr.HTML("<h4>๐Ÿ“„ Document Processing</h4>")
                    chunk_size_slider = gr.Slider(256, 1024, value=512, step=64, label="๐Ÿ“„ Chunk Size")
                    chunk_overlap_slider = gr.Slider(0, 100, value=50, step=10, label="๐Ÿ”— Overlap")
                    retrieval_k_slider = gr.Slider(3, 15, value=8, step=1, label="๐Ÿ” Retrieved Chunks")

                    gr.HTML("<h4>๐Ÿ› ๏ธ Agentic Tools</h4>")
                    with gr.Row():
                        enable_web = gr.Checkbox(value=True, label="๐ŸŒ Web Search")
                        enable_calc = gr.Checkbox(value=True, label="๐Ÿงฎ Calculator")
                    with gr.Row():
                        enable_fact = gr.Checkbox(value=True, label="โœ… Fact Check")
                        enable_analysis = gr.Checkbox(value=True, label="๐Ÿ“Š Analysis")

                    apply_btn = gr.Button("โšก Apply Settings", variant="primary", size="lg")

                settings_status = gr.Textbox(label="โš™๏ธ Settings Status", interactive=False, max_lines=8)

                with gr.Accordion("๐Ÿ”Š Voice Features Status", open=False):
                    # f-string: availability flags are evaluated once at build time.
                    gr.HTML(f"""
                    <div style="padding: 10px;">
                        <p><strong>Text-to-Speech (gTTS):</strong> {'โœ… Available' if GTTS_AVAILABLE else 'โŒ Not Available'}</p>
                        <p><strong>Speech-to-Text:</strong> {'โœ… Available' if STT_AVAILABLE else 'โŒ Not Available (HF Spaces limitation)'}</p>
                        <p><em>Voice output: Auto-plays with responses</em></p>
                    </div>
                    """)

        # -----------------------------
        # Event Handlers (Sync wrapper for async)
        # -----------------------------
        def process_msg(message, history):
            # Bridges Gradio's callback into the async agent coroutine.
            # NOTE(review): asyncio.get_event_loop() is deprecated outside a
            # running loop (3.10+), and future.result() blocks — this
            # deadlocks if Gradio ever invokes the handler on the loop's own
            # thread. Gradio supports async handlers natively; registering
            # agent.process_agentic_query directly would remove this wrapper.
            import asyncio
            try:
                loop = asyncio.get_event_loop()
                if loop.is_running():
                    future = asyncio.run_coroutine_threadsafe(agent.process_agentic_query(message, history), loop)
                    return future.result()
                else:
                    return loop.run_until_complete(agent.process_agentic_query(message, history))
            except RuntimeError:
                return asyncio.run(agent.process_agentic_query(message, history))

        # Sending via button or Enter both route through the same wrapper.
        submit_btn.click(process_msg, inputs=[msg, chatbot], outputs=[chatbot, msg, audio_output])
        msg.submit(process_msg, inputs=[msg, chatbot], outputs=[chatbot, msg, audio_output])
        clear_btn.click(lambda: [], outputs=[chatbot])

        # Uploading files immediately triggers indexing.
        file_upload.change(agent.upload_documents, inputs=[file_upload], outputs=[upload_status])

        apply_btn.click(
            agent.update_settings,
            inputs=[
                temperature_slider, max_tokens_slider, chunk_size_slider,
                chunk_overlap_slider, retrieval_k_slider, enable_web,
                enable_calc, enable_fact, enable_analysis
            ],
            outputs=[settings_status]
        )

    return interface
847
+
848
+ # ===================================================================
849
+ # MAIN
850
+ # ===================================================================
851
 
852
if __name__ == "__main__":
    # Startup banner listing the agent's capabilities.
    print("๐Ÿš€ Launching AI Research Agent on Hugging Face Spaces...")
    print("โœจ Features:")
    feature_list = (
        "Multi-Tool Integration",
        "Intelligent Query Planning",
        "Multi-Step Reasoning",
        "Result Synthesis",
        "Quality Evaluation",
        "๐Ÿ”Š Voice Output (Text-to-Speech)",
    )
    for feature in feature_list:
        print(f" โ€ข {feature}")

    # Build the Gradio UI and start serving.
    demo = create_interface()
    demo.launch()
+ app.launch()