Mehedi2 committed on
Commit
8f02790
·
verified ·
1 Parent(s): a793777

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -335
app.py CHANGED
@@ -1,400 +1,235 @@
1
  import os
2
- import requests
3
  import json
 
4
  import gradio as gr
5
- from typing import Dict, List, Any
6
 
 
7
  try:
8
  from datasets import load_dataset
9
  DATASETS_AVAILABLE = True
10
  except ImportError:
11
  DATASETS_AVAILABLE = False
 
12
 
13
- # Your OpenRouter API key
14
- OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") or os.getenv("my_key")
15
-
16
  class OpenRouterLLM:
17
- def __init__(self, api_key: str, model: str = "deepseek/deepseek-v3.1-terminus"):
18
- self.api_key = api_key
19
  self.model = model
20
- self.base_url = "https://openrouter.ai/api/v1/chat/completions"
21
-
22
- def __call__(self, prompt: str, max_tokens: int = 1000, temperature: float = 0.1) -> str:
23
- """Make API call to OpenRouter"""
24
-
25
- if not self.api_key or not self.api_key.startswith('sk-or-v1-'):
26
- return "Error: Invalid OpenRouter API key"
27
 
 
 
 
 
 
28
  headers = {
29
  "Authorization": f"Bearer {self.api_key}",
30
  "Content-Type": "application/json",
31
  }
32
-
33
  payload = {
34
  "model": self.model,
35
  "messages": [
36
- {
37
- "role": "system",
38
- "content": """You are a helpful AI assistant. Provide exact, direct answers without explanations unless specifically asked.
39
-
40
- Examples:
41
- - Math: "15 + 27" → "42"
42
- - Yes/No: "Is Paris the capital of France?" → "Yes"
43
- - Facts: "Capital of Japan?" → "Tokyo"
44
-
45
- Be concise and precise."""
46
- },
47
- {
48
- "role": "user",
49
- "content": prompt
50
- }
51
- ],
52
- "temperature": temperature,
53
- "max_tokens": max_tokens,
54
  }
55
 
56
  try:
57
- response = requests.post(self.base_url, headers=headers, json=payload, timeout=30)
58
-
59
- if response.status_code != 200:
60
- return f"API Error: {response.status_code}"
61
-
62
- result = response.json()
63
-
64
- if "choices" in result and len(result["choices"]) > 0:
65
- answer = result["choices"][0]["message"]["content"].strip()
66
- return self.clean_answer(answer)
67
- else:
68
- return "Error: No response content received"
69
-
70
  except Exception as e:
71
- return f"Error: {str(e)}"
72
-
73
- def clean_answer(self, response: str) -> str:
74
- """Clean the response to extract just the answer"""
75
- response = response.strip()
76
-
77
- # Remove common prefixes
78
- prefixes = [
79
- "Answer:", "The answer is:", "Response:", "Result:",
80
- "Final answer:", "Solution:", "A:", "Answer is:",
81
- "The final answer is:", "My answer is:"
82
- ]
83
-
84
- for prefix in prefixes:
85
- if response.lower().startswith(prefix.lower()):
86
- response = response[len(prefix):].strip()
87
- break
88
-
89
- # Remove quotes
90
- if (response.startswith('"') and response.endswith('"')) or (response.startswith("'") and response.endswith("'")):
91
- response = response[1:-1]
92
-
93
- # Remove trailing periods for short answers
94
- if len(response.split()) <= 3 and response.endswith('.'):
95
- response = response[:-1]
96
-
97
- return response
98
 
 
 
 
99
  class GAIADatasetLoader:
100
- def __init__(self, api_key: str):
101
- self.llm = OpenRouterLLM(api_key=api_key)
102
- self.questions = []
103
  self.dataset = None
104
-
 
105
  def load_gaia_dataset(self):
106
- """Load GAIA dataset properly"""
107
  if not DATASETS_AVAILABLE:
108
  return "Error: datasets library not available. Install with: pip install datasets"
109
-
110
  try:
111
- # Load the GAIA dataset - it has different subsets
112
  print("Loading GAIA dataset...")
113
-
114
- # The GAIA dataset structure from HuggingFace
115
- dataset = load_dataset("gaia-benchmark/GAIA", "2023_all")
116
-
117
- # Check available splits
118
  available_splits = list(dataset.keys())
119
  print(f"Available splits: {available_splits}")
120
 
121
- # Use validation split if available, otherwise use test
122
  if "validation" in available_splits:
123
  self.dataset = dataset["validation"]
124
  elif "test" in available_splits:
125
- self.dataset = dataset["test"]
126
  else:
127
- # Use the first available split
128
  split_name = available_splits[0]
129
  self.dataset = dataset[split_name]
130
 
131
- # Convert to our format
132
  self.questions = []
133
- level_1_count = 0
134
-
135
- for i, item in enumerate(self.dataset):
136
- # Extract fields from the dataset
137
- task_id = item.get("task_id", f"gaia_{i}")
138
- question = item.get("Question", "")
139
- final_answer = item.get("Final answer", "")
140
- level = item.get("Level", 1) # Default to level 1
141
-
142
- # Only include level 1 questions for the leaderboard
143
- if level == 1 and question and final_answer:
144
- self.questions.append({
145
- "task_id": task_id,
146
- "Question": question,
147
- "Final answer": str(final_answer),
148
- "Level": level,
149
- "file_name": item.get("file_name", ""),
150
- "file_path": item.get("file_path", "")
151
- })
152
- level_1_count += 1
153
-
154
- # Limit to 20 questions for the leaderboard
155
- if level_1_count >= 20:
156
- break
157
-
158
- if not self.questions:
159
- return self.create_fallback_questions("No level 1 questions found in dataset")
160
-
161
- return f"Successfully loaded {len(self.questions)} level 1 GAIA questions"
162
 
 
 
163
  except Exception as e:
164
  print(f"Dataset loading error: {e}")
165
  return self.create_fallback_questions(str(e))
166
 
167
- def create_fallback_questions(self, error_msg: str):
168
- """Create fallback questions if dataset loading fails"""
169
  self.questions = [
170
- {"task_id": "fallback_1", "Question": "What is 15 + 27?", "Final answer": "42", "Level": 1},
171
- {"task_id": "fallback_2", "Question": "What is the capital of France?", "Final answer": "Paris", "Level": 1},
172
- {"task_id": "fallback_3", "Question": "Is 64 divisible by 8?", "Final answer": "Yes", "Level": 1},
173
- {"task_id": "fallback_4", "Question": "What is 7 × 6?", "Final answer": "42", "Level": 1},
174
- {"task_id": "fallback_5", "Question": "What color do you get when you mix red and yellow?", "Final answer": "Orange", "Level": 1},
175
- {"task_id": "fallback_6", "Question": "How many days are in a week?", "Final answer": "7", "Level": 1},
176
- {"task_id": "fallback_7", "Question": "What is the square root of 16?", "Final answer": "4", "Level": 1},
177
- {"task_id": "fallback_8", "Question": "Is the Sun a star?", "Final answer": "Yes", "Level": 1},
178
- {"task_id": "fallback_9", "Question": "What is 100 - 37?", "Final answer": "63", "Level": 1},
179
- {"task_id": "fallback_10", "Question": "What is the largest planet in our solar system?", "Final answer": "Jupiter", "Level": 1}
180
  ]
181
-
182
- return f"Dataset loading failed: {error_msg}. Using {len(self.questions)} fallback questions for testing."
183
-
184
- def run_agent(self, prompt: str) -> str:
185
- """Main function that GAIA will call"""
186
- return self.llm(prompt, max_tokens=500, temperature=0.1)
187
-
188
- def get_random_question(self):
189
- """Get a random question"""
190
- if not self.questions:
191
- return None
192
-
193
- import random
194
- return random.choice(self.questions)
195
-
196
- def evaluate_agent(self, max_questions: int = None):
197
- """Evaluate the agent on loaded questions"""
198
- if not self.questions:
199
- return {"error": "No questions loaded"}
200
-
201
- eval_questions = self.questions[:max_questions] if max_questions else self.questions
202
-
203
- results = []
204
- correct = 0
205
-
206
- for q in eval_questions:
207
- question = q["Question"]
208
- expected = q["Final answer"]
209
-
210
- agent_answer = self.run_agent(question)
211
-
212
- # Exact match comparison (case-insensitive)
213
- is_correct = agent_answer.lower().strip() == expected.lower().strip()
 
 
 
 
 
214
  if is_correct:
215
  correct += 1
216
 
217
  results.append({
218
  "task_id": q["task_id"],
219
- "question": question,
220
  "expected": expected,
221
- "agent_answer": agent_answer,
222
  "correct": is_correct
223
  })
224
 
225
- score = (correct / len(eval_questions)) * 100 if eval_questions else 0
226
-
227
- return {
228
- "score": score,
229
- "correct": correct,
230
- "total": len(eval_questions),
231
- "results": results
232
- }
233
-
234
- # Initialize the agent
235
- agent = GAIADatasetLoader(api_key=OPENROUTER_API_KEY)
236
-
237
- def run_agent(prompt: str) -> str:
238
- """Main function for GAIA evaluation - this is what gets called"""
239
- return agent.run_agent(prompt)
240
-
241
- def load_dataset_action():
242
- """Load the GAIA dataset"""
243
- return agent.load_gaia_dataset()
244
-
245
- def test_random_question():
246
- """Test with a random question"""
247
- if not agent.questions:
248
- return "Please load the dataset first"
249
-
250
- q = agent.get_random_question()
251
- if not q:
252
- return "No questions available"
253
-
254
- question = q["Question"]
255
- expected = q["Final answer"]
256
- agent_answer = agent.run_agent(question)
257
- is_correct = agent_answer.lower().strip() == expected.lower().strip()
258
-
259
- return f"""Question: {question}
260
-
261
- Expected Answer: {expected}
262
- Agent Answer: {agent_answer}
263
- Correct: {'Yes' if is_correct else 'No'}
264
-
265
- Status: {'EXACT MATCH' if is_correct else 'NO MATCH'}"""
266
-
267
- def run_full_evaluation():
268
- """Run evaluation on all loaded questions"""
269
- if not agent.questions:
270
- return "Please load the dataset first"
271
-
272
- results = agent.evaluate_agent()
273
-
274
- if "error" in results:
275
- return f"Error: {results['error']}"
276
-
277
- summary = f"""GAIA Evaluation Results:
278
-
279
- Score: {results['score']:.1f}%
280
- Correct: {results['correct']}/{results['total']}
281
-
282
- Detailed Results:
283
- """
284
-
285
- for i, result in enumerate(results['results'][:10]): # Show first 10
286
- status_icon = "✓" if result['correct'] else "✗"
287
- summary += f"\n{i+1}. [{status_icon}] {result['question'][:60]}..."
288
- summary += f"\n Expected: {result['expected']}"
289
- summary += f"\n Got: {result['agent_answer']}"
290
- summary += "\n"
291
 
292
- if len(results['results']) > 10:
293
- summary += f"\n... and {len(results['results']) - 10} more questions"
 
 
294
 
295
- return summary
296
-
297
- def create_gradio_app():
298
- with gr.Blocks(title="GAIA Dataset Agent", theme=gr.themes.Soft()) as app:
299
-
300
- gr.HTML("""
301
- <div style="text-align: center; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
302
- <h1>GAIA Dataset Agent</h1>
303
- <p>Load and evaluate on real GAIA questions</p>
304
- </div>
305
- """)
306
-
307
- # Status indicators
308
- with gr.Row():
309
- api_status = gr.HTML(
310
- f"""<div style="padding: 10px; border-radius: 5px; {'background: lightgreen;' if OPENROUTER_API_KEY else 'background: lightcoral;'}">
311
- API Key: {'✓ Configured' if OPENROUTER_API_KEY else '✗ Missing - Set OPENROUTER_API_KEY'}
312
- </div>"""
313
- )
314
-
315
- datasets_status = gr.HTML(
316
- f"""<div style="padding: 10px; border-radius: 5px; {'background: lightgreen;' if DATASETS_AVAILABLE else 'background: lightcoral;'}">
317
- Datasets Library: {'✓ Available' if DATASETS_AVAILABLE else '✗ Missing - Install datasets'}
318
- </div>"""
319
- )
320
 
321
  with gr.Tab("1. Load Dataset"):
322
- gr.Markdown("### Load GAIA Questions from HuggingFace")
323
-
324
- load_btn = gr.Button("Load GAIA Dataset", variant="primary", size="lg")
325
- load_output = gr.Textbox(
326
- label="Loading Status",
327
- lines=5,
328
- placeholder="Click to load GAIA questions..."
329
- )
330
-
331
- load_btn.click(fn=load_dataset_action, outputs=[load_output])
332
-
333
- with gr.Tab("2. Test Single"):
334
- gr.Markdown("### Test with Random Question")
335
-
336
- test_btn = gr.Button("Test Random Question", variant="primary")
337
- test_output = gr.Textbox(
338
- label="Test Result",
339
- lines=10,
340
- placeholder="Load dataset first, then test..."
341
- )
342
-
343
- test_btn.click(fn=test_random_question, outputs=[test_output])
344
-
345
- with gr.Tab("3. Full Evaluation"):
346
- gr.Markdown("### Run Complete Evaluation")
347
-
348
- eval_btn = gr.Button("Run Full Evaluation", variant="primary", size="lg")
349
- eval_output = gr.Textbox(
350
- label="Evaluation Results",
351
- lines=20,
352
- placeholder="Load dataset first, then run evaluation...",
353
- show_copy_button=True
354
- )
355
-
356
- eval_btn.click(fn=run_full_evaluation, outputs=[eval_output])
357
-
358
- with gr.Tab("4. Manual Test"):
359
- gr.Markdown("### Test Your Own Questions")
360
-
361
- manual_input = gr.Textbox(
362
- label="Enter Question",
363
- placeholder="What is 2 + 2?",
364
- lines=2
365
- )
366
-
367
- manual_btn = gr.Button("Get Answer", variant="primary")
368
- manual_output = gr.Textbox(
369
- label="Agent Answer",
370
- lines=3
371
- )
372
-
373
- manual_btn.click(fn=run_agent, inputs=[manual_input], outputs=[manual_output])
374
-
375
- gr.Markdown("""
376
- ### Instructions:
377
- 1. **Load Dataset**: Click to download GAIA questions from HuggingFace
378
- 2. **Test Single**: Try your agent on one random question
379
- 3. **Full Evaluation**: Get your score on all loaded questions
380
- 4. **Manual Test**: Test with your own questions
381
-
382
- ### Requirements:
383
- - OpenRouter API key in Space secrets as `OPENROUTER_API_KEY`
384
- - The `datasets` library (should be in requirements.txt)
385
-
386
- ### Notes:
387
- - Uses real GAIA level 1 questions (20 max for leaderboard)
388
- - Scoring is exact match only (case-insensitive)
389
- - Your `run_agent` function is ready for GAIA API integration
390
- """)
391
-
392
- return app
393
 
394
  if __name__ == "__main__":
395
- app = create_gradio_app()
396
-
397
- if os.getenv("SPACE_ID"):
398
- app.launch(server_name="0.0.0.0", server_port=7860, show_api=False)
399
- else:
400
- app.launch(share=True, show_api=False)
 
1
  import os
2
+ import re
3
  import json
4
+ import requests
5
  import gradio as gr
 
6
 
7
+ # Try importing datasets
8
  try:
9
  from datasets import load_dataset
10
  DATASETS_AVAILABLE = True
11
  except ImportError:
12
  DATASETS_AVAILABLE = False
13
+ print("⚠️ datasets library not found. Install with: pip install datasets")
14
 
15
+ # ===============================
16
+ # 1. Your Original LLM Wrapper
17
+ # ===============================
18
class OpenRouterLLM:
    """Minimal wrapper around the OpenRouter chat-completions HTTP API."""

    def __init__(self, api_key=None, model="deepseek/deepseek-v3.1-terminus"):
        # Fall back to the environment so Space secrets work out of the box.
        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        self.model = model
        self.base_url = "https://openrouter.ai/api/v1"

        if not self.api_key:
            raise ValueError("Missing OpenRouter API key. Set OPENROUTER_API_KEY environment variable.")

    def generate(self, prompt, system_prompt="You are a helpful AI agent."):
        """Send a prompt to OpenRouter and return the model's response"""
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ]
        }

        try:
            # json= lets requests serialize the payload itself; the timeout
            # prevents the Gradio UI from hanging forever on a stalled call.
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=60,
            )
            response.raise_for_status()
            data = response.json()
            return data["choices"][0]["message"]["content"].strip()
        except Exception as e:
            # Best-effort: surface the error as a string so the UI can
            # display it instead of crashing.
            print(f"LLM error: {e}")
            return f"Error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ # ===============================
55
+ # 2. GAIA Dataset Loader
56
+ # ===============================
57
class GAIADatasetLoader:
    """Loads GAIA benchmark questions from Hugging Face, with a toy fallback."""

    def __init__(self):
        self.dataset = None   # raw HF split once loaded
        self.questions = []   # normalized question dicts used by the agent

    def load_gaia_dataset(self):
        """Load GAIA dataset from Hugging Face"""
        if not DATASETS_AVAILABLE:
            return "Error: datasets library not available. Install with: pip install datasets"

        try:
            print("Loading GAIA dataset...")
            # GAIA is published with named configs; "2023_all" is the full set.
            dataset = load_dataset("gaia-benchmark/GAIA", "2023_all")

            available_splits = list(dataset.keys())
            print(f"Available splits: {available_splits}")

            # Prefer validation (answers available), then test, then whatever exists.
            if "validation" in available_splits:
                self.dataset = dataset["validation"]
            elif "test" in available_splits:
                self.dataset = dataset["test"]
            else:
                split_name = available_splits[0]
                self.dataset = dataset[split_name]

            # Pick the first up-to-20 items (GAIA leaderboard setup).
            # min() guards against splits with fewer than 20 rows, where
            # select(range(20)) would raise.
            self.questions = []
            limit = min(20, len(self.dataset))
            for item in self.dataset.select(range(limit)):
                self.questions.append({
                    "task_id": item["task_id"],
                    "Question": item["Question"],
                    "Final answer": str(item["Final answer"]),
                    "file_name": item.get("file_name", ""),
                    "file_path": item.get("file_path", "")
                })

            return f"✅ Successfully loaded {len(self.questions)} GAIA questions"

        except Exception as e:
            # Any failure (auth, network, schema) falls back to toy questions
            # so the rest of the UI remains usable.
            print(f"Dataset loading error: {e}")
            return self.create_fallback_questions(str(e))

    def create_fallback_questions(self, error_message=""):
        """Fallback: create toy questions if dataset fails"""
        self.questions = [
            {
                "task_id": "test_1",
                "Question": "What is 2+2?",
                "Final answer": "4"
            },
            {
                "task_id": "test_2",
                "Question": "What is the capital of France?",
                "Final answer": "Paris"
            }
        ]
        return f"⚠️ Using fallback questions. Error: {error_message}"
114
+
115
+ # ===============================
116
+ # 3. GAIA Agent (Evaluator)
117
+ # ===============================
118
class GAIAAgent:
    """Answers GAIA questions via the LLM and scores them by exact match."""

    def __init__(self, llm: "OpenRouterLLM", dataset_loader: "GAIADatasetLoader"):
        self.llm = llm
        self.dataset_loader = dataset_loader

    def clean_answer(self, answer: str):
        """Clean model output to keep only raw answer"""
        if not answer:
            return ""
        answer = answer.strip()
        # Remove "Answer:" or "Final answer:" prefixes
        answer = re.sub(r"(?i)^(final\s*answer|answer)\s*[:\-]?\s*", "", answer)
        return answer.strip()

    def answer_question(self, question_obj):
        """Ask LLM to answer one question"""
        q = question_obj["Question"]
        system_prompt = (
            "You are solving GAIA benchmark questions. "
            "Provide ONLY the final answer, no reasoning."
        )
        raw_answer = self.llm.generate(q, system_prompt)
        return self.clean_answer(raw_answer)

    def evaluate(self):
        """Evaluate all questions and compute accuracy"""
        results, correct = [], 0
        for q in self.dataset_loader.questions:
            agent_answer = self.answer_question(q)
            expected = str(q["Final answer"]).strip()

            # Exact match, case-insensitive: GAIA scoring should not fail an
            # answer purely on letter case (the previous revision lowered both
            # sides; this restores that behavior).
            is_correct = agent_answer.strip().lower() == expected.lower()
            if is_correct:
                correct += 1

            results.append({
                "task_id": q["task_id"],
                "question": q["Question"],
                "expected": expected,
                "answer": agent_answer,
                "correct": is_correct
            })

        accuracy = correct / len(results) if results else 0
        return results, accuracy
164
+
165
+ # ===============================
166
+ # 4. Gradio UI
167
+ # ===============================
168
def build_gradio_interface(agent, dataset_loader):
    """Assemble the Gradio demo: dataset loading, single-question testing,
    full evaluation, and manual answer checking."""

    def _load():
        # Tab 1 callback: pull GAIA questions into the loader.
        return dataset_loader.load_gaia_dataset()

    def _ask(question_text):
        # Tab 2 callback: answer one ad-hoc question.
        return agent.answer_question({"Question": question_text})

    def _run_eval():
        # Tab 3 callback: score every loaded question and format a report.
        results, acc = agent.evaluate()
        summary = f"✅ Accuracy: {acc*100:.1f}% ({sum(r['correct'] for r in results)}/{len(results)})\n\n"
        for r in results:
            summary += (
                f"\nQ: {r['question']}\n"
                f"Expected: {r['expected']} | Got: {r['answer']} | Correct: {r['correct']}\n"
            )
        return summary

    def _manual(question_text, expected_answer):
        # Tab 4 callback: compare the agent's answer against a user-supplied one.
        agent_answer = agent.answer_question({"Question": question_text})
        is_correct = agent_answer.strip() == expected_answer.strip()
        return f"Q: {question_text}\nExpected: {expected_answer}\nAgent: {agent_answer}\nCorrect: {is_correct}"

    with gr.Blocks() as demo:
        gr.Markdown("# 🤖 GAIA Agent Evaluation")

        with gr.Tab("1. Load Dataset"):
            out1 = gr.Textbox(label="Dataset Load Status")
            btn1 = gr.Button("Load GAIA Dataset")
            btn1.click(_load, outputs=out1)

        with gr.Tab("2. Test Single Question"):
            q_in = gr.Textbox(label="Enter a Question")
            ans_out = gr.Textbox(label="Agent Answer")
            btn2 = gr.Button("Get Answer")
            btn2.click(_ask, inputs=q_in, outputs=ans_out)

        with gr.Tab("3. Evaluate Full Dataset"):
            out3 = gr.Textbox(label="Evaluation Results", lines=20)
            btn3 = gr.Button("Run Evaluation")
            btn3.click(_run_eval, outputs=out3)

        with gr.Tab("4. Manual Evaluation"):
            q_in2 = gr.Textbox(label="Question")
            expected_in = gr.Textbox(label="Expected Answer")
            out4 = gr.Textbox(label="Evaluation Result")
            btn4 = gr.Button("Evaluate Agent Answer")
            btn4.click(_manual, inputs=[q_in2, expected_in], outputs=out4)

    return demo
217
+
218
+ # ===============================
219
+ # 5. Main
220
+ # ===============================
221
def main():
    """Wire the LLM, loader, and agent together, then launch the Gradio app."""
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        print("⚠️ Set OPENROUTER_API_KEY before running.")
        return

    llm = OpenRouterLLM(api_key=api_key, model="deepseek/deepseek-v3.1-terminus")
    loader = GAIADatasetLoader()
    agent = GAIAAgent(llm, loader)

    demo = build_gradio_interface(agent, loader)
    if os.getenv("SPACE_ID"):
        # Running inside a Hugging Face Space: bind to the host/port the
        # Space runtime expects; share links are not usable there.
        demo.launch(server_name="0.0.0.0", server_port=7860)
    else:
        # Local run: expose a temporary public share link.
        demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
# Script entry point: only launch the app when run directly, not on import.
if __name__ == "__main__":
    main()