update openrouter model
Browse files
- agent.py +2 -2
- extract_gaia_parquet.py +61 -0
- find_gaia_answers.py +46 -0
agent.py
CHANGED
|
@@ -61,7 +61,7 @@ def smart_invoke(msgs, use_tools=False, start_tier=0):
|
|
| 61 |
gemini_alternatives = ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-flash-latest", "gemini-pro-latest"]
|
| 62 |
|
| 63 |
tiers_config = [
|
| 64 |
-
{"name": "OpenRouter", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "
|
| 65 |
{"name": "Gemini", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash", "alternatives": gemini_alternatives},
|
| 66 |
{"name": "Groq", "key": "GROQ_API_KEY", "provider": "groq", "model_name": "llama-3.3-70b-versatile"},
|
| 67 |
{"name": "NVIDIA", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "meta/llama-3.3-70b-instruct", "base_url": "https://integrate.api.nvidia.com/v1"},
|
|
@@ -178,7 +178,7 @@ def wiki_search(query: str) -> str:
|
|
| 178 |
def get_vision_models():
|
| 179 |
"""Returns a list of vision models to try, in order of preference."""
|
| 180 |
configs = [
|
| 181 |
-
{"name": "OpenRouter-
|
| 182 |
{"name": "Google-Gemini-2.0-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash"},
|
| 183 |
{"name": "Google-Gemini-Flash-Latest", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-flash-latest"},
|
| 184 |
{"name": "NVIDIA-Vision-Llama-11b", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "meta/llama-3.2-11b-vision-instruct", "base_url": "https://integrate.api.nvidia.com/v1"},
|
|
|
|
| 61 |
gemini_alternatives = ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-flash-latest", "gemini-pro-latest"]
|
| 62 |
|
| 63 |
tiers_config = [
|
| 64 |
+
{"name": "OpenRouter", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "minimax/minimax-m2.5:free", "base_url": "https://openrouter.ai/api/v1"},
|
| 65 |
{"name": "Gemini", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash", "alternatives": gemini_alternatives},
|
| 66 |
{"name": "Groq", "key": "GROQ_API_KEY", "provider": "groq", "model_name": "llama-3.3-70b-versatile"},
|
| 67 |
{"name": "NVIDIA", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "meta/llama-3.3-70b-instruct", "base_url": "https://integrate.api.nvidia.com/v1"},
|
|
|
|
| 178 |
def get_vision_models():
|
| 179 |
"""Returns a list of vision models to try, in order of preference."""
|
| 180 |
configs = [
|
| 181 |
+
{"name": "OpenRouter-Gemma-3-27b-it", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemma-3-27b-it:free", "base_url": "https://openrouter.ai/api/v1"},
|
| 182 |
{"name": "Google-Gemini-2.0-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash"},
|
| 183 |
{"name": "Google-Gemini-Flash-Latest", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-flash-latest"},
|
| 184 |
{"name": "NVIDIA-Vision-Llama-11b", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "meta/llama-3.2-11b-vision-instruct", "base_url": "https://integrate.api.nvidia.com/v1"},
|
extract_gaia_parquet.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from huggingface_hub import hf_hub_download
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
# Endpoint of the scoring space that serves the currently active questions.
QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"

# Upper bound (seconds) for the HTTP call so a stalled server cannot hang the script.
REQUEST_TIMEOUT_SECONDS = 30


def _short_id(task_id):
    """Return the first 8 characters of *task_id*, or "unknown" when it is absent."""
    return "unknown" if task_id is None else str(task_id)[:8]


def fetch_questions(url=QUESTIONS_URL, timeout=REQUEST_TIMEOUT_SECONDS):
    """Download the current question list from the scoring space.

    Args:
        url: Endpoint returning the questions as JSON.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or an empty list when the request or the
        JSON decoding fails (the error is printed, not raised).
    """
    print(f"Fetching questions from {url}...")
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older requests versions.
        print(f"Error fetching questions: {e}")
        return []


def match_answers(questions, df):
    """Match each question to its ground-truth answer and print one line per question.

    Args:
        questions: Iterable of dicts with optional "task_id" and "question" keys.
        df: DataFrame with "task_id" and "Final answer" columns.

    Returns:
        A list of row dicts ("Index", "Task ID", "Question", "Answer") for the
        questions whose task id exists in *df*. Questions without a task id or
        without a match are reported as NOT FOUND and omitted from the result.

    Raises:
        KeyError: If *df* lacks the "task_id" or "Final answer" column.
    """
    # Build the lookup once instead of scanning the DataFrame per question.
    # setdefault keeps the first row for a duplicated task id, matching iloc[0].
    answers_by_id = {}
    for task_id, answer in zip(df["task_id"], df["Final answer"]):
        answers_by_id.setdefault(task_id, answer)

    results = []
    for index, question in enumerate(questions, start=1):
        task_id = question.get("task_id")
        short_id = _short_id(task_id)
        if task_id is None or task_id not in answers_by_id:
            print(f"{index}. [ID: {short_id}] NOT FOUND in validation set.")
            continue

        answer = answers_by_id[task_id]
        # A missing/null question must not abort the whole run.
        question_text = str(question.get("question") or "")
        results.append({
            "Index": index,
            "Task ID": task_id,
            "Question": question_text[:100] + "...",
            "Answer": answer,
        })
        print(f"{index}. [ID: {short_id}] Answer: {answer}")
    return results


def main():
    """Fetch the questions, download the GAIA validation metadata and save the matches."""
    load_dotenv(override=True)

    # 1. Fetch current questions
    current_questions = fetch_questions()

    # 2. Download ground truth via hf_hub_download
    print("Downloading GAIA validation metadata...")
    try:
        token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
        file_path = hf_hub_download(
            repo_id="gaia-benchmark/GAIA",
            filename="2023/validation/metadata.parquet",
            repo_type="dataset",
            token=token,
        )

        # 3. Read Parquet and match
        df = pd.read_parquet(file_path)

        print("\n--- GAIA GROUND TRUTH ANSWERS ---")
        results = match_answers(current_questions, df)

        # Save to a nice CSV for the user
        if results:
            pd.DataFrame(results).to_csv("gaia_ground_truth.csv", index=False)
            print("\nFull list saved to 'gaia_ground_truth.csv'")

    except ImportError as e:
        # pandas raises ImportError when no parquet engine is installed;
        # only then is the install tip relevant.
        print(f"Error processing Parquet: {e}")
        print("Tip: You might need 'pip install pyarrow' to read parquet files.")
    except Exception as e:
        # Top-level boundary of a CLI script: report and exit quietly.
        print(f"Error processing Parquet: {e}")


if __name__ == "__main__":
    main()
|
find_gaia_answers.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
# Endpoint of the scoring space that serves the currently active questions.
QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"

# Note: This file is large and might be gated, but we can try common URLs
GAIA_VAL_URL = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation/metadata.jsonl"

# Upper bound (seconds) for each HTTP call so a stalled server cannot hang the script.
REQUEST_TIMEOUT_SECONDS = 30


def fetch_questions(url=QUESTIONS_URL, timeout=REQUEST_TIMEOUT_SECONDS):
    """Download the current question list from the scoring space.

    Args:
        url: Endpoint returning the questions as JSON.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or an empty list when the request or the
        JSON decoding fails (the error is printed, not raised).
    """
    print(f"Fetching questions from {url}...")
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older requests versions.
        print(f"Error fetching questions: {e}")
        return []


def parse_answer_map(jsonl_text):
    """Build a ``task_id -> final answer`` map from JSON Lines text.

    Blank lines and records without a "task_id" are skipped. When a task id
    appears more than once, the last record wins.

    Args:
        jsonl_text: Text with one JSON object per line.

    Returns:
        Dict mapping each task id to its "Final answer" value (None if the
        record has no such key).

    Raises:
        ValueError: If a non-blank line is not valid JSON.
    """
    answer_map = {}
    # Split on "\n" only: str.splitlines() would also break on characters
    # such as U+2028 that may legally appear unescaped inside JSON strings.
    for raw_line in jsonl_text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        item = json.loads(line)
        task_id = item.get("task_id")
        if task_id is not None:
            answer_map[task_id] = item.get("Final answer")
    return answer_map


def format_report(questions, answer_map):
    """Return the report lines pairing every question with its ground-truth answer.

    Args:
        questions: Iterable of dicts with optional "task_id" and "question" keys.
        answer_map: Dict mapping task ids to answers.

    Returns:
        A list of strings, three per question: the answer line, the truncated
        question line and a separator. Unknown task ids are reported with the
        answer "NOT FOUND"; a missing task id is shown as "unknown".
    """
    lines = []
    for index, question in enumerate(questions, start=1):
        task_id = question.get("task_id")
        answer = answer_map.get(task_id, "NOT FOUND")
        # Missing/null fields must not abort the whole report.
        short_id = "unknown" if task_id is None else str(task_id)[:8]
        question_text = str(question.get("question") or "")
        lines.append(f"{index}. [ID: {short_id}...] Answer: {answer}")
        lines.append(f" Q: {question_text[:80]}...")
        lines.append("-" * 20)
    return lines


def main():
    """Fetch the questions and the GAIA validation metadata, then print the matches."""
    load_dotenv(override=True)

    # 1. Fetch current questions from the scoring space
    current_questions = fetch_questions()

    # 2. Try to fetch GAIA Validation metadata from HF
    print(f"Fetching ground truth answers from {GAIA_VAL_URL}...")
    # We need a token for gated datasets
    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
    headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}

    try:
        resp = requests.get(GAIA_VAL_URL, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
        if resp.status_code != 200:
            print(f"Failed to fetch ground truth (Status {resp.status_code}). Likely gated or wrong URL.")
            print("Tip: You can find them at https://huggingface.co/datasets/gaia-benchmark/GAIA/viewer/2023/validation")
            return

        # Create a map of task_id -> answer
        answer_map = parse_answer_map(resp.text)

        print("\n--- GAIA GROUND TRUTH ANSWERS ---")
        for line in format_report(current_questions, answer_map):
            print(line)
    except Exception as e:
        # Top-level boundary of a CLI script: report and exit quietly.
        print(f"Error during matching: {e}")


if __name__ == "__main__":
    main()
|