Paperbag committed on
Commit
8d79810
·
1 Parent(s): 85f2d49

update openrouter model

Browse files
Files changed (3) hide show
  1. agent.py +2 -2
  2. extract_gaia_parquet.py +61 -0
  3. find_gaia_answers.py +46 -0
agent.py CHANGED
@@ -61,7 +61,7 @@ def smart_invoke(msgs, use_tools=False, start_tier=0):
61
  gemini_alternatives = ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-flash-latest", "gemini-pro-latest"]
62
 
63
  tiers_config = [
64
- {"name": "OpenRouter", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "meta-llama/llama-3.3-70b-instruct", "base_url": "https://openrouter.ai/api/v1"},
65
  {"name": "Gemini", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash", "alternatives": gemini_alternatives},
66
  {"name": "Groq", "key": "GROQ_API_KEY", "provider": "groq", "model_name": "llama-3.3-70b-versatile"},
67
  {"name": "NVIDIA", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "meta/llama-3.3-70b-instruct", "base_url": "https://integrate.api.nvidia.com/v1"},
@@ -178,7 +178,7 @@ def wiki_search(query: str) -> str:
178
  def get_vision_models():
179
  """Returns a list of vision models to try, in order of preference."""
180
  configs = [
181
- {"name": "OpenRouter-Gemini-2.0", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemini-2.0-flash-001", "base_url": "https://openrouter.ai/api/v1"},
182
  {"name": "Google-Gemini-2.0-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash"},
183
  {"name": "Google-Gemini-Flash-Latest", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-flash-latest"},
184
  {"name": "NVIDIA-Vision-Llama-11b", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "meta/llama-3.2-11b-vision-instruct", "base_url": "https://integrate.api.nvidia.com/v1"},
 
61
  gemini_alternatives = ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-flash-latest", "gemini-pro-latest"]
62
 
63
  tiers_config = [
64
+ {"name": "OpenRouter", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "minimax/minimax-m2.5:free", "base_url": "https://openrouter.ai/api/v1"},
65
  {"name": "Gemini", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash", "alternatives": gemini_alternatives},
66
  {"name": "Groq", "key": "GROQ_API_KEY", "provider": "groq", "model_name": "llama-3.3-70b-versatile"},
67
  {"name": "NVIDIA", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "meta/llama-3.3-70b-instruct", "base_url": "https://integrate.api.nvidia.com/v1"},
 
178
  def get_vision_models():
179
  """Returns a list of vision models to try, in order of preference."""
180
  configs = [
181
+ {"name": "OpenRouter-Gemma-3-27b-it", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemma-3-27b-it:free", "base_url": "https://openrouter.ai/api/v1"},
182
  {"name": "Google-Gemini-2.0-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash"},
183
  {"name": "Google-Gemini-Flash-Latest", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-flash-latest"},
184
  {"name": "NVIDIA-Vision-Llama-11b", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "meta/llama-3.2-11b-vision-instruct", "base_url": "https://integrate.api.nvidia.com/v1"},
extract_gaia_parquet.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fetch the current GAIA scoring questions and match them against the
gated GAIA validation Parquet (downloaded via huggingface_hub) to print
and save the ground-truth answers."""
import requests
import os
import pandas as pd
from huggingface_hub import hf_hub_download
from dotenv import load_dotenv

load_dotenv(override=True)

# 1. Fetch current questions
QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
print(f"Fetching questions from {QUESTIONS_URL}...")
try:
    # Explicit timeout so the script cannot hang forever on a stalled connection.
    resp = requests.get(QUESTIONS_URL, timeout=30)
    resp.raise_for_status()
    current_questions = resp.json()
except Exception as e:
    # Best-effort: fall back to an empty question list so the rest of the
    # script still runs (and simply reports nothing matched).
    print(f"Error fetching questions: {e}")
    current_questions = []

# 2. Download ground truth via hf_hub_download
print("Downloading GAIA validation metadata...")
try:
    # GAIA is a gated dataset; a token is required for the download.
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
    file_path = hf_hub_download(
        repo_id="gaia-benchmark/GAIA",
        filename="2023/validation/metadata.parquet",
        repo_type="dataset",
        token=token
    )

    # 3. Read Parquet and match
    df = pd.read_parquet(file_path)
    # GAIA dataset columns are: task_id, Question, Final answer, etc.

    print("\n--- GAIA GROUND TRUTH ANSWERS ---")
    results = []
    for i, q in enumerate(current_questions):
        # Guard against missing keys: .get() may return None, and slicing
        # None (tid[:8] / question[:100]) would raise TypeError.
        tid = q.get("task_id") or ""
        question_text = q.get("question") or ""
        match = df[df["task_id"] == tid]
        if not match.empty:
            answer = match.iloc[0]["Final answer"]
            results.append({
                "Index": i + 1,
                "Task ID": tid,
                "Question": question_text[:100] + "...",
                "Answer": answer
            })
            print(f"{i+1}. [ID: {tid[:8]}] Answer: {answer}")
        else:
            print(f"{i+1}. [ID: {tid[:8]}] NOT FOUND in validation set.")

    # Save to a nice CSV for the user
    if results:
        res_df = pd.DataFrame(results)
        res_df.to_csv("gaia_ground_truth.csv", index=False)
        print("\nFull list saved to 'gaia_ground_truth.csv'")

except Exception as e:
    print(f"Error processing Parquet: {e}")
    print("Tip: You might need 'pip install pyarrow' to read parquet files.")
find_gaia_answers.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fetch the current GAIA scoring questions and try to resolve their
ground-truth answers by downloading the validation metadata.jsonl
directly from the Hugging Face Hub (gated; needs an HF token)."""
import requests
import json
import os
from dotenv import load_dotenv

load_dotenv(override=True)

# 1. Fetch current questions from the scoring space
QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
print(f"Fetching questions from {QUESTIONS_URL}...")
try:
    # Explicit timeout so the script cannot hang forever on a stalled connection.
    resp = requests.get(QUESTIONS_URL, timeout=30)
    resp.raise_for_status()
    current_questions = resp.json()
except Exception as e:
    # Best-effort: continue with an empty list so the rest still runs.
    print(f"Error fetching questions: {e}")
    current_questions = []

# 2. Try to fetch GAIA Validation metadata from HF
# Note: This file is large and might be gated, but we can try common URLs
GAIA_VAL_URL = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation/metadata.jsonl"
print(f"Fetching ground truth answers from {GAIA_VAL_URL}...")
# We need a token for gated datasets
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}

try:
    resp = requests.get(GAIA_VAL_URL, headers=headers, timeout=60)
    if resp.status_code == 200:
        lines = resp.text.strip().split("\n")
        val_data = [json.loads(line) for line in lines]
        # Create a map of task_id -> answer
        answer_map = {item["task_id"]: item.get("Final answer") for item in val_data}

        print("\n--- GAIA GROUND TRUTH ANSWERS ---")
        for i, q in enumerate(current_questions):
            # Guard against missing keys: slicing None would raise TypeError.
            task_id = q.get("task_id") or ""
            question_text = q.get("question") or ""
            answer = answer_map.get(task_id, "NOT FOUND")
            print(f"{i+1}. [ID: {task_id[:8]}...] Answer: {answer}")
            print(f"   Q: {question_text[:80]}...")
            print("-" * 20)
    else:
        print(f"Failed to fetch ground truth (Status {resp.status_code}). Likely gated or wrong URL.")
        print("Tip: You can find them at https://huggingface.co/datasets/gaia-benchmark/GAIA/viewer/2023/validation")
except Exception as e:
    print(f"Error during matching: {e}")