Add vision-related tests and tools for image description and analysis
Browse files
- Introduced _test_network.py, _test_other_vision.py, _test_vision.py, _test_vqa.py, _test_wiki.py, _test_zen_vision.py, and _test_zen_vision_all.py for testing various vision models and APIs.
- Implemented describe_image tool in tools/vision/describe_image.py to generate descriptions for images using a local vision model.
- Updated agent.py and app.py to include describe_image tool in the workflow.
- Modified gaia_results.csv and gaia_results.json to reflect correct answers and improve data accuracy.
- Enhanced web search functionality in tools/web/wiki.py to fetch Wikipedia snippets using the MediaWiki API.
- Improved error handling and logging across various modules for better debugging and user feedback.
- __pycache__/agent.cpython-39.pyc +0 -0
- _test_blip.py +37 -0
- _test_client_attrs.py +13 -0
- _test_hf_chat.py +36 -0
- _test_hf_constants.py +9 -0
- _test_hf_endpoints.py +53 -0
- _test_hf_final.py +65 -0
- _test_hf_inference_client.py +45 -0
- _test_hf_methods.py +61 -0
- _test_hf_providers.py +22 -0
- _test_hf_tasks.py +68 -0
- _test_hf_tasks2.py +72 -0
- _test_hf_vision.py +40 -0
- _test_images.py +10 -0
- _test_models.py +63 -0
- _test_network.py +84 -0
- _test_other_vision.py +108 -0
- _test_vision.py +14 -0
- _test_vqa.py +36 -0
- _test_wiki.py +10 -0
- _test_zen_vision.py +53 -0
- _test_zen_vision_all.py +47 -0
- agent.py +8 -7
- app.py +5 -3
- gaia_results.csv +10 -10
- gaia_results.json +20 -20
- run_local.py +3 -3
- tools/__init__.py +2 -0
- tools/vision/__init__.py +0 -0
- tools/vision/describe_image.py +44 -0
- tools/web/search.py +1 -2
- tools/web/wiki.py +30 -4
__pycache__/agent.cpython-39.pyc
CHANGED
|
Binary files a/__pycache__/agent.cpython-39.pyc and b/__pycache__/agent.cpython-39.pyc differ
|
|
|
_test_blip.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: caption one GAIA validation image with two local
# image-to-text pipelines, printing load time, inference time and the raw
# pipeline output for each.
import os
import time

from dotenv import load_dotenv

# Load .env before HF_TOKEN is read below.
load_dotenv()

from PIL import Image
from huggingface_hub import hf_hub_download

image_name = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
hf_token = os.getenv('HF_TOKEN')
image_path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename=f'2023/validation/{image_name}',
    repo_type='dataset',
    token=hf_token,
)

picture = Image.open(image_path)
print(f'Image size: {picture.size}, mode: {picture.mode}')

# transformers is imported after the image details have been printed.
from transformers import pipeline

# (progress banner, checkpoint) pairs, tried in this order.
attempts = (
    ('Loading BLIP image captioning...', "Salesforce/blip-image-captioning-base"),
    ('\nTrying tiny model...', "nlpconnect/vit-gpt2-image-captioning"),
)

for banner, checkpoint in attempts:
    print(banner)
    started = time.time()
    try:
        caption_model = pipeline("image-to-text", model=checkpoint)
        print(f'Model loaded in {time.time()-started:.1f}s')
        started = time.time()
        caption = caption_model(picture)
        print(f'Result in {time.time()-started:.1f}s: {caption}')
    except Exception as exc:
        # A failure of one checkpoint is reported and the next one still runs.
        print(f'Error: {type(exc).__name__}: {exc}')
|
_test_client_attrs.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: print the public InferenceClient attribute names that
# mention images, vision, base, url or endpoint.
import os

from dotenv import load_dotenv

load_dotenv()

from huggingface_hub import InferenceClient

client = InferenceClient(token=os.getenv('HF_TOKEN'))

# Substrings (matched case-insensitively) that make a name worth printing.
keywords = ('image', 'vision', 'base', 'url', 'endpoint')

public_names = [name for name in dir(client) if not name.startswith('_')]
print('Public methods:')
for name in public_names:
    lowered = name.lower()
    if any(word in lowered for word in keywords):
        print(f' {name}')
|
_test_hf_chat.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: send one GAIA validation image to a vision chat model
# through InferenceClient with the 'hf-inference' provider.
import base64
import os
import traceback

from dotenv import load_dotenv

load_dotenv()

from huggingface_hub import InferenceClient, hf_hub_download

hf_token = os.getenv('HF_TOKEN')
client = InferenceClient(token=hf_token, provider='hf-inference')

image_name = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
image_path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename=f'2023/validation/{image_name}',
    repo_type='dataset',
    token=hf_token,
)

with open(image_path, 'rb') as handle:
    encoded = base64.b64encode(handle.read()).decode('utf-8')

# The image travels inline as a base64 data URI.
user_message = {
    'role': 'user',
    'content': [
        {'type': 'text', 'text': 'Describe this image in detail.'},
        {'type': 'image_url', 'image_url': {'url': f'data:image/png;base64,{encoded}'}},
    ],
}

print('Trying chat_completion with hf-inference provider...')
try:
    reply = client.chat_completion(
        model='meta-llama/Llama-3.2-11B-Vision-Instruct',
        messages=[user_message],
        max_tokens=1024,
        temperature=0,
    )
    print('SUCCESS!')
    # Only the first 500 characters of the answer are shown.
    print(f'Response: {reply.choices[0].message.content[:500]}')
except Exception as exc:
    print(f'Error: {type(exc).__name__}: {exc}')
    traceback.print_exc()
|
_test_hf_constants.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: print the Hugging Face endpoint configuration in effect.
import os

from huggingface_hub import InferenceClient, constants

# huggingface_hub.constants publishes the Hub URL as ENDPOINT. The name this
# script originally read (HUGGINGFACE_HUB_ENDPOINT) is still tried first in
# case an installed release defines it; falling back keeps the script from
# stopping on AttributeError before it has printed anything.
hub_endpoint = getattr(
    constants,
    'HUGGINGFACE_HUB_ENDPOINT',
    getattr(constants, 'ENDPOINT', 'not available'),
)
print(f'HUGGINGFACE_HUB_ENDPOINT: {hub_endpoint}')
print(f'INFERENCE_ENDPOINT env: {os.getenv("HF_INFERENCE_ENDPOINT", "not set")}')
print(f'HF_ENDPOINT env: {os.getenv("HF_ENDPOINT", "not set")}')

client = InferenceClient()
# NOTE(review): not confirmed that every huggingface_hub release exposes
# `base_url` on the client, so a missing attribute is reported, not raised.
print(f'Client base_url: {getattr(client, "base_url", "not available")}')
|
_test_hf_endpoints.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: POST one vision chat request to several candidate Hugging
# Face endpoint URLs and print the HTTP status and the start of each reply.
import base64
import json
import os

import requests
from dotenv import load_dotenv

load_dotenv()

token = os.getenv('HF_TOKEN')

from huggingface_hub import hf_hub_download

fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)

with open(path, 'rb') as f:
    img_bytes = f.read()
b64 = base64.b64encode(img_bytes).decode('utf-8')
data_uri = f'data:image/png;base64,{b64}'

llama_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Request template; the "model" field is overridden per endpoint below.
payload = {
    "model": llama_id,
    "messages": [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image briefly."},
            {"type": "image_url", "image_url": {"url": data_uri}},
        ]
    }],
    "max_tokens": 512,
    "temperature": 0,
}

# Try different endpoint formats. Each entry pairs the URL with the model id
# that URL addresses, so the request body can name the same model; previously
# the Qwen and Florence-2 URLs were sent a body that still said Llama.
endpoints = [
    (llama_id, "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions"),
    (llama_id, "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.2-11B-Vision-Instruct"),
    (llama_id, "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct"),
    ("Qwen/Qwen2-VL-7B-Instruct", "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2-VL-7B-Instruct/v1/chat/completions"),
    ("microsoft/Florence-2-large", "https://router.huggingface.co/hf-inference/models/microsoft/Florence-2-large/v1/chat/completions"),
]

headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
}

for model_id, ep in endpoints:
    print(f'\n--- Trying: {ep}')
    try:
        body = {**payload, "model": model_id}
        resp = requests.post(ep, headers=headers, json=body, timeout=30)
        print(f'Status: {resp.status_code}')
        if resp.status_code == 200:
            print(f'Response: {resp.text[:300]}')
        else:
            print(f'Error: {resp.text[:200]}')
    except Exception as e:
        # Connection-level failures are reported and the next URL is tried.
        print(f'Exception: {e}')
|
_test_hf_final.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: try four InferenceClient calls against one GAIA validation
# image and print what each returns (or the error it raises).
import base64
import os

from dotenv import load_dotenv

load_dotenv()

from huggingface_hub import InferenceClient, hf_hub_download

token = os.getenv('HF_TOKEN')
client = InferenceClient(token=token)

fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)

with open(path, 'rb') as f:
    img_bytes = f.read()

# Method 1: image_to_text WITHOUT model (let HF auto-detect)
print('=== image_to_text without model ===')
try:
    result = client.image_to_text(img_bytes)
    # image_to_text can return an output object instead of a plain string.
    # Slicing that object raises, which made a successful call look like an
    # error; read its generated_text (or the value itself if it is a string).
    text = getattr(result, 'generated_text', result)
    print(f'Result: {str(text)[:300]}')
except Exception as e:
    print(f'Error: {type(e).__name__}: {e}')

# Method 2: Explicitly pass model as full URL to force hf-inference
print('\n=== image_to_text with explicit URL ===')
try:
    result = client.image_to_text(
        img_bytes,
        model='https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct'
    )
    text = getattr(result, 'generated_text', result)
    print(f'Result: {str(text)[:300]}')
except Exception as e:
    print(f'Error: {type(e).__name__}: {e}')

# Method 3: visual_question_answering
print('\n=== visual_question_answering without model ===')
try:
    result = client.visual_question_answering(
        img_bytes,
        question='Describe this image in detail.'
    )
    print(f'Result: {result}')
except Exception as e:
    print(f'Error: {type(e).__name__}: {e}')

# Method 4: chat_completion with the image inlined as a base64 data URI.
# NOTE(review): the banner mentions a provider and a URL, but the call below
# uses the default-provider client and a plain model id.
print('\n=== chat_completion with provider and model as URL ===')
b64 = base64.b64encode(img_bytes).decode('utf-8')
data_uri = f'data:image/png;base64,{b64}'
try:
    resp = client.chat_completion(
        model='meta-llama/Llama-3.2-11B-Vision-Instruct',
        messages=[{
            'role': 'user',
            'content': [
                {'type': 'text', 'text': 'Describe this image in detail.'},
                {'type': 'image_url', 'image_url': {'url': data_uri}},
            ]
        }],
        max_tokens=1024,
        temperature=0,
    )
    print(f'Result: {resp.choices[0].message.content[:300]}')
except Exception as e:
    print(f'Error: {type(e).__name__}: {e}')
|
_test_hf_inference_client.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: call InferenceClient.image_to_text with two models and
# visual_question_answering with one, all on the same GAIA validation image.
import base64
import os

from dotenv import load_dotenv

load_dotenv()

token = os.getenv('HF_TOKEN')

from huggingface_hub import InferenceClient, hf_hub_download

client = InferenceClient(token=token)

fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)

with open(path, 'rb') as f:
    img_bytes = f.read()

# Methods 1 and 2: image_to_text, once per (banner, model) pair.
caption_attempts = (
    ("=== image_to_text ===", "meta-llama/Llama-3.2-11B-Vision-Instruct"),
    ("\n=== image_to_text (blip2) ===", "Salesforce/blip2-flan-t5-xl"),
)
for banner, model_id in caption_attempts:
    print(banner)
    try:
        result = client.image_to_text(img_bytes, model=model_id)
        # image_to_text can return an output object instead of a plain string.
        # Slicing that object raises, which made a successful call look like
        # a failure; read its generated_text (or the value itself if a str).
        text = getattr(result, 'generated_text', result)
        print(f'Result: {str(text)[:300]}')
    except Exception as e:
        print(f'Exception: {type(e).__name__}: {e}')

# Method 3: visual_question_answering (document_question_answering is meant
# for documents, so it is not used here).
print("\n=== visual_question_answering ===")
try:
    result = client.visual_question_answering(
        img_bytes,
        question="Describe this image in detail. Include any visible text, numbers, labels, diagrams, chess piece positions.",
        model="meta-llama/Llama-3.2-11B-Vision-Instruct"
    )
    print(f'Result: {result}')
except Exception as e:
    print(f'Exception: {type(e).__name__}: {e}')
|
_test_hf_methods.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: inspect a model's Hub metadata, POST the raw image bytes to
# two router URLs, and run nslookup on api-inference.huggingface.co.
import base64
import os
import subprocess

import requests
from dotenv import load_dotenv

load_dotenv()

token = os.getenv('HF_TOKEN')

from huggingface_hub import hf_hub_download, HfApi

fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)

with open(path, 'rb') as f:
    img_bytes = f.read()

# Method: read the model's Hub metadata and print the inference-related
# attributes. HfApi exposes this as model_info(); the previously used
# get_model_info() is not an HfApi method, so this step always ended in the
# except branch.
api = HfApi(token=token)
try:
    info = api.model_info('meta-llama/Llama-3.2-11B-Vision-Instruct')
    # Check inference-related attributes
    for attr in dir(info):
        lowered = attr.lower()
        if 'infer' in lowered or 'pipeline' in lowered or 'widget' in lowered:
            print(f'{attr}: {getattr(info, attr)}')
except Exception as e:
    print(f'get_model_info error: {e}')

# Method: Try sending image bytes to the HF inference API
print('\n=== Trying raw image to text task ===')
model_id = 'meta-llama/Llama-3.2-11B-Vision-Instruct'

# Candidate router paths for the hf-inference provider: the bare model URL
# and the same URL with an image-to-text suffix.
urls_to_try = [
    ('router huggingface.co direct', f'https://router.huggingface.co/hf-inference/models/{model_id}'),
    ('router with tasks prefix', f'https://router.huggingface.co/hf-inference/models/{model_id}/image-to-text'),
]

for name, url in urls_to_try:
    print(f'\n--- {name} ---')
    try:
        resp = requests.post(
            url,
            headers={"Authorization": f"Bearer {token}", "Content-Type": "application/octet-stream"},
            data=img_bytes,
            timeout=90
        )
        print(f'Status: {resp.status_code}')
        print(f'Response: {resp.text[:300]}')
    except Exception as e:
        print(f'Exception: {e}')

# Method: Try to resolve api-inference.huggingface.co via alternative DNS
print('\n=== Trying api-inference.huggingface.co with nslookup ===')
try:
    # Argument list (no shell); gives up after 10 seconds.
    result = subprocess.run(['nslookup', 'api-inference.huggingface.co'], capture_output=True, text=True, timeout=10)
    print(f'nslookup stdout: {result.stdout}')
    print(f'nslookup stderr: {result.stderr}')
except Exception as e:
    print(f'nslookup failed: {e}')
|
_test_hf_providers.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: call image_to_text on one GAIA validation image through
# several InferenceClient providers and print one result line per provider.
import os

from dotenv import load_dotenv

load_dotenv()

from huggingface_hub import InferenceClient
from huggingface_hub import hf_hub_download

token = os.getenv('HF_TOKEN')

# The image is the same for every provider, so it is fetched and read once
# here. Previously this ran inside the loop, so it was repeated per provider
# and a download problem was reported as if each provider had failed.
fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)
with open(path, 'rb') as f:
    img_bytes = f.read()

# Try with explicit provider
for provider_name in ['hf-inference', 'together', 'fal-ai', 'replicate', 'novita', 'nebius']:
    try:
        client = InferenceClient(token=token, provider=provider_name)
        result = client.image_to_text(img_bytes, model='meta-llama/Llama-3.2-11B-Vision-Instruct')
        # image_to_text can return an output object instead of a plain string.
        # Slicing that object raises, which turned a success into an error
        # line; read its generated_text (or the value itself if it is a str).
        text = getattr(result, 'generated_text', result)
        print(f'Provider {provider_name}: OK - {str(text)[:200]}')
    except Exception as e:
        # Keep each failure to one short line so the providers stay comparable.
        err_str = str(e)[:100]
        print(f'Provider {provider_name}: {type(e).__name__} - {err_str}')
|
_test_hf_tasks.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: POST one GAIA validation image to the hf-inference router
# in three request shapes and print the status and start of each reply.
import base64
import os

import requests
from dotenv import load_dotenv

load_dotenv()

hf_token = os.getenv('HF_TOKEN')

from huggingface_hub import hf_hub_download

image_name = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
image_path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename=f'2023/validation/{image_name}',
    repo_type='dataset',
    token=hf_token,
)

with open(image_path, 'rb') as handle:
    raw_image = handle.read()

encoded_image = base64.b64encode(raw_image).decode('utf-8')

# Authorization header shared by every request below.
auth = {
    "Authorization": f"Bearer {hf_token}",
}

router_root = "https://router.huggingface.co/hf-inference/models"
llama_url = f"{router_root}/meta-llama/Llama-3.2-11B-Vision-Instruct"

# Option 1: raw image bytes, with the x-use-cache header set to "false".
print("=== Option 1: router.huggingface.co with raw image ===")
try:
    binary_headers = dict(auth)
    binary_headers["Content-Type"] = "application/octet-stream"
    binary_headers["x-use-cache"] = "false"
    reply = requests.post(llama_url, headers=binary_headers, data=raw_image, timeout=60)
    print(f'Status: {reply.status_code}')
    print(f'Response: {reply.text[:300]}')
except Exception as exc:
    print(f'Exception: {exc}')

# Option 2: JSON body carrying the image as a base64 data URI in "inputs".
print("\n=== Option 2: router with image-to-text task ===")
try:
    json_headers = dict(auth)
    json_headers["Content-Type"] = "application/json"
    json_body = {
        "inputs": f"data:image/png;base64,{encoded_image}",
        "parameters": {"max_new_tokens": 500},
    }
    reply = requests.post(llama_url, headers=json_headers, json=json_body, timeout=60)
    print(f'Status: {reply.status_code}')
    print(f'Response: {reply.text[:300]}')
except Exception as exc:
    print(f'Exception: {exc}')

# Option 3: raw image bytes again, once per candidate model.
print("\n=== Option 3: try alternative model ===")
candidate_models = [
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "Salesforce/blip2-flan-t5-xl",
]
for candidate in candidate_models:
    print(f'Trying {candidate}')
    try:
        stream_headers = dict(auth)
        stream_headers["Content-Type"] = "application/octet-stream"
        reply = requests.post(
            f"{router_root}/{candidate}",
            headers=stream_headers,
            data=raw_image,
            timeout=30,
        )
        print(f' Status: {reply.status_code}')
        print(f' Response: {reply.text[:200]}')
    except Exception as exc:
        print(f' Exception: {exc}')
|
_test_hf_tasks2.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: POST the raw image to two router URLs, check whether
# api-inference.huggingface.co resolves, and send a text-only request to the
# router model URL.
import base64
import json
import os
import socket

import requests
from dotenv import load_dotenv

load_dotenv()

token = os.getenv('HF_TOKEN')

from huggingface_hub import hf_hub_download

fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)

with open(path, 'rb') as f:
    img_bytes = f.read()

headers = {"Authorization": f"Bearer {token}"}

# Method 1: POST the image bytes to the model URL, with and without an
# image-to-text suffix.
print("=== Method 1: Direct model endpoint via router ===")
for url in [
    "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.2-11B-Vision-Instruct",
    "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.2-11B-Vision-Instruct/image-to-text",
]:
    try:
        resp = requests.post(
            url,
            headers={**headers, "Content-Type": "application/octet-stream"},
            data=img_bytes,
            timeout=60
        )
        print(f' {url}: Status {resp.status_code}')
        if resp.status_code == 200:
            print(f' OK: {resp.text[:500]}')
        else:
            print(f' Error: {resp.text[:200]}')
    except Exception as e:
        print(f' Exception: {e}')

# Method 2: check whether api-inference.huggingface.co resolves at all.
print("\n=== Method 2: Direct HF endpoint via HTTPS ===")
try:
    ips = socket.getaddrinfo('api-inference.huggingface.co', 443)
    print(f' api-inference.huggingface.co resolved to: {ips}')
except Exception as e:
    print(f' api-inference.huggingface.co DNS: {e}')
    # Retry the lookup restricted to IPv4 (this is a second DNS query, not a
    # ping) in case the first failure was transient or specific to one
    # address family.
    try:
        ips = socket.getaddrinfo('api-inference.huggingface.co', 443, socket.AF_INET)
        print(f' api-inference.huggingface.co (IPv4) resolved to: {ips}')
    except Exception as e2:
        print(f' api-inference.huggingface.co (IPv4) DNS: {e2}')

# Method 3: text-generation style request through the router. Only a text
# prompt is sent; the image is not part of this request, so the unused
# base64 payload that used to be built here has been dropped.
print("\n=== Method 3: Text generation inference via router ===")
try:
    resp = requests.post(
        "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.2-11B-Vision-Instruct",
        headers={**headers, "Content-Type": "application/json"},
        json={"inputs": "Describe this image", "parameters": {"max_new_tokens": 500}},
        timeout=30
    )
    print(f' Status: {resp.status_code}')
    print(f' Response: {resp.text[:200]}')
except Exception as e:
    print(f' Exception: {e}')
|
_test_hf_vision.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: ask a vision chat model (default InferenceClient provider)
# to describe one GAIA validation image and dump the whole response object.
import base64
import os
import traceback

from dotenv import load_dotenv

load_dotenv()

from huggingface_hub import InferenceClient, hf_hub_download

hf_token = os.getenv('HF_TOKEN')
client = InferenceClient(token=hf_token)

image_name = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
image_path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename=f'2023/validation/{image_name}',
    repo_type='dataset',
    token=hf_token,
)

with open(image_path, 'rb') as handle:
    raw_image = handle.read()
encoded = base64.b64encode(raw_image).decode('utf-8')
image_uri = f'data:image/png;base64,{encoded}'
print(f'Image size: {len(raw_image)} bytes, b64 len: {len(encoded)}')

# One user turn: a text instruction followed by the inlined image.
conversation = [{
    'role': 'user',
    'content': [
        {'type': 'text', 'text': 'Describe this image in detail.'},
        {'type': 'image_url', 'image_url': {'url': image_uri}},
    ],
}]

try:
    reply = client.chat_completion(
        model='meta-llama/Llama-3.2-11B-Vision-Instruct',
        messages=conversation,
        max_tokens=512,
        temperature=0,
    )
    print(f'Response: {reply}')
    print(f'Choices: {reply.choices}')
    # The first choice's content is printed only when a choice came back.
    if reply.choices:
        print(f'Content: {reply.choices[0].message.content}')
except Exception as exc:
    print(f'Exception type: {type(exc).__name__}')
    print(f'Exception: {exc}')
    traceback.print_exc()
|
_test_images.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: list the questions from the scoring service whose attached
# file is an image (.png / .jpg / .jpeg).
import requests

r = requests.get('https://agents-course-unit4-scoring.hf.space/questions', timeout=15)
# Stop on an HTTP error status here, so an error page is not handed to
# r.json() and reported as a JSON decoding problem.
r.raise_for_status()
data = r.json()
image_exts = ('.png', '.jpg', '.jpeg')
for item in data:
    # `or ''` also covers an explicit null file_name.
    fn = item.get('file_name', '') or ''
    if fn.lower().endswith(image_exts):
        tid = item.get('task_id', '?')
        # Same null guard as file_name, so a null question cannot break the
        # slice; only the first 80 characters are shown.
        q = (item.get('question', '') or '')[:80]
        print(f'{tid}: file={fn} question={q}')
|
_test_models.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch script: time three local image-captioning pipelines and one VQA
# pipeline on a single GAIA validation image.
import os
import time

from dotenv import load_dotenv

load_dotenv()

from PIL import Image
from huggingface_hub import hf_hub_download
from transformers import pipeline

image_name = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
hf_token = os.getenv('HF_TOKEN')
image_path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename=f'2023/validation/{image_name}',
    repo_type='dataset',
    token=hf_token,
)
picture = Image.open(image_path)

# (banner, checkpoint) pairs; each is loaded and then run with
# max_new_tokens=30, in this order.
captioning_runs = (
    ('=== BLIP with max_new_tokens=30 ===', "Salesforce/blip-image-captioning-base"),
    ('\n=== BLIP-large ===', "Salesforce/blip-image-captioning-large"),
    ('\n=== vit-gpt2 with max_new_tokens=30 ===', "nlpconnect/vit-gpt2-image-captioning"),
)
for banner, checkpoint in captioning_runs:
    print(banner)
    clock = time.time()
    try:
        captioner = pipeline("image-to-text", model=checkpoint)
        print(f'Load: {time.time()-clock:.1f}s')
        clock = time.time()
        caption = captioner(picture, max_new_tokens=30)
        print(f'Inference: {time.time()-clock:.1f}s')
        print(f'Result: {caption}')
    except Exception as exc:
        # Report the failure and carry on with the next checkpoint.
        print(f'Error: {exc}')

# Use VQA model with different questions
print('\n=== VQA multiple questions ===')
clock = time.time()
try:
    answerer = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
    print(f'Load: {time.time()-clock:.1f}s')
    questions = ['What is in this image?', 'What color is the background?', 'What shapes do you see?']
    for question in questions:
        clock = time.time()
        answer = answerer(picture, question, top_k=1)
        print(f'Q: {question} -> {answer} ({time.time()-clock:.1f}s)')
except Exception as exc:
    print(f'Error: {exc}')
|
_test_network.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, base64, http.client, ssl
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
load_dotenv()
|
| 4 |
+
|
| 5 |
+
token = os.getenv('HF_TOKEN')
|
| 6 |
+
from huggingface_hub import hf_hub_download
|
| 7 |
+
|
| 8 |
+
fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
|
| 9 |
+
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)
|
| 10 |
+
with open(path, 'rb') as f:
|
| 11 |
+
img_bytes = f.read()
|
| 12 |
+
|
| 13 |
+
# Try to find the IP for api-inference.huggingface.co using different DNS methods
|
| 14 |
+
import subprocess
|
| 15 |
+
# Use ping -n 1 to trigger DNS resolution
|
| 16 |
+
try:
|
| 17 |
+
result = subprocess.run(['ping', '-n', '1', 'api-inference.huggingface.co'], capture_output=True, text=True, timeout=10)
|
| 18 |
+
print(f'ping stdout:\n{result.stdout}')
|
| 19 |
+
print(f'ping stderr:\n{result.stderr}')
|
| 20 |
+
except Exception as e:
|
| 21 |
+
print(f'ping failed: {e}')
|
| 22 |
+
|
| 23 |
+
# Try using the HF custom inference endpoint URL
|
| 24 |
+
# The old endpoint used Cloudflare, let me try with known Cloudflare IPs
|
| 25 |
+
# But first, let me try to fix DNS by using the hosts file equivalent
|
| 26 |
+
|
| 27 |
+
# Actually let me try a completely different approach:
|
| 28 |
+
# Use the requests library with a session and custom DNS resolution
|
| 29 |
+
print('\n=== Attempting direct connection to known HF IPs ===')
|
| 30 |
+
# Check if hf-inference is accessible via the router with the right path
|
| 31 |
+
# Some endpoints work with router.huggingface.co/hf-inference/models/{model}
|
| 32 |
+
# but vision models might not work through hf-inference provider
|
| 33 |
+
|
| 34 |
+
# Let me try using tasks endpoint via the api-inference domain via a direct IP
|
| 35 |
+
# The actual Cloudflare IPs change but let me check common ones
|
| 36 |
+
import socket
|
| 37 |
+
|
| 38 |
+
# Try to resolve via DNS-over-HTTPS or just use requests.get with allow_redirects
|
| 39 |
+
for domain in ['router.huggingface.co', 'huggingface.co']:
|
| 40 |
+
try:
|
| 41 |
+
ip = socket.gethostbyname(domain)
|
| 42 |
+
print(f'{domain} -> {ip}')
|
| 43 |
+
except:
|
| 44 |
+
print(f'{domain}: DNS failed')
|
| 45 |
+
|
| 46 |
+
# Try the curl equivalent to check what the actual endpoint returns
|
| 47 |
+
print('\n=== Testing model endpoint via HTTP GET ===')
|
| 48 |
+
try:
|
| 49 |
+
resp = requests.get(
|
| 50 |
+
f'https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct',
|
| 51 |
+
headers={'Authorization': f'Bearer {token}'},
|
| 52 |
+
timeout=15,
|
| 53 |
+
allow_redirects=True
|
| 54 |
+
)
|
| 55 |
+
print(f'Status: {resp.status_code}')
|
| 56 |
+
except Exception as e:
|
| 57 |
+
print(f'Error: {e}')
|
| 58 |
+
|
| 59 |
+
# Try the HF Spaces API for chess recognition
|
| 60 |
+
print('\n=== Trying HF Spaces chess recognizer ===')
|
| 61 |
+
import requests as req
|
| 62 |
+
try:
|
| 63 |
+
resp = req.post(
|
| 64 |
+
'https://salominavina-chessboard-recognizer.hf.space/api/predict',
|
| 65 |
+
headers={'Authorization': f'Bearer {token}'},
|
| 66 |
+
files={'image': img_bytes},
|
| 67 |
+
timeout=30
|
| 68 |
+
)
|
| 69 |
+
print(f'Status: {resp.status_code}, Response: {resp.text[:300]}')
|
| 70 |
+
except Exception as e:
|
| 71 |
+
print(f'Error: {e}')
|
| 72 |
+
|
| 73 |
+
# Try another spaces approach
|
| 74 |
+
try:
|
| 75 |
+
resp = req.post(
|
| 76 |
+
'https://salominavina-chessboard-recognizer.hf.space/run/predict',
|
| 77 |
+
json={
|
| 78 |
+
'data': [base64.b64encode(img_bytes).decode('utf-8')]
|
| 79 |
+
},
|
| 80 |
+
timeout=30
|
| 81 |
+
)
|
| 82 |
+
print(f'Gradio API Status: {resp.status_code}, Response: {resp.text[:300]}')
|
| 83 |
+
except Exception as e:
|
| 84 |
+
print(f'Gradio API Error: {e}')
|
_test_other_vision.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, base64, requests, json
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
load_dotenv()
|
| 4 |
+
|
| 5 |
+
token = os.getenv('HF_TOKEN')
|
| 6 |
+
|
| 7 |
+
from huggingface_hub import hf_hub_download
|
| 8 |
+
fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
|
| 9 |
+
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)
|
| 10 |
+
|
| 11 |
+
with open(path, 'rb') as f:
|
| 12 |
+
img_bytes = f.read()
|
| 13 |
+
b64 = base64.b64encode(img_bytes).decode('utf-8')
|
| 14 |
+
data_uri = f'data:image/png;base64,{b64}'
|
| 15 |
+
|
| 16 |
+
# Try OpenCode Zen vision models - they might have some
|
| 17 |
+
zen_key = os.getenv('ZEN_API_KEY') or os.getenv('OPENCODE_ZEN_API_KEY')
|
| 18 |
+
print(f'ZEN_KEY present: {bool(zen_key)}')
|
| 19 |
+
|
| 20 |
+
# Try through the opencode zen provider
|
| 21 |
+
zen_models = [
|
| 22 |
+
'gpt-4o-mini',
|
| 23 |
+
'gpt-4o',
|
| 24 |
+
'claude-3-5-sonnet-latest',
|
| 25 |
+
'gemini-2.0-flash-exp',
|
| 26 |
+
'qwen-vl-plus',
|
| 27 |
+
]
|
| 28 |
+
|
| 29 |
+
# Also try OpenRouter - we have a key for that
|
| 30 |
+
openrouter_key = os.getenv('OPENROUTER_API_KEY')
|
| 31 |
+
print(f'OpenRouter key present: {bool(openrouter_key)}')
|
| 32 |
+
|
| 33 |
+
if openrouter_key:
|
| 34 |
+
print('\n=== Trying OpenRouter vision models ===')
|
| 35 |
+
openrouter_models = [
|
| 36 |
+
'openai/gpt-4o-mini',
|
| 37 |
+
'google/gemini-2.0-flash-exp:free',
|
| 38 |
+
'qwen/qwen-vl-plus:free',
|
| 39 |
+
]
|
| 40 |
+
for model in openrouter_models:
|
| 41 |
+
payload = {
|
| 42 |
+
'model': model,
|
| 43 |
+
'messages': [{
|
| 44 |
+
'role': 'user',
|
| 45 |
+
'content': [
|
| 46 |
+
{'type': 'text', 'text': 'Describe this image in detail. Include any visible text, numbers, labels, diagrams, chess piece positions, charts, graphs, or specific visual elements. Be precise and thorough.'},
|
| 47 |
+
{'type': 'image_url', 'image_url': {'url': data_uri}},
|
| 48 |
+
]
|
| 49 |
+
}],
|
| 50 |
+
'max_tokens': 1024,
|
| 51 |
+
}
|
| 52 |
+
print(f' Trying {model}...')
|
| 53 |
+
try:
|
| 54 |
+
resp = requests.post(
|
| 55 |
+
'https://openrouter.ai/api/v1/chat/completions',
|
| 56 |
+
headers={
|
| 57 |
+
'Authorization': f'Bearer {openrouter_key}',
|
| 58 |
+
'Content-Type': 'application/json',
|
| 59 |
+
},
|
| 60 |
+
json=payload,
|
| 61 |
+
timeout=120
|
| 62 |
+
)
|
| 63 |
+
if resp.status_code == 200:
|
| 64 |
+
text = resp.json()['choices'][0]['message']['content']
|
| 65 |
+
print(f' SUCCESS! Response: {text[:500]}')
|
| 66 |
+
else:
|
| 67 |
+
print(f' Status {resp.status_code}: {resp.text[:150]}')
|
| 68 |
+
except Exception as e:
|
| 69 |
+
print(f' Exception: {e}')
|
| 70 |
+
|
| 71 |
+
# Also check which opencode_zen models support vision
|
| 72 |
+
print('\n=== Checking opencode_zen models ===')
|
| 73 |
+
auth_file = os.path.expanduser('~/.local/share/opencode/auth.json')
|
| 74 |
+
if os.path.exists(auth_file):
|
| 75 |
+
with open(auth_file) as f:
|
| 76 |
+
auth = json.load(f)
|
| 77 |
+
zen_api_key = auth.get('providers', {}).get('opencode_zen', {}).get('api_key', '')
|
| 78 |
+
print(f'ZEN key from auth.json: {bool(zen_api_key)}')
|
| 79 |
+
if zen_api_key:
|
| 80 |
+
model = 'deepseek-v4-flash-free'
|
| 81 |
+
payload = {
|
| 82 |
+
'model': model,
|
| 83 |
+
'messages': [{
|
| 84 |
+
'role': 'user',
|
| 85 |
+
'content': [
|
| 86 |
+
{'type': 'text', 'text': 'Describe this image briefly.'},
|
| 87 |
+
{'type': 'image_url', 'image_url': {'url': data_uri}},
|
| 88 |
+
]
|
| 89 |
+
}],
|
| 90 |
+
'max_tokens': 512,
|
| 91 |
+
}
|
| 92 |
+
try:
|
| 93 |
+
resp = requests.post(
|
| 94 |
+
'https://opencode.ai/zen/v1/chat/completions',
|
| 95 |
+
headers={
|
| 96 |
+
'Authorization': f'Bearer {zen_api_key}',
|
| 97 |
+
'Content-Type': 'application/json',
|
| 98 |
+
},
|
| 99 |
+
json=payload,
|
| 100 |
+
timeout=60
|
| 101 |
+
)
|
| 102 |
+
print(f' deepseek-v4-flash-free: Status {resp.status_code}')
|
| 103 |
+
if resp.status_code == 200:
|
| 104 |
+
print(f' Response: {resp.json()["choices"][0]["message"]["content"][:300]}')
|
| 105 |
+
else:
|
| 106 |
+
print(f' Error: {resp.text[:200]}')
|
| 107 |
+
except Exception as e:
|
| 108 |
+
print(f' Exception: {e}')
|
_test_vision.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
load_dotenv()
|
| 4 |
+
from tools.vision.describe_image import describe_image
|
| 5 |
+
from huggingface_hub import hf_hub_download
|
| 6 |
+
|
| 7 |
+
token = os.getenv('HF_TOKEN')
|
| 8 |
+
fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
|
| 9 |
+
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)
|
| 10 |
+
print(f'Image path: {path}')
|
| 11 |
+
|
| 12 |
+
result = describe_image.invoke({'path': path})
|
| 13 |
+
print(f'Result length: {len(result)}')
|
| 14 |
+
print(f'Result:\n{result[:1200]}')
|
_test_vqa.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, time
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
load_dotenv()
|
| 4 |
+
from PIL import Image
|
| 5 |
+
from huggingface_hub import hf_hub_download
|
| 6 |
+
from transformers import pipeline
|
| 7 |
+
|
| 8 |
+
fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
|
| 9 |
+
token = os.getenv('HF_TOKEN')
|
| 10 |
+
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)
|
| 11 |
+
|
| 12 |
+
img = Image.open(path)
|
| 13 |
+
|
| 14 |
+
# Try VQA model
|
| 15 |
+
print('Loading VQA model...')
|
| 16 |
+
t0 = time.time()
|
| 17 |
+
try:
|
| 18 |
+
vqa = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
|
| 19 |
+
print(f'Model loaded in {time.time()-t0:.1f}s')
|
| 20 |
+
t0 = time.time()
|
| 21 |
+
result = vqa(img, "What is in this image?")
|
| 22 |
+
print(f'Result in {time.time()-t0:.1f}s: {result}')
|
| 23 |
+
except Exception as e:
|
| 24 |
+
print(f'Error: {type(e).__name__}: {e}')
|
| 25 |
+
|
| 26 |
+
# Try Moondream2 (small, good for detailed captioning)
|
| 27 |
+
print('\nTrying Moondream2...')
|
| 28 |
+
t0 = time.time()
|
| 29 |
+
try:
|
| 30 |
+
moondream = pipeline("image-to-text", model="vikhyatk/moondream2")
|
| 31 |
+
print(f'Model loaded in {time.time()-t0:.1f}s')
|
| 32 |
+
t0 = time.time()
|
| 33 |
+
result = moondream(img)
|
| 34 |
+
print(f'Result in {time.time()-t0:.1f}s: {result}')
|
| 35 |
+
except Exception as e:
|
| 36 |
+
print(f'Error: {type(e).__name__}: {e}')
|
_test_wiki.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import WikipediaLoader
|
| 2 |
+
try:
|
| 3 |
+
docs = WikipediaLoader(query='Mercedes Sosa', load_max_docs=2).load()
|
| 4 |
+
for d in docs:
|
| 5 |
+
title = d.metadata.get("title", "?")
|
| 6 |
+
content = d.page_content[:100]
|
| 7 |
+
print(f'Title: {title}')
|
| 8 |
+
print(f'Content: {content}')
|
| 9 |
+
except Exception as e:
|
| 10 |
+
print(f'Error: {e}')
|
_test_zen_vision.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
load_dotenv()
|
| 4 |
+
|
| 5 |
+
# Check all relevant env vars
|
| 6 |
+
keys = ['ZEN_API_KEY', 'OPENCODE_ZEN_API_KEY', 'OPENROUTER_API_KEY', 'HF_TOKEN', 'OPENAI_API_KEY']
|
| 7 |
+
for k in keys:
|
| 8 |
+
v = os.getenv(k, '')
|
| 9 |
+
print(f'{k}: {"present" if v else "MISSING"}')
|
| 10 |
+
|
| 11 |
+
# Try the opencode zen endpoint with vision
|
| 12 |
+
import base64, requests, json
|
| 13 |
+
|
| 14 |
+
from huggingface_hub import hf_hub_download
|
| 15 |
+
fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
|
| 16 |
+
token = os.getenv('HF_TOKEN')
|
| 17 |
+
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)
|
| 18 |
+
|
| 19 |
+
with open(path, 'rb') as f:
|
| 20 |
+
img_bytes = f.read()
|
| 21 |
+
b64 = base64.b64encode(img_bytes).decode('utf-8')
|
| 22 |
+
data_uri = f'data:image/png;base64,{b64}'
|
| 23 |
+
|
| 24 |
+
zen_key = os.getenv('ZEN_API_KEY')
|
| 25 |
+
if zen_key:
|
| 26 |
+
payload = {
|
| 27 |
+
'model': 'deepseek-v4-flash-free',
|
| 28 |
+
'messages': [{
|
| 29 |
+
'role': 'user',
|
| 30 |
+
'content': [
|
| 31 |
+
{'type': 'text', 'text': 'Describe this image briefly.'},
|
| 32 |
+
{'type': 'image_url', 'image_url': {'url': data_uri}},
|
| 33 |
+
]
|
| 34 |
+
}],
|
| 35 |
+
'max_tokens': 512,
|
| 36 |
+
}
|
| 37 |
+
try:
|
| 38 |
+
resp = requests.post(
|
| 39 |
+
'https://opencode.ai/zen/v1/chat/completions',
|
| 40 |
+
headers={
|
| 41 |
+
'Authorization': f'Bearer {zen_key}',
|
| 42 |
+
'Content-Type': 'application/json',
|
| 43 |
+
},
|
| 44 |
+
json=payload,
|
| 45 |
+
timeout=60
|
| 46 |
+
)
|
| 47 |
+
print(f'\nZEN deepseek-v4-flash-free vision: Status {resp.status_code}')
|
| 48 |
+
if resp.status_code == 200:
|
| 49 |
+
print(f'Response: {resp.json()["choices"][0]["message"]["content"][:500]}')
|
| 50 |
+
else:
|
| 51 |
+
print(f'Error: {resp.text[:300]}')
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f'Exception: {e}')
|
_test_zen_vision_all.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, base64, requests
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
load_dotenv()
|
| 4 |
+
|
| 5 |
+
from huggingface_hub import hf_hub_download
|
| 6 |
+
fn = 'cca530fc-4052-43b2-b130-b30968d8aa44.png'
|
| 7 |
+
token = os.getenv('HF_TOKEN')
|
| 8 |
+
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename=f'2023/validation/{fn}', repo_type='dataset', token=token)
|
| 9 |
+
|
| 10 |
+
with open(path, 'rb') as f:
|
| 11 |
+
img_bytes = f.read()
|
| 12 |
+
b64 = base64.b64encode(img_bytes).decode('utf-8')
|
| 13 |
+
data_uri = f'data:image/png;base64,{b64}'
|
| 14 |
+
|
| 15 |
+
zen_key = os.getenv('ZEN_API_KEY')
|
| 16 |
+
models = ['deepseek-v4-flash-free', 'nemotron-3-super-free', 'big-pickle']
|
| 17 |
+
|
| 18 |
+
for model in models:
|
| 19 |
+
payload = {
|
| 20 |
+
'model': model,
|
| 21 |
+
'messages': [{
|
| 22 |
+
'role': 'user',
|
| 23 |
+
'content': [
|
| 24 |
+
{'type': 'text', 'text': 'Describe this image briefly.'},
|
| 25 |
+
{'type': 'image_url', 'image_url': {'url': data_uri}},
|
| 26 |
+
]
|
| 27 |
+
}],
|
| 28 |
+
'max_tokens': 512,
|
| 29 |
+
}
|
| 30 |
+
try:
|
| 31 |
+
resp = requests.post(
|
| 32 |
+
'https://opencode.ai/zen/v1/chat/completions',
|
| 33 |
+
headers={
|
| 34 |
+
'Authorization': f'Bearer {zen_key}',
|
| 35 |
+
'Content-Type': 'application/json',
|
| 36 |
+
},
|
| 37 |
+
json=payload,
|
| 38 |
+
timeout=60
|
| 39 |
+
)
|
| 40 |
+
print(f'{model}: Status {resp.status_code}')
|
| 41 |
+
if resp.status_code == 200:
|
| 42 |
+
print(f' OK: {resp.json()["choices"][0]["message"]["content"][:300]}')
|
| 43 |
+
else:
|
| 44 |
+
err = resp.json().get('error', {}).get('message', resp.text)[:150]
|
| 45 |
+
print(f' Error: {err}')
|
| 46 |
+
except Exception as e:
|
| 47 |
+
print(f'{model}: Exception: {e}')
|
agent.py
CHANGED
|
@@ -52,15 +52,16 @@ TOOL SELECTION:
|
|
| 52 |
- parse_spreadsheet: Excel (.xlsx) and CSV files. Always use this for spreadsheets.
|
| 53 |
- transcribe_audio: Audio files (.mp3, .wav). Call once, use the returned text immediately.
|
| 54 |
- python_repl: Calculations, data analysis, file processing. Variables persist between calls.
|
| 55 |
-
- get_youtube_transcript: YouTube video transcripts only.
|
|
|
|
| 56 |
|
| 57 |
EFFICIENCY RULES (save tool calls):
|
| 58 |
-
- After transcribe_audio returns text, USE IT. Do NOT call read_file on audio, do NOT search for audio files with python_repl.
|
| 59 |
-
- After read_file shows a .py script, run it with exec(open(path).read()) — do NOT rewrite the code.
|
| 60 |
-
- After getting a tool result, analyze it. Do not search again with slightly different queries.
|
| 61 |
-
- Never call the same tool with the same arguments twice.
|
| 62 |
-
-
|
| 63 |
-
- Don't install packages in python_repl — use what's already available."""
|
| 64 |
|
| 65 |
|
| 66 |
def call_model(state: AgentState):
|
|
|
|
| 52 |
- parse_spreadsheet: Excel (.xlsx) and CSV files. Always use this for spreadsheets.
|
| 53 |
- transcribe_audio: Audio files (.mp3, .wav). Call once, use the returned text immediately.
|
| 54 |
- python_repl: Calculations, data analysis, file processing. Variables persist between calls.
|
| 55 |
+
- get_youtube_transcript: YouTube video transcripts only.
|
| 56 |
+
- describe_image: Images (.png/.jpg/.jpeg). Use this to describe what's VISIBLE in an image (text, numbers, diagrams, chess pieces, charts). Call once and use the description.
|
| 57 |
|
| 58 |
EFFICIENCY RULES (save tool calls):
|
| 59 |
+
- After transcribe_audio returns text, USE IT. Do NOT call read_file on audio, do NOT search for audio files with python_repl.
|
| 60 |
+
- After read_file shows a .py script, run it with exec(open(path).read()) — do NOT rewrite the code.
|
| 61 |
+
- After getting a tool result, analyze it. Do not search again with slightly different queries.
|
| 62 |
+
- Never call the same tool with the same arguments twice.
|
| 63 |
+
- After describe_image returns a description, USE IT. Do not call describe_image again on the same file.
|
| 64 |
+
- Don't install packages in python_repl — use what's already available."""
|
| 65 |
|
| 66 |
|
| 67 |
def call_model(state: AgentState):
|
app.py
CHANGED
|
@@ -181,9 +181,11 @@ def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
|
|
| 181 |
".csv": "Tip: This is a CSV file. Use parse_spreadsheet to read it.",
|
| 182 |
".mp3": "Tip: This is an audio file. Use transcribe_audio to transcribe it.",
|
| 183 |
".wav": "Tip: This is an audio file. Use transcribe_audio to transcribe it.",
|
| 184 |
-
".png": "Tip: This is an image
|
| 185 |
-
|
| 186 |
-
".
|
|
|
|
|
|
|
| 187 |
".py": "Tip: This is a Python script. Use read_file to view, then python_repl with exec(open(path).read()) to run it directly.",
|
| 188 |
".pdf": "Tip: This is a PDF. Use read_file or python_repl with PyPDF2 to read it.",
|
| 189 |
}
|
|
|
|
| 181 |
".csv": "Tip: This is a CSV file. Use parse_spreadsheet to read it.",
|
| 182 |
".mp3": "Tip: This is an audio file. Use transcribe_audio to transcribe it.",
|
| 183 |
".wav": "Tip: This is an audio file. Use transcribe_audio to transcribe it.",
|
| 184 |
+
".png": "Tip: This is an image. Use describe_image to describe what is visible.",
|
| 185 |
+
|
| 186 |
+
".jpg": "Tip: This is an image. Use describe_image to describe what is visible.",
|
| 187 |
+
|
| 188 |
+
".jpeg": "Tip: This is an image. Use describe_image to describe what is visible.",
|
| 189 |
".py": "Tip: This is a Python script. Use read_file to view, then python_repl with exec(open(path).read()) to run it directly.",
|
| 190 |
".pdf": "Tip: This is a PDF. Use read_file or python_repl with PyPDF2 to read it.",
|
| 191 |
}
|
gaia_results.csv
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
task_id,question,submitted_answer,ground_truth,correct
|
| 2 |
-
8e867cd7-cff9-4e6c-867a-ff5ddc2550be,How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.,
|
| 3 |
a1e91b78-d3d8-4675-bb8d-62741b4b68a6,"In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?",,3,False
|
| 4 |
2d83110e-a098-4ebb-9987-066c06fa42d0,".rewsna eht sa ""tfel"" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI",right,Right,True
|
| 5 |
cca530fc-4052-43b2-b130-b30968d8aa44,Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.,,Rd5,False
|
| 6 |
-
4fc2f1ae-8625-45b5-ab34-ad4433bc21f8,Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?,,FunkMonk,
|
| 7 |
6f37996b-2ac7-44b0-8e68-6d28256631b4,"Given this table defining * on the set S = {a, b, c, d, e}
|
| 8 |
|
| 9 |
|*|a|b|c|d|e|
|
|
@@ -14,30 +14,30 @@ cca530fc-4052-43b2-b130-b30968d8aa44,Review the chess position provided in the i
|
|
| 14 |
|d|b|e|b|e|d|
|
| 15 |
|e|d|b|a|d|c|
|
| 16 |
|
| 17 |
-
provide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.","b,e","b, e",True
|
| 18 |
9d191bce-651d-4746-be2d-7ef8ecadb9c2,"Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
|
| 19 |
|
| 20 |
What does Teal'c say in response to the question ""Isn't that hot?""",,Extremely,False
|
| 21 |
-
cabe07ed-9eca-40ea-8ead-410ef5e83f91,What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?,,Louvrier,
|
| 22 |
3cef3a44-215e-4aed-8e3b-b1e3f08063b7,"I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:
|
| 23 |
|
| 24 |
milk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts
|
| 25 |
|
| 26 |
-
I need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.","broccoli, celery, fresh basil, lettuce, sweet
|
| 27 |
99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3,"Hi, I'm making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I'm not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can't quite make out what she's saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I've attached the recipe as Strawberry pie.mp3.
|
| 28 |
|
| 29 |
In your response, please only list the ingredients, not any measurements. So if the recipe calls for ""a pinch of salt"" or ""two cups of ripe strawberries"" the ingredients on the list would be ""salt"" and ""ripe strawberries"".
|
| 30 |
|
| 31 |
Please format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.",,"cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries",False
|
| 32 |
305ac316-eef6-4446-960a-92d80d542f82,Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.,,Wojciech,False
|
| 33 |
-
f918266a-b3e0-4914-865d-4faa564f1aef,What is the final numeric output from the attached Python code?,
|
| 34 |
3f57289b-8c60-48be-bd80-01f8099ca449,How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?,,519,False
|
| 35 |
1f975693-876d-457b-a649-393859e79bf3,"Hi, I was out sick from my classes on Friday, so I'm trying to figure out what I need to study for my Calculus mid-term next week. My friend from class sent me an audio recording of Professor Willowbrook giving out the recommended reading for the test, but my headphones are broken :(
|
| 36 |
|
| 37 |
Could you please listen to the recording for me and tell me the page numbers I'm supposed to go over? I've attached a file called Homework.mp3 that has the recording. Please provide just the page numbers as a comma-delimited list. And please provide the list in ascending order.",,"132, 133, 134, 197, 245",False
|
| 38 |
840bfca7-4f7b-481a-8794-c560c340185d,"On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?",,80GSFC21M0002,False
|
| 39 |
-
bda648d7-d618-4883-88f4-3466eabd860e,Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.,,Saint Petersburg,
|
| 40 |
-
cf106601-ab4f-4af9-b045-5295fe67b37d,"What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.",
|
| 41 |
-
a0c07678-e491-4bbc-8f0b-07405144218f,"Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.",
|
| 42 |
7bd855d8-463d-4ed5-93ca-5fe35145f733,The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.,89706.00,89706.00,True
|
| 43 |
-
5a0c1adf-205e-4841-a666-7c3ef95def9d,What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?,
|
|
|
|
| 1 |
task_id,question,submitted_answer,ground_truth,correct
|
| 2 |
+
8e867cd7-cff9-4e6c-867a-ff5ddc2550be,How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.,3,3,True
|
| 3 |
a1e91b78-d3d8-4675-bb8d-62741b4b68a6,"In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?",,3,False
|
| 4 |
2d83110e-a098-4ebb-9987-066c06fa42d0,".rewsna eht sa ""tfel"" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI",right,Right,True
|
| 5 |
cca530fc-4052-43b2-b130-b30968d8aa44,Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.,,Rd5,False
|
| 6 |
+
4fc2f1ae-8625-45b5-ab34-ad4433bc21f8,Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?,FunkMonk,FunkMonk,True
|
| 7 |
6f37996b-2ac7-44b0-8e68-6d28256631b4,"Given this table defining * on the set S = {a, b, c, d, e}
|
| 8 |
|
| 9 |
|*|a|b|c|d|e|
|
|
|
|
| 14 |
|d|b|e|b|e|d|
|
| 15 |
|e|d|b|a|d|c|
|
| 16 |
|
| 17 |
+
provide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.","b, e","b, e",True
|
| 18 |
9d191bce-651d-4746-be2d-7ef8ecadb9c2,"Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
|
| 19 |
|
| 20 |
What does Teal'c say in response to the question ""Isn't that hot?""",,Extremely,False
|
| 21 |
+
cabe07ed-9eca-40ea-8ead-410ef5e83f91,What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?,Louvrier,Louvrier,True
|
| 22 |
3cef3a44-215e-4aed-8e3b-b1e3f08063b7,"I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:
|
| 23 |
|
| 24 |
milk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts
|
| 25 |
|
| 26 |
+
I need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.","broccoli, celery, fresh basil, lettuce, sweet potatoes","broccoli, celery, fresh basil, lettuce, sweet potatoes",True
|
| 27 |
99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3,"Hi, I'm making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I'm not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can't quite make out what she's saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I've attached the recipe as Strawberry pie.mp3.
|
| 28 |
|
| 29 |
In your response, please only list the ingredients, not any measurements. So if the recipe calls for ""a pinch of salt"" or ""two cups of ripe strawberries"" the ingredients on the list would be ""salt"" and ""ripe strawberries"".
|
| 30 |
|
| 31 |
Please format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.",,"cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries",False
|
| 32 |
305ac316-eef6-4446-960a-92d80d542f82,Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.,,Wojciech,False
|
| 33 |
+
f918266a-b3e0-4914-865d-4faa564f1aef,What is the final numeric output from the attached Python code?,,0,False
|
| 34 |
3f57289b-8c60-48be-bd80-01f8099ca449,How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?,,519,False
|
| 35 |
1f975693-876d-457b-a649-393859e79bf3,"Hi, I was out sick from my classes on Friday, so I'm trying to figure out what I need to study for my Calculus mid-term next week. My friend from class sent me an audio recording of Professor Willowbrook giving out the recommended reading for the test, but my headphones are broken :(
|
| 36 |
|
| 37 |
Could you please listen to the recording for me and tell me the page numbers I'm supposed to go over? I've attached a file called Homework.mp3 that has the recording. Please provide just the page numbers as a comma-delimited list. And please provide the list in ascending order.",,"132, 133, 134, 197, 245",False
|
| 38 |
840bfca7-4f7b-481a-8794-c560c340185d,"On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?",,80GSFC21M0002,False
|
| 39 |
+
bda648d7-d618-4883-88f4-3466eabd860e,Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.,Saint Petersburg,Saint Petersburg,True
|
| 40 |
+
cf106601-ab4f-4af9-b045-5295fe67b37d,"What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.",,CUB,False
|
| 41 |
+
a0c07678-e491-4bbc-8f0b-07405144218f,"Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.",,"Yoshida, Uehara",False
|
| 42 |
7bd855d8-463d-4ed5-93ca-5fe35145f733,The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.,89706.00,89706.00,True
|
| 43 |
+
5a0c1adf-205e-4841-a666-7c3ef95def9d,What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?,,Claus,False
|
gaia_results.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
{
|
| 2 |
-
"score":
|
| 3 |
-
"correct":
|
| 4 |
"total": 20,
|
| 5 |
"results": [
|
| 6 |
{
|
| 7 |
"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
| 8 |
"question": "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.",
|
| 9 |
-
"submitted_answer": "
|
| 10 |
"ground_truth": "3",
|
| 11 |
-
"correct":
|
| 12 |
},
|
| 13 |
{
|
| 14 |
"task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
|
@@ -34,14 +34,14 @@
|
|
| 34 |
{
|
| 35 |
"task_id": "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
|
| 36 |
"question": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
|
| 37 |
-
"submitted_answer": "",
|
| 38 |
"ground_truth": "FunkMonk",
|
| 39 |
-
"correct":
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"task_id": "6f37996b-2ac7-44b0-8e68-6d28256631b4",
|
| 43 |
"question": "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.",
|
| 44 |
-
"submitted_answer": "b,e",
|
| 45 |
"ground_truth": "b, e",
|
| 46 |
"correct": true
|
| 47 |
},
|
|
@@ -55,16 +55,16 @@
|
|
| 55 |
{
|
| 56 |
"task_id": "cabe07ed-9eca-40ea-8ead-410ef5e83f91",
|
| 57 |
"question": "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?",
|
| 58 |
-
"submitted_answer": "",
|
| 59 |
"ground_truth": "Louvrier",
|
| 60 |
-
"correct":
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"task_id": "3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
|
| 64 |
"question": "I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\n\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\n\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.",
|
| 65 |
-
"submitted_answer": "broccoli, celery, fresh basil, lettuce, sweet
|
| 66 |
"ground_truth": "broccoli, celery, fresh basil, lettuce, sweet potatoes",
|
| 67 |
-
"correct":
|
| 68 |
},
|
| 69 |
{
|
| 70 |
"task_id": "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
|
@@ -83,9 +83,9 @@
|
|
| 83 |
{
|
| 84 |
"task_id": "f918266a-b3e0-4914-865d-4faa564f1aef",
|
| 85 |
"question": "What is the final numeric output from the attached Python code?",
|
| 86 |
-
"submitted_answer": "
|
| 87 |
"ground_truth": "0",
|
| 88 |
-
"correct":
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"task_id": "3f57289b-8c60-48be-bd80-01f8099ca449",
|
|
@@ -111,21 +111,21 @@
|
|
| 111 |
{
|
| 112 |
"task_id": "bda648d7-d618-4883-88f4-3466eabd860e",
|
| 113 |
"question": "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.",
|
| 114 |
-
"submitted_answer": "",
|
| 115 |
"ground_truth": "Saint Petersburg",
|
| 116 |
-
"correct":
|
| 117 |
},
|
| 118 |
{
|
| 119 |
"task_id": "cf106601-ab4f-4af9-b045-5295fe67b37d",
|
| 120 |
"question": "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.",
|
| 121 |
-
"submitted_answer": "
|
| 122 |
"ground_truth": "CUB",
|
| 123 |
-
"correct":
|
| 124 |
},
|
| 125 |
{
|
| 126 |
"task_id": "a0c07678-e491-4bbc-8f0b-07405144218f",
|
| 127 |
"question": "Who are the pitchers with the number before and after Taish\u014d Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.",
|
| 128 |
-
"submitted_answer": "
|
| 129 |
"ground_truth": "Yoshida, Uehara",
|
| 130 |
"correct": false
|
| 131 |
},
|
|
@@ -139,9 +139,9 @@
|
|
| 139 |
{
|
| 140 |
"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d",
|
| 141 |
"question": "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?",
|
| 142 |
-
"submitted_answer": "
|
| 143 |
"ground_truth": "Claus",
|
| 144 |
-
"correct":
|
| 145 |
}
|
| 146 |
]
|
| 147 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"score": 40.0,
|
| 3 |
+
"correct": 8,
|
| 4 |
"total": 20,
|
| 5 |
"results": [
|
| 6 |
{
|
| 7 |
"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
| 8 |
"question": "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.",
|
| 9 |
+
"submitted_answer": "3",
|
| 10 |
"ground_truth": "3",
|
| 11 |
+
"correct": true
|
| 12 |
},
|
| 13 |
{
|
| 14 |
"task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
|
|
|
| 34 |
{
|
| 35 |
"task_id": "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
|
| 36 |
"question": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
|
| 37 |
+
"submitted_answer": "FunkMonk",
|
| 38 |
"ground_truth": "FunkMonk",
|
| 39 |
+
"correct": true
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"task_id": "6f37996b-2ac7-44b0-8e68-6d28256631b4",
|
| 43 |
"question": "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.",
|
| 44 |
+
"submitted_answer": "b, e",
|
| 45 |
"ground_truth": "b, e",
|
| 46 |
"correct": true
|
| 47 |
},
|
|
|
|
| 55 |
{
|
| 56 |
"task_id": "cabe07ed-9eca-40ea-8ead-410ef5e83f91",
|
| 57 |
"question": "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?",
|
| 58 |
+
"submitted_answer": "Louvrier",
|
| 59 |
"ground_truth": "Louvrier",
|
| 60 |
+
"correct": true
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"task_id": "3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
|
| 64 |
"question": "I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\n\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\n\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.",
|
| 65 |
+
"submitted_answer": "broccoli, celery, fresh basil, lettuce, sweet potatoes",
|
| 66 |
"ground_truth": "broccoli, celery, fresh basil, lettuce, sweet potatoes",
|
| 67 |
+
"correct": true
|
| 68 |
},
|
| 69 |
{
|
| 70 |
"task_id": "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
|
|
|
| 83 |
{
|
| 84 |
"task_id": "f918266a-b3e0-4914-865d-4faa564f1aef",
|
| 85 |
"question": "What is the final numeric output from the attached Python code?",
|
| 86 |
+
"submitted_answer": "",
|
| 87 |
"ground_truth": "0",
|
| 88 |
+
"correct": false
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"task_id": "3f57289b-8c60-48be-bd80-01f8099ca449",
|
|
|
|
| 111 |
{
|
| 112 |
"task_id": "bda648d7-d618-4883-88f4-3466eabd860e",
|
| 113 |
"question": "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.",
|
| 114 |
+
"submitted_answer": "Saint Petersburg",
|
| 115 |
"ground_truth": "Saint Petersburg",
|
| 116 |
+
"correct": true
|
| 117 |
},
|
| 118 |
{
|
| 119 |
"task_id": "cf106601-ab4f-4af9-b045-5295fe67b37d",
|
| 120 |
"question": "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.",
|
| 121 |
+
"submitted_answer": "",
|
| 122 |
"ground_truth": "CUB",
|
| 123 |
+
"correct": false
|
| 124 |
},
|
| 125 |
{
|
| 126 |
"task_id": "a0c07678-e491-4bbc-8f0b-07405144218f",
|
| 127 |
"question": "Who are the pitchers with the number before and after Taish\u014d Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.",
|
| 128 |
+
"submitted_answer": "",
|
| 129 |
"ground_truth": "Yoshida, Uehara",
|
| 130 |
"correct": false
|
| 131 |
},
|
|
|
|
| 139 |
{
|
| 140 |
"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d",
|
| 141 |
"question": "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?",
|
| 142 |
+
"submitted_answer": "",
|
| 143 |
"ground_truth": "Claus",
|
| 144 |
+
"correct": false
|
| 145 |
}
|
| 146 |
]
|
| 147 |
}
|
run_local.py
CHANGED
|
@@ -137,9 +137,9 @@ def main():
|
|
| 137 |
".csv": "Tip: This is a CSV file. Use parse_spreadsheet to read it.",
|
| 138 |
".mp3": "Tip: This is an audio file. Use transcribe_audio to transcribe it.",
|
| 139 |
".wav": "Tip: This is an audio file. Use transcribe_audio to transcribe it.",
|
| 140 |
-
".png": "Tip: This is an image
|
| 141 |
-
".jpg": "Tip: This is an image
|
| 142 |
-
".jpeg": "Tip: This is an image
|
| 143 |
".py": "Tip: This is a Python script. Use read_file to view, then python_repl with exec(open(path).read()) to run it directly.",
|
| 144 |
".pdf": "Tip: This is a PDF. Use read_file or python_repl with PyPDF2 to read it.",
|
| 145 |
}
|
|
|
|
| 137 |
".csv": "Tip: This is a CSV file. Use parse_spreadsheet to read it.",
|
| 138 |
".mp3": "Tip: This is an audio file. Use transcribe_audio to transcribe it.",
|
| 139 |
".wav": "Tip: This is an audio file. Use transcribe_audio to transcribe it.",
|
| 140 |
+
".png": "Tip: This is an image. Use describe_image to describe what is visible.",
|
| 141 |
+
".jpg": "Tip: This is an image. Use describe_image to describe what is visible.",
|
| 142 |
+
".jpeg": "Tip: This is an image. Use describe_image to describe what is visible.",
|
| 143 |
".py": "Tip: This is a Python script. Use read_file to view, then python_repl with exec(open(path).read()) to run it directly.",
|
| 144 |
".pdf": "Tip: This is a PDF. Use read_file or python_repl with PyPDF2 to read it.",
|
| 145 |
}
|
tools/__init__.py
CHANGED
|
@@ -7,6 +7,7 @@ from tools.file.spreadsheet import parse_spreadsheet
|
|
| 7 |
from tools.python import python_repl
|
| 8 |
from tools.youtube import get_youtube_transcript
|
| 9 |
from tools.audio import transcribe_audio
|
|
|
|
| 10 |
|
| 11 |
__all__ = [
|
| 12 |
web_search,
|
|
@@ -18,6 +19,7 @@ __all__ = [
|
|
| 18 |
python_repl,
|
| 19 |
get_youtube_transcript,
|
| 20 |
transcribe_audio,
|
|
|
|
| 21 |
]
|
| 22 |
|
| 23 |
tools_by_name = {t.name: t for t in __all__}
|
|
|
|
| 7 |
from tools.python import python_repl
|
| 8 |
from tools.youtube import get_youtube_transcript
|
| 9 |
from tools.audio import transcribe_audio
|
| 10 |
+
from tools.vision.describe_image import describe_image
|
| 11 |
|
| 12 |
__all__ = [
|
| 13 |
web_search,
|
|
|
|
| 19 |
python_repl,
|
| 20 |
get_youtube_transcript,
|
| 21 |
transcribe_audio,
|
| 22 |
+
describe_image,
|
| 23 |
]
|
| 24 |
|
| 25 |
tools_by_name = {t.name: t for t in __all__}
|
tools/vision/__init__.py
ADDED
|
File without changes
|
tools/vision/describe_image.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from langchain_core.tools import tool
|
| 3 |
+
|
| 4 |
+
_pipe = None
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _get_pipe():
    """Return the shared image-captioning pipeline, building it on first use.

    The pipeline is stored in the module-level ``_pipe`` so later calls
    return the same object instead of constructing a new one.
    """
    global _pipe
    if _pipe is not None:
        return _pipe

    # Imported here rather than at module import time, so merely importing
    # this module does not pull in transformers.
    from transformers import pipeline

    _pipe = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
    )
    return _pipe
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@tool
def describe_image(path: str) -> str:
    """Describe an image file (.png, .jpg, .jpeg) using a local vision model. Returns a text description of what is visible. Use this for any image (chess boards, diagrams, charts, screenshots, photos). Call ONCE per image and use the description directly."""
    # NOTE(review): the docstring above is left verbatim on purpose; the
    # @tool decorator presumably exposes it as the tool description, which
    # would make it runtime text rather than plain documentation.
    #
    # Returns one of:
    #   "ERROR: Image file not found"            - empty or non-existent path
    #   "Image: WxHpx. Description: <caption>"   - captioning succeeded
    #   "Image: WxHpx. No description generated." - pipeline returned nothing
    #   "VISION_DESCRIPTION_ERROR: <exc>"        - any failure while captioning
    if not path or not os.path.exists(path):
        return "ERROR: Image file not found"

    abs_path = os.path.abspath(path)

    try:
        from PIL import Image

        # Open through a context manager so the underlying file handle is
        # released even when model loading or captioning raises.
        with Image.open(abs_path) as img:
            pipe = _get_pipe()
            result = pipe(img, max_new_tokens=30)
            w, h = img.size

        desc = result[0]["generated_text"] if result else ""
        info = f"Image: {w}x{h}px"

        if not desc:
            return f"{info}. No description generated."

        return f"{info}. Description: {desc.strip()}"
    except Exception as e:
        # Errors are reported as a string rather than raised, matching the
        # other return paths of this function.
        return f"VISION_DESCRIPTION_ERROR: {e}"
|
| 43 |
+
|
| 44 |
+
|
tools/web/search.py
CHANGED
|
@@ -5,12 +5,11 @@ from langchain_core.tools import tool
|
|
| 5 |
@tool
|
| 6 |
def web_search(keywords: str) -> str:
|
| 7 |
"""Search the web. Use this for finding information, facts, URLs, and general web content. Returns title, URL, and snippet for each result."""
|
| 8 |
-
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"}
|
| 9 |
last_err = None
|
| 10 |
for attempt in range(3):
|
| 11 |
try:
|
| 12 |
from ddgs import DDGS
|
| 13 |
-
results = list(DDGS(
|
| 14 |
if not results:
|
| 15 |
return "NO_RESULTS"
|
| 16 |
formatted = []
|
|
|
|
| 5 |
@tool
|
| 6 |
def web_search(keywords: str) -> str:
|
| 7 |
"""Search the web. Use this for finding information, facts, URLs, and general web content. Returns title, URL, and snippet for each result."""
|
|
|
|
| 8 |
last_err = None
|
| 9 |
for attempt in range(3):
|
| 10 |
try:
|
| 11 |
from ddgs import DDGS
|
| 12 |
+
results = list(DDGS().text(keywords, max_results=5, backend="html"))
|
| 13 |
if not results:
|
| 14 |
return "NO_RESULTS"
|
| 15 |
formatted = []
|
tools/web/wiki.py
CHANGED
|
@@ -1,12 +1,38 @@
|
|
| 1 |
-
from langchain_community.document_loaders import WikipediaLoader
|
| 2 |
from langchain_core.tools import tool
|
| 3 |
|
| 4 |
|
| 5 |
@tool
|
| 6 |
def wiki_search(query: str) -> str:
|
| 7 |
-
"""Search Wikipedia."""
|
| 8 |
try:
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
except Exception as e:
|
| 12 |
return f"WIKI_ERROR: {e}"
|
|
|
|
|
|
|
| 1 |
from langchain_core.tools import tool
|
| 2 |
|
| 3 |
|
| 4 |
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia pages by keyword. Returns up to 5 matching page titles and short snippets. Use wiki_page after this to get full article text."""
    # NOTE(review): the docstring above is left verbatim on purpose; the
    # @tool decorator presumably exposes it as the tool description.
    #
    # Returns "NO_RESULTS" when the search yields nothing, "WIKI_ERROR: <exc>"
    # on any failure, otherwise blank-line-separated "Title/Snippet" entries.
    try:
        # Imported once per call instead of once per search result.
        import html

        import requests

        params = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": query,
            "srlimit": 5,
            "srprop": "snippet",
        }
        resp = requests.get(
            "https://en.wikipedia.org/w/api.php",
            params=params,
            headers={"User-Agent": "GAIA-Benchmark-Agent/1.0"},
            timeout=15,
        )
        resp.raise_for_status()
        data = resp.json()
        results = data.get("query", {}).get("search", [])
        if not results:
            return "NO_RESULTS"

        formatted = []
        for r in results:
            title = r.get("title", "")
            snippet = r.get("snippet", "")
            # Strip the highlight markup first and decode entities afterwards,
            # so escaped text such as "&lt;/span&gt;" that belongs to the
            # article is not mistaken for markup and removed.
            snippet = snippet.replace("<span class=\"searchmatch\">", "").replace("</span>", "")
            snippet = html.unescape(snippet)
            formatted.append(f"Title: {title}\nSnippet: {snippet[:300]}")
        return "\n\n".join(formatted)
    except Exception as e:
        return f"WIKI_ERROR: {e}"
|