Spaces:
Sleeping
Sleeping
Added capability to use files associated with questions
Browse files
app.py
CHANGED
|
@@ -75,6 +75,10 @@ def log_backend_file_status(with_file, total_count: int, api_url: str):
|
|
| 75 |
def get_hf_token(profile: gr.OAuthProfile | None):
|
| 76 |
token = None
|
| 77 |
if profile:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
for attr in ("access_token", "token"):
|
| 79 |
token = getattr(profile, attr, None)
|
| 80 |
if token:
|
|
@@ -96,68 +100,61 @@ def get_hf_token(profile: gr.OAuthProfile | None):
|
|
| 96 |
)
|
| 97 |
if token:
|
| 98 |
print("Using token from environment.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
return token
|
| 100 |
|
| 101 |
|
| 102 |
-
def
|
| 103 |
-
|
| 104 |
try:
|
| 105 |
-
from huggingface_hub import
|
| 106 |
except Exception as e:
|
| 107 |
-
print(f"
|
| 108 |
-
return
|
| 109 |
-
|
| 110 |
-
token = get_hf_token(profile)
|
| 111 |
-
if not token:
|
| 112 |
-
print("Skipping GAIA file fetch (no HF toLangAgentken found in profile or env).")
|
| 113 |
-
return
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
if not matches:
|
| 132 |
-
matches = [p for p in gaia_files_cache if task_id in p]
|
| 133 |
-
if not matches:
|
| 134 |
-
print(f"GAIA file not found for task {task_id} (looking for {fname}).")
|
| 135 |
-
continue
|
| 136 |
-
match_path = matches[0]
|
| 137 |
-
try:
|
| 138 |
-
local_path = hf_hub_download(
|
| 139 |
-
gaia_repo,
|
| 140 |
-
match_path,
|
| 141 |
-
repo_type="dataset",
|
| 142 |
-
token=token,
|
| 143 |
-
)
|
| 144 |
-
print(f"Downloaded GAIA file for task {task_id} to {local_path}")
|
| 145 |
-
except Exception as e:
|
| 146 |
-
print(f"Failed to download GAIA file for task {task_id} ({match_path}): {e}")
|
| 147 |
|
| 148 |
|
| 149 |
-
def
|
| 150 |
if not file_name:
|
| 151 |
return None
|
|
|
|
| 152 |
candidate = os.path.join("validation", file_name)
|
| 153 |
if os.path.exists(candidate):
|
| 154 |
print(f"Local file found: {candidate}")
|
| 155 |
return candidate
|
| 156 |
-
print(f"No local file found (expected {candidate})")
|
| 157 |
-
return
|
| 158 |
|
| 159 |
|
| 160 |
-
def run_agent_on_questions(agent, questions_data):
|
| 161 |
results_log = []
|
| 162 |
answers_payload = []
|
| 163 |
print(f"Running agent on {len(questions_data)} questions...")
|
|
@@ -165,7 +162,7 @@ def run_agent_on_questions(agent, questions_data):
|
|
| 165 |
task_id = item.get("task_id")
|
| 166 |
question_text = item.get("question")
|
| 167 |
file_name = item.get("file_name")
|
| 168 |
-
file_path =
|
| 169 |
if not task_id or question_text is None:
|
| 170 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 171 |
continue
|
|
@@ -253,9 +250,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 253 |
|
| 254 |
with_file = [q for q in questions_data if q.get("file_name")]
|
| 255 |
log_backend_file_status(with_file, len(questions_data), api_url)
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
answers_payload, results_log = run_agent_on_questions(agent, questions_data)
|
| 259 |
if not answers_payload:
|
| 260 |
print("Agent did not produce any answers to submit.")
|
| 261 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
|
|
|
| 75 |
def get_hf_token(profile: gr.OAuthProfile | None):
|
| 76 |
token = None
|
| 77 |
if profile:
|
| 78 |
+
try:
|
| 79 |
+
print("Profile attributes:", list(profile.__dict__.keys()))
|
| 80 |
+
except Exception as e:
|
| 81 |
+
print(f"Could not inspect profile attributes: {e}")
|
| 82 |
for attr in ("access_token", "token"):
|
| 83 |
token = getattr(profile, attr, None)
|
| 84 |
if token:
|
|
|
|
| 100 |
)
|
| 101 |
if token:
|
| 102 |
print("Using token from environment.")
|
| 103 |
+
if not token:
|
| 104 |
+
try:
|
| 105 |
+
from huggingface_hub import HfFolder
|
| 106 |
+
token = HfFolder.get_token()
|
| 107 |
+
if token:
|
| 108 |
+
print("Using token from local HF cache (huggingface-cli login).")
|
| 109 |
+
except Exception as e:
|
| 110 |
+
print(f"Could not load token from local HF cache: {e}")
|
| 111 |
+
if token:
|
| 112 |
+
# Avoid printing the token itself; log only its length for debugging.
|
| 113 |
+
print(f"HF token obtained (length {len(token)}).")
|
| 114 |
+
else:
|
| 115 |
+
print("No HF token available from profile or environment.")
|
| 116 |
return token
|
| 117 |
|
| 118 |
|
| 119 |
+
def download_gaia_file(file_name: str, token: str | None):
|
| 120 |
+
"""Download a GAIA validation file by name from the pinned revision."""
|
| 121 |
try:
|
| 122 |
+
from huggingface_hub import hf_hub_download
|
| 123 |
except Exception as e:
|
| 124 |
+
print(f"Cannot download {file_name}: huggingface_hub unavailable ({e}).")
|
| 125 |
+
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
+
repo_id = "gaia-benchmark/GAIA"
|
| 128 |
+
revision = "86620fe7a265fdd074ea8d8c8b7a556a1058b0af"
|
| 129 |
+
path_in_repo = f"2023/validation/{file_name}"
|
| 130 |
+
try:
|
| 131 |
+
local_path = hf_hub_download(
|
| 132 |
+
repo_id=repo_id,
|
| 133 |
+
filename=path_in_repo,
|
| 134 |
+
repo_type="dataset",
|
| 135 |
+
token=token, # can be None if huggingface-cli cache is available
|
| 136 |
+
revision=revision,
|
| 137 |
+
)
|
| 138 |
+
print(f"Downloaded GAIA file {file_name} to {local_path}")
|
| 139 |
+
return local_path
|
| 140 |
+
except Exception as e:
|
| 141 |
+
print(f"Failed to download GAIA file {file_name}: {e}")
|
| 142 |
+
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
|
| 145 |
+
def resolve_file(file_name: str | None, token: str | None):
|
| 146 |
if not file_name:
|
| 147 |
return None
|
| 148 |
+
# Prefer local cache if present.
|
| 149 |
candidate = os.path.join("validation", file_name)
|
| 150 |
if os.path.exists(candidate):
|
| 151 |
print(f"Local file found: {candidate}")
|
| 152 |
return candidate
|
| 153 |
+
print(f"No local file found (expected {candidate}), trying GAIA download.")
|
| 154 |
+
return download_gaia_file(file_name, token)
|
| 155 |
|
| 156 |
|
| 157 |
+
def run_agent_on_questions(agent, questions_data, token: str | None):
|
| 158 |
results_log = []
|
| 159 |
answers_payload = []
|
| 160 |
print(f"Running agent on {len(questions_data)} questions...")
|
|
|
|
| 162 |
task_id = item.get("task_id")
|
| 163 |
question_text = item.get("question")
|
| 164 |
file_name = item.get("file_name")
|
| 165 |
+
file_path = resolve_file(file_name, token)
|
| 166 |
if not task_id or question_text is None:
|
| 167 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 168 |
continue
|
|
|
|
| 250 |
|
| 251 |
with_file = [q for q in questions_data if q.get("file_name")]
|
| 252 |
log_backend_file_status(with_file, len(questions_data), api_url)
|
| 253 |
+
token = get_hf_token(profile)
|
| 254 |
+
answers_payload, results_log = run_agent_on_questions(agent, questions_data, token)
|
|
|
|
| 255 |
if not answers_payload:
|
| 256 |
print("Agent did not produce any answers to submit.")
|
| 257 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|