Omlesna committed on
Commit
ffa5609
·
1 Parent(s): 43761d3

added capacity to use files associated to questions

Browse files
Files changed (1) hide show
  1. app.py +46 -50
app.py CHANGED
@@ -75,6 +75,10 @@ def log_backend_file_status(with_file, total_count: int, api_url: str):
75
  def get_hf_token(profile: gr.OAuthProfile | None):
76
  token = None
77
  if profile:
 
 
 
 
78
  for attr in ("access_token", "token"):
79
  token = getattr(profile, attr, None)
80
  if token:
@@ -96,68 +100,61 @@ def get_hf_token(profile: gr.OAuthProfile | None):
96
  )
97
  if token:
98
  print("Using token from environment.")
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  return token
100
 
101
 
102
- def try_fetch_from_gaia(with_file, profile: gr.OAuthProfile | None):
103
- gaia_repo = "gaia-benchmark/GAIA"
104
  try:
105
- from huggingface_hub import list_repo_files, hf_hub_download
106
  except Exception as e:
107
- print(f"Skipping GAIA file fetch (huggingface_hub not available): {e}")
108
- return
109
-
110
- token = get_hf_token(profile)
111
- if not token:
112
- print("Skipping GAIA file fetch (no HF toLangAgentken found in profile or env).")
113
- return
114
 
115
- gaia_files_cache = None
116
- for q in with_file:
117
- fname = q.get("file_name")
118
- task_id = q.get("task_id")
119
- if gaia_files_cache is None:
120
- try:
121
- gaia_files_cache = list_repo_files(
122
- gaia_repo, repo_type="dataset", token=token
123
- )
124
- print(f"GAIA repo file count: {len(gaia_files_cache)}")
125
- except Exception as e:
126
- print(f"Failed to list GAIA repo files: {e}")
127
- gaia_files_cache = []
128
- matches = []
129
- if gaia_files_cache:
130
- matches = [p for p in gaia_files_cache if p.endswith(fname)]
131
- if not matches:
132
- matches = [p for p in gaia_files_cache if task_id in p]
133
- if not matches:
134
- print(f"GAIA file not found for task {task_id} (looking for {fname}).")
135
- continue
136
- match_path = matches[0]
137
- try:
138
- local_path = hf_hub_download(
139
- gaia_repo,
140
- match_path,
141
- repo_type="dataset",
142
- token=token,
143
- )
144
- print(f"Downloaded GAIA file for task {task_id} to {local_path}")
145
- except Exception as e:
146
- print(f"Failed to download GAIA file for task {task_id} ({match_path}): {e}")
147
 
148
 
149
- def resolve_local_file(file_name: str | None):
150
  if not file_name:
151
  return None
 
152
  candidate = os.path.join("validation", file_name)
153
  if os.path.exists(candidate):
154
  print(f"Local file found: {candidate}")
155
  return candidate
156
- print(f"No local file found (expected {candidate})")
157
- return None
158
 
159
 
160
- def run_agent_on_questions(agent, questions_data):
161
  results_log = []
162
  answers_payload = []
163
  print(f"Running agent on {len(questions_data)} questions...")
@@ -165,7 +162,7 @@ def run_agent_on_questions(agent, questions_data):
165
  task_id = item.get("task_id")
166
  question_text = item.get("question")
167
  file_name = item.get("file_name")
168
- file_path = resolve_local_file(file_name)
169
  if not task_id or question_text is None:
170
  print(f"Skipping item with missing task_id or question: {item}")
171
  continue
@@ -253,9 +250,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
253
 
254
  with_file = [q for q in questions_data if q.get("file_name")]
255
  log_backend_file_status(with_file, len(questions_data), api_url)
256
- try_fetch_from_gaia(with_file, profile)
257
-
258
- answers_payload, results_log = run_agent_on_questions(agent, questions_data)
259
  if not answers_payload:
260
  print("Agent did not produce any answers to submit.")
261
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
75
  def get_hf_token(profile: gr.OAuthProfile | None):
76
  token = None
77
  if profile:
78
+ try:
79
+ print("Profile attributes:", list(profile.__dict__.keys()))
80
+ except Exception as e:
81
+ print(f"Could not inspect profile attributes: {e}")
82
  for attr in ("access_token", "token"):
83
  token = getattr(profile, attr, None)
84
  if token:
 
100
  )
101
  if token:
102
  print("Using token from environment.")
103
+ if not token:
104
+ try:
105
+ from huggingface_hub import HfFolder
106
+ token = HfFolder.get_token()
107
+ if token:
108
+ print("Using token from local HF cache (huggingface-cli login).")
109
+ except Exception as e:
110
+ print(f"Could not load token from local HF cache: {e}")
111
+ if token:
112
+ # Avoid printing full token; show a short preview for debugging.
113
+ print(f"HF token obtained (length {len(token)}).")
114
+ else:
115
+ print("No HF token available from profile or environment.")
116
  return token
117
 
118
 
119
+ def download_gaia_file(file_name: str, token: str | None):
120
+ """Download a GAIA validation file by name from the pinned revision."""
121
  try:
122
+ from huggingface_hub import hf_hub_download
123
  except Exception as e:
124
+ print(f"Cannot download {file_name}: huggingface_hub unavailable ({e}).")
125
+ return None
 
 
 
 
 
126
 
127
+ repo_id = "gaia-benchmark/GAIA"
128
+ revision = "86620fe7a265fdd074ea8d8c8b7a556a1058b0af"
129
+ path_in_repo = f"2023/validation/{file_name}"
130
+ try:
131
+ local_path = hf_hub_download(
132
+ repo_id=repo_id,
133
+ filename=path_in_repo,
134
+ repo_type="dataset",
135
+ token=token, # can be None if huggingface-cli cache is available
136
+ revision=revision,
137
+ )
138
+ print(f"Downloaded GAIA file {file_name} to {local_path}")
139
+ return local_path
140
+ except Exception as e:
141
+ print(f"Failed to download GAIA file {file_name}: {e}")
142
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
 
145
+ def resolve_file(file_name: str | None, token: str | None):
146
  if not file_name:
147
  return None
148
+ # Prefer local cache if present.
149
  candidate = os.path.join("validation", file_name)
150
  if os.path.exists(candidate):
151
  print(f"Local file found: {candidate}")
152
  return candidate
153
+ print(f"No local file found (expected {candidate}), trying GAIA download.")
154
+ return download_gaia_file(file_name, token)
155
 
156
 
157
+ def run_agent_on_questions(agent, questions_data, token: str | None):
158
  results_log = []
159
  answers_payload = []
160
  print(f"Running agent on {len(questions_data)} questions...")
 
162
  task_id = item.get("task_id")
163
  question_text = item.get("question")
164
  file_name = item.get("file_name")
165
+ file_path = resolve_file(file_name, token)
166
  if not task_id or question_text is None:
167
  print(f"Skipping item with missing task_id or question: {item}")
168
  continue
 
250
 
251
  with_file = [q for q in questions_data if q.get("file_name")]
252
  log_backend_file_status(with_file, len(questions_data), api_url)
253
+ token = get_hf_token(profile)
254
+ answers_payload, results_log = run_agent_on_questions(agent, questions_data, token)
 
255
  if not answers_payload:
256
  print("Agent did not produce any answers to submit.")
257
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)