Final_Assignment_Template

Sleeping

App Files Files Community

Omlesna committed on Nov 28, 2025

Commit

ffa5609

1 Parent(s): 43761d3

Added the ability to use files associated with questions

Browse files

Files changed (1) hide show

app.py +46 -50

app.py CHANGED Viewed

@@ -75,6 +75,10 @@ def log_backend_file_status(with_file, total_count: int, api_url: str):
 def get_hf_token(profile: gr.OAuthProfile | None):
     token = None
     if profile:
         for attr in ("access_token", "token"):
             token = getattr(profile, attr, None)
             if token:
@@ -96,68 +100,61 @@ def get_hf_token(profile: gr.OAuthProfile | None):
         )
         if token:
             print("Using token from environment.")
     return token
-def try_fetch_from_gaia(with_file, profile: gr.OAuthProfile | None):
-    gaia_repo = "gaia-benchmark/GAIA"
     try:
-        from huggingface_hub import list_repo_files, hf_hub_download
     except Exception as e:
-        print(f"Skipping GAIA file fetch (huggingface_hub not available): {e}")
-        return
-    token = get_hf_token(profile)
-    if not token:
-        print("Skipping GAIA file fetch (no HF toLangAgentken found in profile or env).")
-        return
-    gaia_files_cache = None
-    for q in with_file:
-        fname = q.get("file_name")
-        task_id = q.get("task_id")
-        if gaia_files_cache is None:
-            try:
-                gaia_files_cache = list_repo_files(
-                    gaia_repo, repo_type="dataset", token=token
-                )
-                print(f"GAIA repo file count: {len(gaia_files_cache)}")
-            except Exception as e:
-                print(f"Failed to list GAIA repo files: {e}")
-                gaia_files_cache = []
-        matches = []
-        if gaia_files_cache:
-            matches = [p for p in gaia_files_cache if p.endswith(fname)]
-            if not matches:
-                matches = [p for p in gaia_files_cache if task_id in p]
-        if not matches:
-            print(f"GAIA file not found for task {task_id} (looking for {fname}).")
-            continue
-        match_path = matches[0]
-        try:
-            local_path = hf_hub_download(
-                gaia_repo,
-                match_path,
-                repo_type="dataset",
-                token=token,
-            )
-            print(f"Downloaded GAIA file for task {task_id} to {local_path}")
-        except Exception as e:
-            print(f"Failed to download GAIA file for task {task_id} ({match_path}): {e}")
-def resolve_local_file(file_name: str | None):
     if not file_name:
         return None
     candidate = os.path.join("validation", file_name)
     if os.path.exists(candidate):
         print(f"Local file found: {candidate}")
         return candidate
-    print(f"No local file found (expected {candidate})")
-    return None
-def run_agent_on_questions(agent, questions_data):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
@@ -165,7 +162,7 @@ def run_agent_on_questions(agent, questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
         file_name = item.get("file_name")
-        file_path = resolve_local_file(file_name)
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
@@ -253,9 +250,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     with_file = [q for q in questions_data if q.get("file_name")]
     log_backend_file_status(with_file, len(questions_data), api_url)
-    try_fetch_from_gaia(with_file, profile)
-    answers_payload, results_log = run_agent_on_questions(agent, questions_data)
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

 def get_hf_token(profile: gr.OAuthProfile | None):
     token = None
     if profile:
+        try:
+            print("Profile attributes:", list(profile.__dict__.keys()))
+        except Exception as e:
+            print(f"Could not inspect profile attributes: {e}")
         for attr in ("access_token", "token"):
             token = getattr(profile, attr, None)
             if token:
         )
         if token:
             print("Using token from environment.")
+    if not token:
+        try:
+            from huggingface_hub import HfFolder
+            token = HfFolder.get_token()
+            if token:
+                print("Using token from local HF cache (huggingface-cli login).")
+        except Exception as e:
+            print(f"Could not load token from local HF cache: {e}")
+    if token:
+        # Never print the token itself; log only its length for debugging.
+        print(f"HF token obtained (length {len(token)}).")
+    else:
+        print("No HF token available from profile or environment.")
     return token
+def download_gaia_file(file_name: str, token: str | None):
+    """Download a GAIA validation file by name from the pinned revision."""
     try:
+        from huggingface_hub import hf_hub_download
     except Exception as e:
+        print(f"Cannot download {file_name}: huggingface_hub unavailable ({e}).")
+        return None
+    repo_id = "gaia-benchmark/GAIA"
+    revision = "86620fe7a265fdd074ea8d8c8b7a556a1058b0af"
+    path_in_repo = f"2023/validation/{file_name}"
+    try:
+        local_path = hf_hub_download(
+            repo_id=repo_id,
+            filename=path_in_repo,
+            repo_type="dataset",
+            token=token,  # can be None if huggingface-cli cache is available
+            revision=revision,
+        )
+        print(f"Downloaded GAIA file {file_name} to {local_path}")
+        return local_path
+    except Exception as e:
+        print(f"Failed to download GAIA file {file_name}: {e}")
+        return None
+def resolve_file(file_name: str | None, token: str | None):
     if not file_name:
         return None
+    # Prefer local cache if present.
     candidate = os.path.join("validation", file_name)
     if os.path.exists(candidate):
         print(f"Local file found: {candidate}")
         return candidate
+    print(f"No local file found (expected {candidate}), trying GAIA download.")
+    return download_gaia_file(file_name, token)
+def run_agent_on_questions(agent, questions_data, token: str | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
         task_id = item.get("task_id")
         question_text = item.get("question")
         file_name = item.get("file_name")
+        file_path = resolve_file(file_name, token)
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
     with_file = [q for q in questions_data if q.get("file_name")]
     log_backend_file_status(with_file, len(questions_data), api_url)
+    token = get_hf_token(profile)
+    answers_payload, results_log = run_agent_on_questions(agent, questions_data, token)
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)