Sborole-Final-Assignment

Running

App Files Community

Sborole committed on Nov 21, 2025

Commit

314a7e7

verified ·

1 Parent(s): 6391ed8

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -14

app.py CHANGED Viewed

@@ -65,7 +65,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data[:5]:
         print(f"ITEMS {item}")
         task_id = item.get("task_id")
         question_text = item.get("question")
@@ -74,20 +76,18 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         file_path = ""
         if file_name:
-            file_url = f"{DEFAULT_API_URL}/files/{file_name}"
-            response = requests.get(file_url)
-            if response.status_code == 200:
-                if file_name.endswith((".txt", ".py")):
-                    file_content = response.text
-                else:
-                    # save images/videos/audio locally if needed
-                    file_content = response.content
-                os.makedirs("./files, exist_ok= True")
-                file_path = f"./files/{file_name}"
-                with open(file_path, "wb") as f:
-                    f.write(response.content)
             else:
-                print(f"File not found: {file_url} (status {response.status_code})")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
@@ -189,6 +189,16 @@ if __name__ == "__main__":
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")

     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    files_base = os.path.join(data_dir, "2023", "test")
+    subset = dataset.select(range(20))
+    for item in subset:
         print(f"ITEMS {item}")
         task_id = item.get("task_id")
         question_text = item.get("question")
         file_path = ""
         if file_name:
+            file_path = os.path.join(files_base, file_name)
+            # Decide binary or text
+            if file_name.endswith((".txt", ".py", ".csv", ".json")):
+                with open(file_path, "r", encoding="utf-8") as f:
+                    file_content = f.read()
             else:
+                with open(file_path, "rb") as f:
+                    file_content = f.read()
+        else:
+            print(f"File not found: {file_url} (status {response.status_code})")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    data_dir = snapshot_download(
+        repo_id="gaia-benchmark/GAIA",
+        repo_type="dataset"
+    )
+    dataset = load_dataset(data_dir, "2023_level1", split="test")
+    print("Dataset", dataset)
+    print(len(dataset))
+    print(type(dataset))
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")