Sborole-Final-Assignment

Sleeping

App Files Community

Sborole committed on Dec 1, 2025

Commit

5085001

verified ·

1 Parent(s): 3811bfe

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -36

app.py CHANGED Viewed

@@ -48,7 +48,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         repo_type="dataset"
     )
-    dataset = load_dataset(data_dir, "2023_level1", split="validation")
     print("Dataset", dataset)
     print("Length is ", len(dataset))
     print(type(dataset))
@@ -63,13 +63,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
                 id_to_path[ex["task_id"]] = full_path
     # The 'id_to_path' dictionary is essential for your file reading tool.
-    print(f"Mapped {len(id_to_path)} question IDs to resource files.")
     # 3. Run your Agent
     results_log = []
     answers_payload = []
-    files_base = os.path.join(data_dir, "2023", "test")
-    subset = dataset.select(range(20))
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
@@ -138,7 +137,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
          ]
     subset = dataset.filter(lambda example: example['task_id'] in target_task_ids)
     subset = subset.to_list()
     results_log = []
     answers_payload = []
@@ -149,39 +148,48 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         question_text = item.get("Question")
         print(f"question_text is {question_text}")
         file_name = item.get("file_name")
-        file_path = ""
         file_content = None
-        if file_name:
-            file_path = os.path.join(files_base, file_name)
             # Decide binary or text
-            if file_name.endswith((".txt", ".py", ".csv", ".json")):
-                try:
-                    with open(file_path, "r", encoding="utf-8") as f:
-                        file_content = f.read()
-                        print(f"File Content is {file_content}, {file_path}")
-                except Exception as e:
-                    print(f"Error reading text file {file_path}: {e}")
-                    file_content = None
-            elif file_name.endswith(".docx"):
-                try:
-                    doc = Document(file_path)
-                    file_content = "\n".join([p.text for p in doc.paragraphs])
-                    print(f"Docx content loaded, {file_path}")
-                except Exception as e:
-                    print(f"Error reading docx file {file_path}: {e}")
-                    file_content = None
-            else:  # binary files like images, audio, video
-                try:
-                    with open(file_path, "rb") as f:
-                        file_content = f.read()
-                        print(f"Binary file loaded, {file_path}")
-                except Exception as e:
-                    print(f"Error reading binary file {file_path}: {e}")
-                    file_content = None
         if not task_id or question_text is None:

         repo_type="dataset"
     )
+    dataset = load_dataset(data_dir, "2023_level1", split="validation", cache_dir=data_dir)
     print("Dataset", dataset)
     print("Length is ", len(dataset))
     print(type(dataset))
                 id_to_path[ex["task_id"]] = full_path
     # The 'id_to_path' dictionary is essential for your file reading tool.
+    print(f"Mapped {len(id_to_path)} {id_to_path} question IDs to resource files.")
     # 3. Run your Agent
     results_log = []
     answers_payload = []
+    #files_base = os.path.join(data_dir, "2023", "test")
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
          ]
     subset = dataset.filter(lambda example: example['task_id'] in target_task_ids)
     subset = subset.to_list()
+    print(subset)
     results_log = []
     answers_payload = []
         question_text = item.get("Question")
         print(f"question_text is {question_text}")
         file_name = item.get("file_name")
+        print(f"File Name {file_name}")
+        file_path = id_to_path.get(task_id, None)
         file_content = None
+        if file_name and file_path:
+            exists = os.path.exists(file_path)
+            print("Checking file path")
+            debug_log.append({
+                "Task ID": task_id,
+                "File Name": file_name,
+                "Path Exists": "✅ YES" if exists else "❌ NO",
+                "Calculated Path": path_check
+            })
+            print(f"Attempting to load file at: {file_path} (Exists: {exists})")
+            if exists:
             # Decide binary or text
+                if file_name.endswith((".txt", ".py", ".csv", ".json")):
+                    try:
+                        with open(file_path, "r", encoding="utf-8") as f:
+                            file_content = f.read()
+                            print(f"File Content is {file_content}, {file_path}")
+                    except Exception as e:
+                        print(f"Error reading text file {file_path}: {e}")
+                        file_content = None
+                elif file_name.endswith(".docx"):
+                    try:
+                        doc = Document(file_path)
+                        file_content = "\n".join([p.text for p in doc.paragraphs])
+                        print(f"Docx content loaded, {file_path}")
+                    except Exception as e:
+                        print(f"Error reading docx file {file_path}: {e}")
+                        file_content = None
+                else:  # binary files like images, audio, video
+                    try:
+                        with open(file_path, "rb") as f:
+                            file_content = f.read()
+                            print(f"Binary file loaded, {file_path}")
+                    except Exception as e:
+                        print(f"Error reading binary file {file_path}: {e}")
+                        file_content = None
         if not task_id or question_text is None: