Sborole committed on
Commit
5085001
·
verified ·
1 Parent(s): 3811bfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -36
app.py CHANGED
@@ -48,7 +48,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
48
  repo_type="dataset"
49
  )
50
 
51
- dataset = load_dataset(data_dir, "2023_level1", split="validation")
52
  print("Dataset", dataset)
53
  print("Length is ", len(dataset))
54
  print(type(dataset))
@@ -63,13 +63,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
63
  id_to_path[ex["task_id"]] = full_path
64
 
65
  # The 'id_to_path' dictionary is essential for your file reading tool.
66
- print(f"Mapped {len(id_to_path)} question IDs to resource files.")
67
 
68
  # 3. Run your Agent
69
  results_log = []
70
  answers_payload = []
71
- files_base = os.path.join(data_dir, "2023", "test")
72
- subset = dataset.select(range(20))
73
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
74
 
75
  if profile:
@@ -138,7 +137,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
138
  ]
139
  subset = dataset.filter(lambda example: example['task_id'] in target_task_ids)
140
  subset = subset.to_list()
141
-
142
 
143
  results_log = []
144
  answers_payload = []
@@ -149,39 +148,48 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
149
  question_text = item.get("Question")
150
  print(f"question_text is {question_text}")
151
  file_name = item.get("file_name")
152
-
153
- file_path = ""
154
  file_content = None
155
- if file_name:
156
- file_path = os.path.join(files_base, file_name)
157
-
 
 
 
 
 
 
 
 
 
158
  # Decide binary or text
159
- if file_name.endswith((".txt", ".py", ".csv", ".json")):
160
- try:
161
- with open(file_path, "r", encoding="utf-8") as f:
162
- file_content = f.read()
163
- print(f"File Content is {file_content}, {file_path}")
164
- except Exception as e:
165
- print(f"Error reading text file {file_path}: {e}")
166
- file_content = None
167
- elif file_name.endswith(".docx"):
168
- try:
169
- doc = Document(file_path)
170
- file_content = "\n".join([p.text for p in doc.paragraphs])
171
- print(f"Docx content loaded, {file_path}")
172
- except Exception as e:
173
- print(f"Error reading docx file {file_path}: {e}")
174
- file_content = None
175
-
176
-
177
- else: # binary files like images, audio, video
178
- try:
179
- with open(file_path, "rb") as f:
180
- file_content = f.read()
181
- print(f"Binary file loaded, {file_path}")
182
- except Exception as e:
183
- print(f"Error reading binary file {file_path}: {e}")
184
- file_content = None
185
 
186
 
187
  if not task_id or question_text is None:
 
48
  repo_type="dataset"
49
  )
50
 
51
+ dataset = load_dataset(data_dir, "2023_level1", split="validation", cache_dir=data_dir)
52
  print("Dataset", dataset)
53
  print("Length is ", len(dataset))
54
  print(type(dataset))
 
63
  id_to_path[ex["task_id"]] = full_path
64
 
65
  # The 'id_to_path' dictionary is essential for your file reading tool.
66
+ print(f"Mapped {len(id_to_path)} {id_to_path} question IDs to resource files.")
67
 
68
  # 3. Run your Agent
69
  results_log = []
70
  answers_payload = []
71
+ #files_base = os.path.join(data_dir, "2023", "test")
 
72
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
73
 
74
  if profile:
 
137
  ]
138
  subset = dataset.filter(lambda example: example['task_id'] in target_task_ids)
139
  subset = subset.to_list()
140
+ print(subset)
141
 
142
  results_log = []
143
  answers_payload = []
 
148
  question_text = item.get("Question")
149
  print(f"question_text is {question_text}")
150
  file_name = item.get("file_name")
151
+ print(f"File Name {file_name}")
152
+ file_path = id_to_path.get(task_id, None)
153
  file_content = None
154
+ if file_name and file_path:
155
+ exists = os.path.exists(file_path)
156
+ print("Checking file path")
157
+ debug_log.append({
158
+ "Task ID": task_id,
159
+ "File Name": file_name,
160
+ "Path Exists": "✅ YES" if exists else "❌ NO",
161
+ "Calculated Path": path_check
162
+ })
163
+ print(f"Attempting to load file at: {file_path} (Exists: {exists})")
164
+
165
+ if exists:
166
  # Decide binary or text
167
+ if file_name.endswith((".txt", ".py", ".csv", ".json")):
168
+ try:
169
+ with open(file_path, "r", encoding="utf-8") as f:
170
+ file_content = f.read()
171
+ print(f"File Content is {file_content}, {file_path}")
172
+ except Exception as e:
173
+ print(f"Error reading text file {file_path}: {e}")
174
+ file_content = None
175
+ elif file_name.endswith(".docx"):
176
+ try:
177
+ doc = Document(file_path)
178
+ file_content = "\n".join([p.text for p in doc.paragraphs])
179
+ print(f"Docx content loaded, {file_path}")
180
+ except Exception as e:
181
+ print(f"Error reading docx file {file_path}: {e}")
182
+ file_content = None
183
+
184
+
185
+ else: # binary files like images, audio, video
186
+ try:
187
+ with open(file_path, "rb") as f:
188
+ file_content = f.read()
189
+ print(f"Binary file loaded, {file_path}")
190
+ except Exception as e:
191
+ print(f"Error reading binary file {file_path}: {e}")
192
+ file_content = None
193
 
194
 
195
  if not task_id or question_text is None: