Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Files Community

gabejavitt committed on Nov 5, 2025

Commit

ed23d35

verified ·

1 Parent(s): 9329283

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -24

app.py CHANGED Viewed

@@ -1421,34 +1421,49 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     print(agent_code)
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
     # 3. Run your Agent
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         # Initialize file variables for the current question
         local_file_path = None
@@ -1461,29 +1476,31 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             # Extract the original file name to preserve the extension
             original_filename = file_path_from_api.split('/')[-1]
-            # Set the path where the file will be saved locally
             local_file_path = original_filename
             print(f"📥 Downloading file for task {task_id}...")
             print(f"   URL: {file_download_url}")
             print(f"   Saving to: {local_file_path}")
             try:
                 file_response = requests.get(file_download_url, timeout=15)
                 file_response.raise_for_status()
                 with open(local_file_path, 'wb') as f:
                     f.write(file_response.content)
                 file_size = os.path.getsize(local_file_path)
                 print(f"✅ Downloaded file: {original_filename} ({file_size} bytes)")
-                # Add verification
                 if not os.path.exists(local_file_path):
                     print(f"⚠️ Warning: File saved but cannot be found at {local_file_path}")
                     local_file_path = None
                 else:
-                    print(f"✓ File accessible at: {os.path.abspath(local_file_path)}")  # Debug line
             except requests.exceptions.RequestException as e:
                 error_message = f"[FILE DOWNLOAD ERROR: Could not fetch file: {e}]"
@@ -1495,34 +1512,93 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                 local_file_path = None
         if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             # Pass file_path to agent
             submitted_answer = agent(question_text, local_file_path)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             print(traceback.format_exc())
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
-        print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
     # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
@@ -1530,9 +1606,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
@@ -1541,22 +1622,42 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
         status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.Timeout:
         status_message = "Submission Failed: The request timed out."
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.RequestException as e:
         status_message = f"Submission Failed: Network error - {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df

     print(agent_code)
     # 2. Fetch Questions
+    print(f"\n{'='*70}")
+    print(f"📥 FETCHING QUESTIONS")
+    print(f"{'='*70}")
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
+        print(f"✅ Fetched {len(questions_data)} questions.")
+        print(f"{'='*70}\n")
     except requests.exceptions.RequestException as e:
+        print(f"❌ Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"❌ Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
+        print(f"❌ An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
     # 3. Run your Agent
+    print(f"\n{'='*70}")
+    print(f"🚀 STARTING EVALUATION")
+    print(f"{'='*70}")
+    print(f"Total questions to process: {len(questions_data)}")
+    print(f"{'='*70}\n")
     results_log = []
     answers_payload = []
+    for idx, item in enumerate(questions_data, 1):
+        print(f"\n{'='*70}")
+        print(f"📝 PROCESSING QUESTION {idx}/{len(questions_data)}")
+        print(f"{'='*70}")
         task_id = item.get("task_id")
         question_text = item.get("question")
+        correct_answer = item.get("answer", "N/A")  # Get correct answer from API
         # Initialize file variables for the current question
         local_file_path = None
             # Extract the original file name to preserve the extension
             original_filename = file_path_from_api.split('/')[-1]
+            # Save to current directory instead of /tmp
             local_file_path = original_filename
             print(f"📥 Downloading file for task {task_id}...")
             print(f"   URL: {file_download_url}")
+            print(f"   Original filename: {original_filename}")
             print(f"   Saving to: {local_file_path}")
             try:
                 file_response = requests.get(file_download_url, timeout=15)
                 file_response.raise_for_status()
+                # Save the raw bytes content to the local file path
                 with open(local_file_path, 'wb') as f:
                     f.write(file_response.content)
                 file_size = os.path.getsize(local_file_path)
                 print(f"✅ Downloaded file: {original_filename} ({file_size} bytes)")
+                # Verify file exists and is readable
                 if not os.path.exists(local_file_path):
                     print(f"⚠️ Warning: File saved but cannot be found at {local_file_path}")
                     local_file_path = None
                 else:
+                    print(f"✓ File accessible at: {os.path.abspath(local_file_path)}")
             except requests.exceptions.RequestException as e:
                 error_message = f"[FILE DOWNLOAD ERROR: Could not fetch file: {e}]"
                 local_file_path = None
         if not task_id or question_text is None:
+            print(f"⚠️ Skipping item with missing task_id or question: {item}")
             continue
         try:
             # Pass file_path to agent
             submitted_answer = agent(question_text, local_file_path)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            # Check if answer is correct
+            is_correct = submitted_answer.strip().lower() == correct_answer.strip().lower()
+            correctness = "✅ CORRECT" if is_correct else "❌ WRONG"
+            # Log with correctness indicator
+            print(f"\n{correctness} - Task {task_id}")
+            print(f"   Submitted: '{submitted_answer}'")
+            print(f"   Expected:  '{correct_answer}'")
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": submitted_answer,
+                "Correct Answer": correct_answer,
+                "Status": "✅" if is_correct else "❌"
+            })
+            print(f"✅ Question {idx}/{len(questions_data)} completed")
         except Exception as e:
+            print(f"❌ Error running agent on task {task_id}: {e}")
+            print(traceback.format_exc())
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": f"AGENT ERROR: {e}",
+                "Correct Answer": correct_answer,
+                "Status": "❌"
+            })
+            # Continue with other questions even if one fails
+            answers_payload.append({"task_id": task_id, "submitted_answer": f"ERROR: {str(e)[:100]}"})
+    # Summary after all questions processed
+    print(f"\n{'='*70}")
+    print(f"✅ ALL QUESTIONS PROCESSED")
+    print(f"{'='*70}")
+    print(f"Total answers collected: {len(answers_payload)}")
+    # Calculate pre-submission accuracy
+    correct_count = sum(1 for log in results_log if log.get("Status") == "✅")
+    total_count = len(results_log)
+    accuracy = (correct_count / total_count * 100) if total_count > 0 else 0
+    print(f"\n{'='*70}")
+    print(f"📊 PRE-SUBMISSION SUMMARY")
+    print(f"{'='*70}")
+    print(f"Correct: {correct_count}/{total_count} ({accuracy:.1f}%)")
+    print(f"{'='*70}\n")
     if not answers_payload:
+        print("⚠️ Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     # 5. Submit
+    print(f"\n{'='*70}")
+    print(f"📤 SUBMITTING TO API")
+    print(f"{'='*70}")
+    print(f"URL: {submit_url}")
+    print(f"Username: {username}")
+    print(f"Answers to submit: {len(answers_payload)}")
+    print(f"{'='*70}\n")
     try:
+        print("⏳ Sending POST request...")
         response = requests.post(submit_url, json=submission_data, timeout=60)
+        print(f"✅ Got response: Status {response.status_code}")
         response.raise_for_status()
         result_data = response.json()
+        print(f"\n{'='*70}")
+        print(f"📊 SUBMISSION RESULTS")
+        print(f"{'='*70}")
+        print(f"Response data: {result_data}")
+        print(f"{'='*70}\n")
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
+        print(final_status)
+        print("="*70)
+        print("✅ Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
         status_message = f"Submission Failed: {error_detail}"
+        print(f"\n{'='*70}")
+        print(f"❌ SUBMISSION FAILED")
+        print(f"{'='*70}")
         print(status_message)
+        print(f"{'='*70}\n")
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.Timeout:
         status_message = "Submission Failed: The request timed out."
+        print(f"\n{'='*70}")
+        print(f"❌ SUBMISSION FAILED")
+        print(f"{'='*70}")
         print(status_message)
+        print(f"{'='*70}\n")
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.RequestException as e:
         status_message = f"Submission Failed: Network error - {e}"
+        print(f"\n{'='*70}")
+        print(f"❌ SUBMISSION FAILED")
+        print(f"{'='*70}")
         print(status_message)
+        print(f"{'='*70}\n")
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
+        print(f"\n{'='*70}")
+        print(f"❌ SUBMISSION FAILED")
+        print(f"{'='*70}")
         print(status_message)
+        print(traceback.format_exc())
+        print(f"{'='*70}\n")
         results_df = pd.DataFrame(results_log)
         return status_message, results_df