Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Files Community

gabejavitt committed on Nov 10, 2025

Commit

ebe46d7

verified ·

1 Parent(s): 26b8984

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -1

app.py CHANGED Viewed

@@ -1845,6 +1845,92 @@ except Exception as e:
 # =============================================================================
 # RUN AND SUBMIT FUNCTION
 # =============================================================================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -1876,6 +1962,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     # 2. Fetch Questions
     print(f"\n{'='*70}")
     print(f"📥 FETCHING QUESTIONS")
@@ -1902,6 +1989,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"❌ An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
     # 3. Run your Agent
     print(f"\n{'='*70}")
     print(f"🚀 STARTING EVALUATION")
@@ -1919,7 +2019,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         task_id = item.get("task_id")
         question_text = item.get("question")
-        correct_answer = item.get("answer", "N/A")  # Get correct answer from API
         # Look for file locally in files/ directory
         local_file_path = None

 # =============================================================================
 # RUN AND SUBMIT FUNCTION
 # =============================================================================
def load_answer_sheet(filepath: str = "answer_sheet.json") -> dict[str, str]:
    """Load the answer sheet from a JSON file.

    The sheet is expected to be a JSON object; callers index the result by
    task id, so any other top-level JSON value is rejected.

    Args:
        filepath: Path of the JSON answer sheet.

    Returns:
        The parsed mapping, or an empty dict when the file is missing,
        unreadable, not valid JSON, or not a JSON object at top level.
        A status line is printed in every case.
    """
    # Open directly instead of checking for existence first: avoids the
    # check-then-open race and a second filesystem lookup.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            answers = json.load(f)
    except FileNotFoundError:
        print(f"⚠️ Answer sheet not found at {filepath}")
        return {}
    except (OSError, ValueError) as e:
        # OSError: permission problems, path is a directory, ...
        # ValueError: json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ Error loading answer sheet: {e}")
        return {}

    if not isinstance(answers, dict):
        # A list/number/string is valid JSON but cannot be looked up by task id.
        print(
            "❌ Error loading answer sheet: expected a JSON object, "
            f"got {type(answers).__name__}"
        )
        return {}

    print(f"✅ Loaded answer sheet with {len(answers)} answers from {filepath}")
    return answers
def check_answer_correctness(submitted: str, correct: str) -> tuple[bool, str]:
    """Compare a submitted answer with the expected one using fuzzy matching.

    Checks are applied in this order, after trimming and lower-casing:

    1. exact equality;
    2. numeric equivalence (thousands separators and ``$`` ignored, absolute
       tolerance 0.01) when both answers parse as numbers;
    3. equality after removing ASCII punctuation, only when the answers are
       not both numbers, so ``1.5`` is never confused with ``15``;
    4. for comma-separated expected answers, comparison as unordered sets;
    5. substring containment, reported as a partial match (still incorrect).

    Args:
        submitted: Answer produced by the agent. ``None`` is treated as an
            empty answer; other non-string values are converted with ``str``.
        correct: Expected answer, handled the same way.

    Returns:
        ``(is_correct, feedback_message)``.
    """
    import string

    def _as_number(text):
        # NOTE(review): the second replaced character was garbled in the
        # original source; '$' is assumed (currency prefix) -- confirm.
        try:
            return float(text.replace(',', '').replace('$', '').strip())
        except ValueError:
            return None

    # Answer sheets loaded from JSON may hold numbers, and an agent may
    # return None; coerce so the string operations below cannot raise.
    submitted = "" if submitted is None else str(submitted)
    correct = "" if correct is None else str(correct)

    submitted_norm = submitted.strip().lower()
    correct_norm = correct.strip().lower()

    if submitted_norm == correct_norm:
        return True, "✅ EXACT MATCH"

    # Numbers are compared before any punctuation is stripped, otherwise the
    # decimal point and the minus sign would be lost.
    submitted_num = _as_number(submitted_norm)
    correct_num = _as_number(correct_norm)
    if submitted_num is not None and correct_num is not None:
        if abs(submitted_num - correct_num) < 0.01:  # Allow small floating point differences
            return True, "✅ MATCH (numeric equivalence)"
    else:
        strip_punctuation = str.maketrans('', '', string.punctuation)
        submitted_clean = submitted_norm.translate(strip_punctuation)
        correct_clean = correct_norm.translate(strip_punctuation)
        if submitted_clean == correct_clean:
            return True, "✅ MATCH (punctuation difference)"

    # List-type answers: compare as unordered sets of items.
    if ',' in correct_norm:
        correct_items = {item.strip() for item in correct_norm.split(',')}
        submitted_items = {item.strip() for item in submitted_norm.split(',')}
        if correct_items == submitted_items:
            return True, "✅ MATCH (item order difference)"
        # Sorted so the feedback text is stable from run to run.
        missing_items = sorted(correct_items - submitted_items)
        extra_items = sorted(submitted_items - correct_items)
        if missing_items and not extra_items:
            return False, f"❌ MISSING: {', '.join(missing_items)}"
        if extra_items and not missing_items:
            return False, f"❌ EXTRA: {', '.join(extra_items)}"
        return False, f"❌ MISSING: {', '.join(missing_items)} | EXTRA: {', '.join(extra_items)}"

    # An empty string is a substring of everything, so require real content
    # on both sides before calling it a partial match.
    if submitted_norm and correct_norm and (
        submitted_norm in correct_norm or correct_norm in submitted_norm
    ):
        return False, f"❌ PARTIAL MATCH (submitted: '{submitted}' | correct: '{correct}')"
    return False, f"❌ WRONG (submitted: '{submitted}' | correct: '{correct}')"
def create_answer_sheet_template(questions: list[dict], filepath: str = "answer_sheet.json") -> None:
    """Write an answer sheet template with one empty answer per question.

    The file is a JSON object mapping each question's ``task_id`` to ``""``.
    An existing file at ``filepath`` is overwritten.

    Args:
        questions: Question dicts; entries without a ``task_id`` are skipped.
        filepath: Destination path of the JSON template.
    """
    # Read the id with .get() so one malformed question cannot abort
    # template creation with a KeyError.
    answer_template = {
        q['task_id']: ""
        for q in questions
        if q.get('task_id') is not None
    }
    skipped = len(questions) - sum(1 for q in questions if q.get('task_id') is not None)

    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(answer_template, f, indent=2)

    print(f"✅ Created answer sheet template at {filepath}")
    print(f"   Please fill in the correct answers for {len(answer_template)} questions")
    if skipped:
        print(f"⚠️ Skipped {skipped} question(s) without a task_id")
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     # 2. Fetch Questions
     print(f"\n{'='*70}")
     print(f"📥 FETCHING QUESTIONS")
         print(f"❌ An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
+    # Load answer sheet
+    answer_sheet = load_answer_sheet("answer_sheet.json")
+    # If answer sheet doesn't exist, create template
+    if not answer_sheet:
+        create_answer_sheet_template(questions, "answer_sheet.json")
+        print("\n⚠️ Please fill in the answer_sheet.json file with correct answers")
+        print("   Then run the script again to check agent performance\n")
+    results = []
+    local_correct = 0
+    local_total = 0
     # 3. Run your Agent
     print(f"\n{'='*70}")
     print(f"🚀 STARTING EVALUATION")
         task_id = item.get("task_id")
         question_text = item.get("question")
+        correct_answer = answer_sheet[task_id]
         # Look for file locally in files/ directory
         local_file_path = None