Added simulation of question list.
Browse files
app.py
CHANGED
|
@@ -101,35 +101,39 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 101 |
|
| 102 |
# 2. Fetch Questions
|
| 103 |
print(f"Fetching questions from: {questions_url}")
|
| 104 |
-
try:
|
| 105 |
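-
response = requests.get(questions_url, timeout=15)
|
| 106 |
-
response.raise_for_status()
|
| 107 |
-
questions_data = response.json()
|
| 108 |
-
if not questions_data:
|
| 109 |
-
print("Fetched questions list is empty.")
|
| 110 |
-
return "Fetched questions list is empty or invalid format.", None
|
| 111 |
-
print(f"Fetched {len(questions_data)} questions.")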
|
| 112 |
-
except requests.exceptions.RequestException as e:
|
| 113 |
-
print(f"Error fetching questions: {e}")
|
| 114 |
-
return f"Error fetching questions: {e}", None
|
| 115 |
-
except requests.exceptions.JSONDecodeError as e:
|
| 116 |
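-
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 117 |
-
print(f"Response text: {response.text[:500]}")
|
| 118 |
-
return f"Error decoding server response for questions: {e}", None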
|
| 119 |
-
except Exception as e:
|
| 120 |
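-
print(f"An unexpected error occurred fetching questions: {e}")
|
| 121 |
-
return f"An unexpected error occurred fetching questions: {e}", None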
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
# 3. Run your Agent
|
| 124 |
results_log = []
|
| 125 |
answers_payload = []
|
| 126 |
print(f"Running agent on {len(questions_data)} questions...")
|
| 127 |
-
count = 0
|
| 128 |
-
question_array = [1, 3, 5, 6, 7] # Example question array for testing
|
| 129 |
for item in questions_data:
|
| 130 |
-
count += 1
|
| 131 |
-
if count not in question_array:
|
| 132 |
-
continue
|
| 133 |
task_id = item.get("task_id")
|
| 134 |
question_text = item.get("question")
|
| 135 |
if not task_id or question_text is None:
|
|
@@ -198,7 +202,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 198 |
|
| 199 |
# --- Build Gradio Interface using Blocks ---
|
| 200 |
with gr.Blocks() as demo:
|
| 201 |
-
gr.Markdown("# Basic Agent Evaluation Runner #
|
| 202 |
gr.Markdown(
|
| 203 |
"""
|
| 204 |
**Instructions:**
|
|
|
|
| 101 |
|
| 102 |
# 2. Fetch Questions
|
| 103 |
print(f"Fetching questions from: {questions_url}")
|
| 104 |
+
# try:
|
| 105 |
+
# response = requests.get(questions_url, timeout=15)
|
| 106 |
+
# response.raise_for_status()
|
| 107 |
+
# questions_data = response.json()
|
| 108 |
+
# if not questions_data:
|
| 109 |
+
# print("Fetched questions list is empty.")
|
| 110 |
+
# return "Fetched questions list is empty or invalid format.", None
|
| 111 |
+
# print(f"Fetched {len(questions_data)} questions.")
|
| 112 |
+
# except requests.exceptions.RequestException as e:
|
| 113 |
+
# print(f"Error fetching questions: {e}")
|
| 114 |
+
# return f"Error fetching questions: {e}", None
|
| 115 |
+
# except requests.exceptions.JSONDecodeError as e:
|
| 116 |
+
# print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 117 |
+
# print(f"Response text: {response.text[:500]}")
|
| 118 |
+
# return f"Error decoding server response for questions: {e}", None
|
| 119 |
+
# except Exception as e:
|
| 120 |
+
# print(f"An unexpected error occurred fetching questions: {e}")
|
| 121 |
+
# return f"An unexpected error occurred fetching questions: {e}", None
|
| 122 |
+
questions_data = [] # Simulated questions data for testing
|
| 123 |
+
questions_data.append({"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be", "question": "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."})
|
| 124 |
+
questions_data.append({"task_id": "2d83110e-a098-4ebb-9987-066c06fa42d0", "question": ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI"})
|
| 125 |
+
questions_data.append({"task_id": "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8", "question": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?"})
|
| 126 |
|
| 127 |
# 3. Run your Agent
|
| 128 |
results_log = []
|
| 129 |
answers_payload = []
|
| 130 |
print(f"Running agent on {len(questions_data)} questions...")
|
| 131 |
+
#count = 0
|
| 132 |
+
#question_array = [1, 3, 5, 6, 7] # Example question array for testing
|
| 133 |
for item in questions_data:
|
| 134 |
+
#count += 1
|
| 135 |
+
#if count not in question_array:
|
| 136 |
+
# continue
|
| 137 |
task_id = item.get("task_id")
|
| 138 |
question_text = item.get("question")
|
| 139 |
if not task_id or question_text is None:
|
|
|
|
| 202 |
|
| 203 |
# --- Build Gradio Interface using Blocks ---
|
| 204 |
with gr.Blocks() as demo:
|
| 205 |
+
gr.Markdown("# Basic Agent Evaluation Runner #22")
|
| 206 |
gr.Markdown(
|
| 207 |
"""
|
| 208 |
**Instructions:**
|