Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,160 +1,118 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
-
from
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# --- Constants ---
|
| 8 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
# ---
|
| 11 |
class BasicAgent:
|
| 12 |
def __init__(self):
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
print("Model loaded.")
|
| 16 |
-
|
| 17 |
def __call__(self, question: str) -> str:
|
| 18 |
-
prompt = f"Answer the
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
try:
|
| 54 |
-
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 55 |
-
response.raise_for_status()
|
| 56 |
-
result_data = response.json()
|
| 57 |
-
return result_data
|
| 58 |
-
except requests.exceptions.RequestException as e:
|
| 59 |
-
print(f"Submission failed: {e}")
|
| 60 |
-
return None
|
| 61 |
-
|
| 62 |
-
# --- Main Execution Function ---
|
| 63 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 64 |
-
"""
|
| 65 |
-
Fetches all questions, runs the agent on them, and submits all answers.
|
| 66 |
-
"""
|
| 67 |
-
# --- Check Hugging Face Profile ---
|
| 68 |
if profile:
|
| 69 |
-
username = profile.username
|
| 70 |
-
print(f"User logged in: {username}")
|
| 71 |
else:
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
# --- Initialize Agent ---
|
| 76 |
try:
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
except Exception as e:
|
| 79 |
-
|
| 80 |
-
return f"Error initializing agent: {e}", None
|
| 81 |
-
|
| 82 |
-
# --- Fetch Questions ---
|
| 83 |
-
questions_data = fetch_questions(DEFAULT_API_URL)
|
| 84 |
-
if not questions_data:
|
| 85 |
-
return "Failed to fetch questions.", None
|
| 86 |
|
| 87 |
-
# --- Process Questions ---
|
| 88 |
answers_payload = []
|
| 89 |
results_log = []
|
| 90 |
for item in questions_data:
|
| 91 |
task_id = item.get("task_id")
|
| 92 |
question_text = item.get("question")
|
| 93 |
-
if not task_id or
|
| 94 |
-
print(f"Skipping item with missing task_id or question: {item}")
|
| 95 |
continue
|
| 96 |
try:
|
| 97 |
-
submitted_answer = agent
|
| 98 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 99 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 100 |
except Exception as e:
|
| 101 |
-
|
| 102 |
-
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
| 103 |
-
|
| 104 |
-
# --- Prepare Submission Data ---
|
| 105 |
-
if not answers_payload:
|
| 106 |
-
return "No answers to submit.", pd.DataFrame(results_log)
|
| 107 |
|
| 108 |
submission_data = {
|
| 109 |
"username": username.strip(),
|
| 110 |
-
"agent_code":
|
| 111 |
"answers": answers_payload
|
| 112 |
}
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
f"
|
| 122 |
-
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 123 |
-
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 124 |
-
f"Message: {result_data.get('message', 'No message received.')}"
|
| 125 |
-
)
|
| 126 |
-
|
| 127 |
-
results_df = pd.DataFrame(results_log)
|
| 128 |
-
return final_status, results_df
|
| 129 |
|
| 130 |
# --- Gradio Interface ---
|
| 131 |
with gr.Blocks() as demo:
|
| 132 |
-
gr.Markdown("#
|
| 133 |
-
gr.Markdown(
|
| 134 |
-
"""
|
| 135 |
**Instructions:**
|
| 136 |
-
1.
|
| 137 |
-
2.
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
---
|
| 141 |
-
**Disclaimers:**
|
| 142 |
-
Clicking "submit" might take some time to process all questions and submit answers.
|
| 143 |
-
"""
|
| 144 |
-
)
|
| 145 |
-
|
| 146 |
gr.LoginButton()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
-
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 149 |
-
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 150 |
-
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 151 |
-
|
| 152 |
-
run_button.click(
|
| 153 |
-
fn=run_and_submit_all,
|
| 154 |
-
outputs=[status_output, results_table]
|
| 155 |
-
)
|
| 156 |
-
|
| 157 |
-
# --- Run the Interface ---
|
| 158 |
if __name__ == "__main__":
|
| 159 |
-
|
| 160 |
-
demo.launch(debug=True, share=False)
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
+
import time
|
| 5 |
import pandas as pd
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
|
| 8 |
+
# Load environment variables (python-dotenv; reads a local .env file when present).
load_dotenv()

# --- Constants ---
# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# HTTP endpoint that BasicAgent posts its prompts to.
HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
# Token used for the Bearer Authorization header; None when the variable is unset.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
|
| 15 |
|
| 16 |
+
# --- Agent Definition ---
|
| 17 |
class BasicAgent:
    """Question-answering agent that queries the HTTP endpoint at ``HF_API_URL``.

    Calling an instance with a question sends one text-generation request and
    returns the cleaned model output, or ``FALLBACK_ANSWER`` when no answer
    could be obtained after ``MAX_RETRIES`` attempts.
    """

    MAX_RETRIES = 3          # total attempts per question
    UNAVAILABLE_BACKOFF_S = 10  # pause after an HTTP 503 before the next attempt
    NETWORK_BACKOFF_S = 5    # pause after a transport-level failure
    FALLBACK_ANSWER = "Error: Unable to generate answer."

    def __init__(self):
        # Only attach an Authorization header when a token is configured;
        # formatting an unset token would send the literal "Bearer None".
        if HUGGINGFACE_TOKEN:
            self.headers = {"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"}
        else:
            print("Warning: HUGGINGFACE_TOKEN is not set; sending unauthenticated requests.")
            self.headers = {}

    @staticmethod
    def _extract_answer(data):
        """Return the cleaned answer text from a decoded response body.

        The expected shape is a non-empty list whose first element is a dict
        with a string ``"generated_text"`` entry. Any other shape yields
        ``None`` so the caller can report it instead of relying on a blanket
        exception handler.
        """
        if not isinstance(data, list) or not data:
            return None
        first = data[0]
        if not isinstance(first, dict):
            return None
        text = first.get("generated_text")
        if not isinstance(text, str):
            return None
        # Trim whitespace, drop trailing periods, then trim again in case
        # whitespace preceded the period(s).
        return text.strip().rstrip(".").strip()

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return the answer text.

        Args:
            question: The question to send to the model.

        Returns:
            The cleaned generated text, or ``FALLBACK_ANSWER`` if every
            attempt failed or a non-retriable error occurred.
        """
        prompt = (
            "Answer the question concisely and factually. Provide only the answer "
            "in the exact format required by the question, with no additional text, "
            "explanations, or formatting. Do not use quotation marks, bullet points, "
            "or any other formatting. Ensure correct casing and pluralization as "
            "specified.\n"
            "\n"
            f"Question: {question}\n"
            "Answer: "
        )
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 100,
                "return_full_text": False,
            },
        }

        for attempt in range(1, self.MAX_RETRIES + 1):
            # Sleeping after the final attempt only delays the fallback.
            can_retry = attempt < self.MAX_RETRIES
            try:
                response = requests.post(
                    HF_API_URL, headers=self.headers, json=payload, timeout=30
                )
                response.raise_for_status()
                answer = self._extract_answer(response.json())
                if answer is None:
                    print("Error: unexpected response format from inference endpoint")
                    break
                return answer
            except requests.exceptions.HTTPError as e:
                status = e.response.status_code if e.response is not None else None
                if status == 503:
                    # Service unavailable: treated as transient, so wait and retry.
                    if can_retry:
                        time.sleep(self.UNAVAILABLE_BACKOFF_S)
                else:
                    detail = e.response.text if e.response is not None else e
                    print(f"HTTP Error: {detail}")
                    break
            except requests.exceptions.RequestException as e:
                print(f"Request failed: {e}, retrying...")
                if can_retry:
                    time.sleep(self.NETWORK_BACKOFF_S)
            except Exception as e:
                # Anything else (e.g. an undecodable body) is not retried.
                print(f"Error: {e}")
                break
        return self.FALLBACK_ANSWER
|
| 55 |
+
|
| 56 |
+
# --- Submission Logic ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame | None]:
    """Fetch the questions, answer each one with ``BasicAgent``, and submit the answers.

    Args:
        profile: Profile of the logged-in user, or ``None`` when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame with
        one row per processed question ("Task ID", "Question",
        "Submitted Answer"), or ``None`` when the run stopped before any
        question was processed.
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"

    agent = BasicAgent()
    # Link to this Space's code, included in the submission when SPACE_ID is set.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # The loop below calls .get() on each item, so require a non-empty list.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or in an unexpected format.", None

    answers_payload = []
    results_log = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # Record the failure for display, but do not submit it as an answer.
            results_log.append(
                {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"Error: {e}"}
            )
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append(
            {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
        )

    results_df = pd.DataFrame(results_log)

    # Do not post an empty submission to the scoring service.
    if not answers_payload:
        return "No answers to submit.", results_df

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = f"Success! Score: {result_data.get('score', 'N/A')}%"
        return final_status, results_df
    except Exception as e:
        return f"Submission failed: {e}", results_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
# --- Gradio Interface ---
|
| 104 |
with gr.Blocks() as demo:
    # Page title followed by short usage instructions.
    gr.Markdown("# GAIA Evaluation Agent")
    gr.Markdown("""
    **Instructions:**
    1. Log in to your Hugging Face account using the button below.
    2. Click 'Run Evaluation & Submit Answers' to fetch questions, run your agent, and submit answers.
    """)

    # Login control; run_and_submit_all is declared to take the user's
    # OAuth profile (or None) as its only parameter.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit Answers")
    status_output = gr.Textbox(label="Status")
    results_table = gr.DataFrame(label="Results")

    # The handler's (status, results) return pair fills these two components.
    output_components = [status_output, results_table]
    run_button.click(fn=run_and_submit_all, outputs=output_components)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|