DarrenDsa's picture
Improved logic in app.py
cd5f764 verified
import os
import re
import json
import time
import requests
import gradio as gr
# ── Constants ─────────────────────────────────────────────────────────────────
# Base URL of the scoring service; the code in this file appends
# "/questions", "/files/{task_id}" and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Chat-completions endpoint that call_perplexity() POSTs to.
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
# ── Helper Tools ──────────────────────────────────────────────────────────────
def download_file(task_id: str, max_chars: int = 10000, timeout: float = 30) -> str:
    """Download the file attached to a task from the GAIA scoring API.

    Args:
        task_id: Identifier of the task whose file should be fetched.
        max_chars: Maximum number of characters of the response text to return.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The first ``max_chars`` characters of the response body as text, or a
        string starting with ``"[File error"`` when the file could not be
        fetched. Callers detect failure by that prefix; this function does
        not raise.
    """
    if not task_id:
        # An empty id would request ".../files/", which is not a task file,
        # so fail fast without a network round-trip.
        return "[File error: missing task_id]"
    url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.text[:max_chars]
    except Exception as e:
        # Deliberately best-effort: a missing or unreadable file must not
        # abort the whole run, so every failure becomes a marker string.
        return f"[File error: {e}]"
# ── Direct Perplexity Call ────────────────────────────────────────────────────
def call_perplexity(
    system_prompt: str,
    user_message: str,
    api_key: str,
    *,
    model: str = "sonar-pro",
    temperature: float = 0.2,
    max_tokens: int = 500,
    timeout: float = 60,
) -> str:
    """Send one system + user message pair to the Perplexity chat API.

    Args:
        system_prompt: Content of the ``system`` message.
        user_message: Content of the ``user`` message.
        api_key: Bearer token placed in the ``Authorization`` header.
        model: Model name sent in the request body. The default was chosen
            by the original author, whose note says ``sonar-large`` was
            deprecated in Feb 2025.
        temperature: Sampling temperature sent in the request body.
        max_tokens: Completion token limit sent in the request body.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The content of the first choice's message on success. On failure a
        marker string is returned instead of raising: ``"HTTP_ERROR: <status>"``
        for HTTP error responses and ``"ERROR: <detail>"`` for anything else
        (network failure, unexpected response shape, missing key).
    """
    if not api_key:
        # Without a key the request can only be rejected; skip the round-trip.
        return "ERROR: missing API key"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    try:
        resp = requests.post(PERPLEXITY_API_URL, headers=headers, json=data, timeout=timeout)
        resp.raise_for_status()
        result = resp.json()
        content = result["choices"][0]["message"]["content"]
        print(f"[DEBUG] Raw Perplexity response: {content[:200]}")  # Debug log
        return content
    except requests.HTTPError as e:
        response = e.response
        if response is None:
            # HTTPError can be raised without an attached response; reading
            # status_code off None would raise from inside this handler.
            print(f"[DEBUG] HTTP Error: {str(e)[:200]}")
            return "HTTP_ERROR: unknown"
        print(f"[DEBUG] HTTP Error: {response.status_code} - {response.text[:200]}")
        return f"HTTP_ERROR: {response.status_code}"
    except Exception as e:
        # Best-effort boundary: callers look for the "ERROR:" prefix.
        print(f"[DEBUG] Exception: {str(e)[:200]}")
        return f"ERROR: {e}"
# ── Answer Cleaner ────────────────────────────────────────────────────────────
def clean_answer(raw: str) -> str:
    """Reduce a model reply to a bare answer string.

    Steps, in order: strip whitespace; return error markers untouched; drop
    one leading "answer"-style prefix (case-insensitive); keep only the first
    line when the text is multi-line and that line is shorter than 150
    characters; remove one pair of matching surrounding quotes; unwrap
    ``**bold**`` markdown.

    Args:
        raw: The reply to clean. Non-string values are converted with
            ``str()``; ``None`` is treated as an empty reply.

    Returns:
        The cleaned answer. Strings starting with ``"ERROR:"`` or
        ``"HTTP_ERROR:"`` are returned as-is (only stripped) so failures stay
        visible to the caller.
    """
    # Coerce before remembering the original so the debug log below can
    # always compare and slice it, even when the caller passed a non-string.
    text = "" if raw is None else str(raw)
    original = text
    text = text.strip()

    # Don't clean error messages - return them as-is for debugging
    if text.startswith(("ERROR:", "HTTP_ERROR:")):
        return text

    # Matching is case-insensitive, so each prefix is listed once in lower
    # case. Order matters: "the answer is:" must be tried before the shorter
    # "the answer is" so the colon is removed with it.
    prefixes = (
        "final answer:",
        "answer:",
        "the answer is:",
        "the answer is",
        "result:",
        "**answer:**",
        "based on",
    )
    lowered = text.lower()
    for prefix in prefixes:
        if lowered.startswith(prefix):
            text = text[len(prefix):].strip()
            break  # only one prefix is removed

    # Take first line if multi-line and short enough
    if "\n" in text:
        first_line = text.split("\n", 1)[0].strip()
        if len(first_line) < 150:
            text = first_line

    # Remove one pair of matching surrounding quotes
    if len(text) >= 2 and text[0] in ('"', "'") and text[0] == text[-1]:
        text = text[1:-1].strip()

    # Remove markdown bold
    text = re.sub(r"\*\*(.*?)\*\*", r"\1", text)

    # Answers are never filtered out here, only tidied.
    result = text.strip()
    if result != original:
        print(f"[DEBUG] Cleaned '{original[:100]}...' → '{result[:100]}'")
    return result
# ── System Prompt ─────────────────────────────────────────────────────────────
# System message sent with every question. It asks the model for the bare
# answer only and spells out the expected formatting. The arrows in the
# examples were mis-encoded ("β†’") and are restored to "→" so the model
# is not shown garbage characters.
SYSTEM_PROMPT = """Answer the question with ONLY the final answer. No explanation.
RULES:
- NO periods at end ("right" NOT "right.")
- Numbers: digits only ("42")
- Country names: full name ("Malta" NOT "MLT")
- Lists: comma-separated
EXAMPLES:
Q: "What year was Mona Lisa painted?" → 1503
Q: "Opposite of left" → right
"""
# ── Main Runner ───────────────────────────────────────────────────────────────
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every question from the scoring API with Perplexity and submit.

    The run fetches the question list, makes one throw-away test call to
    Perplexity, answers each question (attaching file text when the question
    mentions a file), and then posts all answers to the scoring API with up
    to three attempts.

    Args:
        profile: The logged-in Hugging Face user, or ``None`` when nobody is
            logged in.

    Returns:
        A ``(log_text, results)`` tuple. ``log_text`` is the newline-joined
        run log. ``results`` is a list of per-question dicts with the keys
        ``"#"``, ``"Task ID"``, ``"Question"`` and ``"Answer"``, or ``None``
        when the run stopped before any question was processed.
    """
    if not profile:
        return "❌ Please log in first.", None
    username = profile.username

    api_key = os.environ.get("PERPLEXITY_API_KEY")
    if not api_key:
        return "❌ PERPLEXITY_API_KEY not found in Space secrets!", None
    # Only the length is logged: printing any part of the secret would leak
    # it into the Space's stdout and into the log shown in the UI.
    print(f"[DEBUG] API key exists, length: {len(api_key)}")

    space_id = os.environ.get("SPACE_ID", "")
    agent_code_url = (
        f"https://huggingface.co/spaces/{space_id}/tree/main"
        if space_id
        else f"https://huggingface.co/spaces/{username}/my-gaia-agent/tree/main"
    )
    log = [f"👤 User: {username}", "📥 Fetching questions..."]

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None
    if not isinstance(questions, list) or not questions:
        # Nothing to answer; submitting an empty payload would be pointless.
        return "❌ Failed to fetch questions: the API returned no questions", None
    total = len(questions)
    log.append(f"✅ {total} questions loaded")

    # Test API with a simple question first so a bad key or model name is
    # visible at the top of the log.
    log.append("🤖 Testing Perplexity API...")
    test_answer = call_perplexity("You are helpful.", "What is 2+2?", api_key)
    log.append(f"🧪 Test call result: {test_answer[:100]}")
    log.append("─" * 40)

    # Words that suggest the question comes with an attachment.
    file_keywords = ("file", "image", "attached", "spreadsheet", "document", "excel")
    answers = []
    results_log = []
    for i, q in enumerate(questions):
        # "or" also covers keys that are present but null.
        task_id = q.get("task_id") or ""
        question_text = q.get("question") or ""
        log.append(f"[{i+1}/{total}] {question_text[:65]}...")

        # Rate limit: pause between questions, but not before the first one.
        if i > 0:
            time.sleep(4)

        # Attach file text to the prompt only; question_text stays untouched
        # so the results table shows the question, not the file content.
        prompt_text = question_text
        lowered = question_text.lower()
        if any(word in lowered for word in file_keywords):
            file_content = download_file(task_id)
            if not file_content.startswith("[File error"):
                prompt_text = f"{question_text}\n\nFile content:\n{file_content[:2000]}"

        # Call Perplexity
        try:
            user_prompt = f"Question: {prompt_text}\n\nAnswer with ONLY the answer, nothing else."
            raw_answer = call_perplexity(SYSTEM_PROMPT, user_prompt, api_key)
            final_answer = clean_answer(raw_answer)
            log.append(f" 📝 Raw: {raw_answer[:80]}")
            log.append(f" ✅ Final: {final_answer[:80] if final_answer else '(empty after cleaning)'}")
        except Exception as e:
            final_answer = f"EXCEPTION: {str(e)[:80]}"
            log.append(f" ❌ Error: {final_answer}")

        answers.append({"task_id": task_id, "submitted_answer": final_answer})
        # NOTE(review): results_log is a list of dicts; confirm the installed
        # Gradio version's DataFrame component accepts that shape.
        results_log.append({
            "#": i + 1,
            "Task ID": task_id[:8] + "...",
            "Question": question_text[:65] + "..." if len(question_text) > 65 else question_text,
            "Answer": final_answer or "(empty)",
        })

    # An answer counts only if it is non-empty and is not one of the failure
    # markers produced by call_perplexity() or the except branch above.
    failure_prefixes = ("ERROR", "HTTP_ERROR", "EXCEPTION")
    answered = sum(
        1
        for a in answers
        if a["submitted_answer"] and not a["submitted_answer"].startswith(failure_prefixes)
    )
    log.append("─" * 40)
    log.append(f"📊 Answered: {answered}/{total}")

    # Submit, retrying up to three times with a pause between attempts.
    payload = {
        "username": username,
        "agent_code": agent_code_url,
        "answers": answers,
    }
    for attempt in range(3):
        try:
            log.append(f"📤 Submitting ({attempt+1}/3)...")
            sub = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
            sub.raise_for_status()
            data = sub.json()
            score = data.get("score", "N/A")
            correct = data.get("correct_count", "?")
            log += [
                "─" * 40,
                "✅ SUBMITTED!",
                f"📊 Score: {score}%",
                f"✔️ Correct: {correct}/{total}",
                f"🔗 {agent_code_url}",
                "─" * 40,
                "🏆 https://huggingface.co/spaces/agents-course/Students_leaderboard",
            ]
            break
        except Exception as e:
            log.append(f"⚠️ Failed: {str(e)[:60]}")
            if attempt < 2:
                time.sleep(5)
    return "\n".join(log), results_log
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# Build the single-page UI: login button, run button, log box and results table.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    # Intro text. The model label says "Sonar Pro" to match the "sonar-pro"
    # model that call_perplexity() actually requests.
    gr.Markdown(
        "\n"
        "# 🤖 HF Agents Course — Unit 4\n"
        "**Perplexity Sonar Pro (Direct API with Debug Logging)**\n"
        "1. Log in with Hugging Face\n"
        "2. Click Run & Submit\n"
        "3. Check the logs to see what Perplexity is returning\n"
    )
    gr.LoginButton()
    run_btn = gr.Button("🚀 Run Agent & Submit All Answers", variant="primary", size="lg")
    status_box = gr.Textbox(label="Live Log (with debug info)", lines=25, interactive=False)
    results_table = gr.DataFrame(label="Results", headers=["#", "Task ID", "Question", "Answer"])
    # No explicit inputs are wired; presumably Gradio supplies the OAuth
    # profile from the handler's type hint — confirm against the Gradio docs.
    run_btn.click(fn=run_and_submit_all, outputs=[status_box, results_table])
    gr.Markdown("**Debug version** - Shows raw Perplexity responses")
# Start the app when the module is executed.
demo.launch()