Update app.py
Browse files
app.py
CHANGED
|
@@ -16,6 +16,13 @@ from typing import Optional, Tuple, List, Dict, Any
|
|
| 16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 17 |
GROQ_API = "https://api.groq.com/openai/v1/chat/completions"
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
# ==========================================
|
| 20 |
# TOOLS
|
| 21 |
# ==========================================
|
|
@@ -355,41 +362,62 @@ def search_wikipedia(query: str) -> str:
|
|
| 355 |
# GROQ LLM
|
| 356 |
# ==========================================
|
| 357 |
|
| 358 |
-
def ask_groq(messages: List[Dict], groq_key: str, max_tokens: int = 400, temperature: float = 0.1) -> str:
|
| 359 |
-
"""Send request to Groq API with retries."""
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
GROQ_API,
|
| 364 |
-
headers={
|
| 365 |
-
"Authorization": f"Bearer {groq_key}",
|
| 366 |
-
"Content-Type": "application/json"
|
| 367 |
-
},
|
| 368 |
-
json={
|
| 369 |
-
"model": "llama-3.3-70b-versatile",
|
| 370 |
-
"messages": messages,
|
| 371 |
-
"temperature": temperature,
|
| 372 |
-
"max_tokens": max_tokens,
|
| 373 |
-
},
|
| 374 |
-
timeout=30,
|
| 375 |
-
)
|
| 376 |
-
|
| 377 |
-
if resp.status_code == 200:
|
| 378 |
-
return resp.json()["choices"][0]["message"]["content"].strip()
|
| 379 |
-
elif resp.status_code == 429:
|
| 380 |
-
wait_time = 5 * (attempt + 1)
|
| 381 |
-
print(f" β³ Rate limited, waiting {wait_time}s...")
|
| 382 |
-
time.sleep(wait_time)
|
| 383 |
-
else:
|
| 384 |
-
print(f" β οΈ Groq API error: {resp.status_code} - {resp.text[:200]}")
|
| 385 |
-
time.sleep(2)
|
| 386 |
-
except requests.exceptions.Timeout:
|
| 387 |
-
print(f" β οΈ Groq timeout (attempt {attempt + 1})")
|
| 388 |
-
time.sleep(3)
|
| 389 |
-
except Exception as e:
|
| 390 |
-
print(f" β οΈ Groq error: {e}")
|
| 391 |
-
time.sleep(2)
|
| 392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
return ""
|
| 394 |
|
| 395 |
|
|
@@ -637,7 +665,9 @@ def solve_question(question: str, task_id: str, groq_key: str) -> str:
|
|
| 637 |
]
|
| 638 |
|
| 639 |
answer_raw = ask_groq(messages, groq_key, max_tokens=400, temperature=0.1)
|
| 640 |
-
answer = clean_answer(answer_raw)
|
|
|
|
|
|
|
| 641 |
|
| 642 |
# If answer isn't valid, try again with different approach
|
| 643 |
if not is_valid_answer(answer):
|
|
@@ -649,13 +679,22 @@ def solve_question(question: str, task_id: str, groq_key: str) -> str:
|
|
| 649 |
{"role": "user", "content": f"Using your knowledge, answer this question with ONLY the final answer:\n\n{processed_q}"}
|
| 650 |
]
|
| 651 |
answer_raw = ask_groq(retry_messages, groq_key, max_tokens=400, temperature=0.2)
|
| 652 |
-
answer = clean_answer(answer_raw)
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
answer = "I don't know"
|
|
|
|
| 657 |
|
| 658 |
-
print(f" β
Answer: {answer}")
|
| 659 |
return answer
|
| 660 |
|
| 661 |
|
|
@@ -679,8 +718,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 679 |
print(f"\n{'='*60}")
|
| 680 |
print(f"π€ User: {username}")
|
| 681 |
print(f"π€ Agent: GAIA Agent v4")
|
|
|
|
| 682 |
print(f"{'='*60}")
|
| 683 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
# Fetch questions
|
| 685 |
try:
|
| 686 |
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
|
|
@@ -723,8 +773,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 723 |
"Answer": answer
|
| 724 |
})
|
| 725 |
|
| 726 |
-
# Rate limit protection
|
| 727 |
-
time.sleep(
|
| 728 |
|
| 729 |
if not answers:
|
| 730 |
return "β Nessuna risposta generata.", pd.DataFrame(results)
|
|
|
|
| 16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 17 |
GROQ_API = "https://api.groq.com/openai/v1/chat/completions"
|
| 18 |
|
| 19 |
+
# Groq model identifiers, listed in order of preference. ask_groq() iterates
# over this list when no explicit model is given and moves on to the next
# entry once the current one has exhausted its attempts or returns HTTP 404.
# NOTE(review): confirm each ID is still served by the Groq API.
GROQ_MODELS = [
    "llama-3.3-70b-versatile",
    "llama-3.1-70b-versatile",
    "mixtral-8x7b-32768",
]
|
| 25 |
+
|
| 26 |
# ==========================================
|
| 27 |
# TOOLS
|
| 28 |
# ==========================================
|
|
|
|
| 362 |
# GROQ LLM
|
| 363 |
# ==========================================
|
| 364 |
|
| 365 |
+
def ask_groq(messages: List[Dict], groq_key: str, max_tokens: int = 400, temperature: float = 0.1, model: Optional[str] = None) -> str:
    """Send a chat-completion request to Groq with retries and model fallback.

    Each candidate model is tried up to two times. The first non-empty
    completion is returned immediately; otherwise the next model is tried.

    Args:
        messages: Chat messages in the OpenAI-compatible format, e.g.
            ``[{"role": "user", "content": "..."}]``.
        groq_key: Groq API key, sent as a Bearer token.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to the API.
        model: If given, only this model is tried; otherwise every entry of
            ``GROQ_MODELS`` is tried in order.

    Returns:
        The stripped completion text, or ``""`` when the key is missing or
        rejected (HTTP 401) or when every attempt fails. This function never
        raises: all request/parsing errors are logged and retried.
    """
    if not groq_key:
        print(" ❌ GROQ_API_KEY is empty!")
        return ""

    # Use the specified model, or try all configured models in order.
    models_to_try = [model] if model else GROQ_MODELS
    attempts_per_model = 2
    headers = {
        "Authorization": f"Bearer {groq_key}",
        "Content-Type": "application/json",
    }

    for model_name in models_to_try:
        for attempt in range(attempts_per_model):
            try:
                resp = requests.post(
                    GROQ_API,
                    headers=headers,
                    json={
                        "model": model_name,
                        "messages": messages,
                        "temperature": temperature,
                        "max_tokens": max_tokens,
                    },
                    timeout=60,
                )

                if resp.status_code == 200:
                    result = resp.json()
                    # Tolerate `"choices": []` and explicit nulls instead of
                    # letting them surface as IndexError/AttributeError.
                    choices = result.get("choices") or [{}]
                    message = choices[0].get("message") or {}
                    content = message.get("content") or ""
                    if content:
                        print(f" 📝 [{model_name}] Response: {content[:80]}...")
                        return content.strip()
                    print(f" ⚠️ [{model_name}] Empty content")
                elif resp.status_code == 429:
                    # Linear back-off: 10s after the first hit, 20s after the second.
                    wait_time = 10 * (attempt + 1)
                    print(f" ⏳ [{model_name}] Rate limited, waiting {wait_time}s...")
                    time.sleep(wait_time)
                elif resp.status_code == 401:
                    # A rejected key will fail for every model; give up now.
                    print(" ❌ Groq API key invalid!")
                    return ""
                elif resp.status_code == 404:
                    print(f" ⚠️ Model {model_name} not found, trying next...")
                    break  # Retrying an unknown model is pointless.
                else:
                    print(f" ⚠️ [{model_name}] HTTP {resp.status_code}: {resp.text[:200]}")
                    time.sleep(3)
            except requests.exceptions.Timeout:
                print(f" ⚠️ [{model_name}] Timeout (attempt {attempt + 1}/{attempts_per_model})")
                time.sleep(5)
            except Exception as e:
                # Best-effort boundary: log anything unexpected (connection
                # errors, malformed JSON, ...) and keep trying.
                print(f" ⚠️ [{model_name}] Error: {type(e).__name__}: {e}")
                time.sleep(3)

    print(" ❌ All Groq attempts failed")
    return ""
|
| 422 |
|
| 423 |
|
|
|
|
| 665 |
]
|
| 666 |
|
| 667 |
answer_raw = ask_groq(messages, groq_key, max_tokens=400, temperature=0.1)
|
| 668 |
+
answer = clean_answer(answer_raw) if answer_raw else ""
|
| 669 |
+
|
| 670 |
+
print(f" π€ Raw: '{answer_raw[:100] if answer_raw else '[empty]'}' -> Clean: '{answer}'")
|
| 671 |
|
| 672 |
# If answer isn't valid, try again with different approach
|
| 673 |
if not is_valid_answer(answer):
|
|
|
|
| 679 |
{"role": "user", "content": f"Using your knowledge, answer this question with ONLY the final answer:\n\n{processed_q}"}
|
| 680 |
]
|
| 681 |
answer_raw = ask_groq(retry_messages, groq_key, max_tokens=400, temperature=0.2)
|
| 682 |
+
answer = clean_answer(answer_raw) if answer_raw else ""
|
| 683 |
+
print(f" π€ Retry raw: '{answer_raw[:100] if answer_raw else '[empty]'}' -> Clean: '{answer}'")
|
| 684 |
+
|
| 685 |
+
# If still no valid answer but we have some text, use it anyway
|
| 686 |
+
if not answer or len(answer.strip()) == 0:
|
| 687 |
+
# If we got something from Groq, try to extract any content
|
| 688 |
+
if answer_raw and len(answer_raw.strip()) > 0:
|
| 689 |
+
answer = answer_raw.strip().split('\n')[0].strip()
|
| 690 |
+
print(f" π Using raw first line: '{answer}'")
|
| 691 |
+
|
| 692 |
+
# Final validation - only return "I don't know" as absolute last resort
|
| 693 |
+
if not answer or len(answer.strip()) == 0:
|
| 694 |
answer = "I don't know"
|
| 695 |
+
print(f" β No answer found, defaulting to 'I don't know'")
|
| 696 |
|
| 697 |
+
print(f" β
Final Answer: {answer}")
|
| 698 |
return answer
|
| 699 |
|
| 700 |
|
|
|
|
| 718 |
print(f"\n{'='*60}")
|
| 719 |
print(f"π€ User: {username}")
|
| 720 |
print(f"π€ Agent: GAIA Agent v4")
|
| 721 |
+
print(f"π API Key: {groq_key[:8]}...{groq_key[-4:]}")
|
| 722 |
print(f"{'='*60}")
|
| 723 |
|
| 724 |
+
# Test Groq API connectivity first
|
| 725 |
+
print("\nπ Testing Groq API connectivity...")
|
| 726 |
+
test_response = ask_groq(
|
| 727 |
+
[{"role": "user", "content": "Say 'OK' and nothing else."}],
|
| 728 |
+
groq_key, max_tokens=10, temperature=0.0
|
| 729 |
+
)
|
| 730 |
+
if not test_response:
|
| 731 |
+
return "β Groq API test failed! Check your API key and try again.", None
|
| 732 |
+
print(f"β
Groq API test passed: '{test_response}'")
|
| 733 |
+
|
| 734 |
# Fetch questions
|
| 735 |
try:
|
| 736 |
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
|
|
|
|
| 773 |
"Answer": answer
|
| 774 |
})
|
| 775 |
|
| 776 |
+
# Rate limit protection - increase delay between questions
|
| 777 |
+
time.sleep(2.5)
|
| 778 |
|
| 779 |
if not answers:
|
| 780 |
return "β Nessuna risposta generata.", pd.DataFrame(results)
|