Update app.py
Browse files
app.py
CHANGED
|
@@ -435,14 +435,24 @@ def preprocess_question(question: str) -> str:
|
|
| 435 |
# Keywords that indicate proper English text
|
| 436 |
keywords = ["answer", "what", "who", "how", "find", "list", "which", "where",
|
| 437 |
"when", "the", "is", "are", "was", "were", "has", "have", "this",
|
| 438 |
-
"that", "from", "with", "about", "question", "video", "image"
|
|
|
|
| 439 |
|
| 440 |
orig_score = sum(1 for w in keywords if w in stripped.lower())
|
| 441 |
rev_score = sum(1 for w in keywords if w in reversed_text.lower())
|
| 442 |
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
return reversed_text
|
| 447 |
|
| 448 |
return stripped
|
|
@@ -525,14 +535,21 @@ def is_valid_answer(answer: str) -> bool:
|
|
| 525 |
if not answer or len(answer.strip()) < 1:
|
| 526 |
return False
|
| 527 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
invalid_phrases = [
|
| 529 |
"i don't know", "i dont know", "i do not know",
|
| 530 |
-
"unknown", "n/a", "
|
| 531 |
"i cannot", "i can't", "i cant",
|
| 532 |
"not available", "no answer", "unable to",
|
| 533 |
"i'm not sure", "im not sure", "i am not sure",
|
| 534 |
"no image", "cannot determine", "insufficient information",
|
| 535 |
-
"not provided", "cannot access", "i'm unable", "i am unable"
|
|
|
|
|
|
|
| 536 |
]
|
| 537 |
|
| 538 |
answer_lower = answer.lower()
|
|
@@ -543,23 +560,33 @@ def is_valid_answer(answer: str) -> bool:
|
|
| 543 |
# MAIN SOLVER
|
| 544 |
# ==========================================
|
| 545 |
|
| 546 |
-
SYSTEM_PROMPT = """You are
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
1. Output ONLY the
|
| 550 |
-
2. Numbers:
|
| 551 |
-
3. Names:
|
| 552 |
-
4. Lists:
|
| 553 |
-
5.
|
| 554 |
-
6.
|
| 555 |
-
7.
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
-
|
| 560 |
-
-
|
| 561 |
-
-
|
| 562 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
|
| 564 |
|
| 565 |
def solve_question(question: str, task_id: str, groq_key: str) -> str:
|
|
@@ -570,6 +597,18 @@ def solve_question(question: str, task_id: str, groq_key: str) -> str:
|
|
| 570 |
processed_q = preprocess_question(question)
|
| 571 |
context_parts = []
|
| 572 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
# 1. Check for attached files
|
| 574 |
file_content, file_type = fetch_task_file(task_id)
|
| 575 |
if file_content and file_type != "none":
|
|
|
|
| 435 |
# Keywords that indicate proper English text
|
| 436 |
keywords = ["answer", "what", "who", "how", "find", "list", "which", "where",
|
| 437 |
"when", "the", "is", "are", "was", "were", "has", "have", "this",
|
| 438 |
+
"that", "from", "with", "about", "question", "video", "image",
|
| 439 |
+
"write", "opposite", "sentence", "if", "you", "understand"]
|
| 440 |
|
| 441 |
orig_score = sum(1 for w in keywords if w in stripped.lower())
|
| 442 |
rev_score = sum(1 for w in keywords if w in reversed_text.lower())
|
| 443 |
|
| 444 |
+
print(f" π Text analysis: orig_keywords={orig_score}, rev_keywords={rev_score}")
|
| 445 |
+
|
| 446 |
+
# If reversed text has more keywords, use it
|
| 447 |
+
if rev_score > orig_score + 1 and len(stripped) > 20:
|
| 448 |
+
print(f" π Detected reversed text!")
|
| 449 |
+
print(f" π Reversed: {reversed_text[:100]}...")
|
| 450 |
+
return reversed_text
|
| 451 |
+
|
| 452 |
+
# Also check if text starts with punctuation (common in reversed text)
|
| 453 |
+
if stripped and stripped[0] in '.!?,;:' and rev_score >= orig_score:
|
| 454 |
+
print(f" π Text starts with punctuation, trying reversed")
|
| 455 |
+
print(f" π Reversed: {reversed_text[:100]}...")
|
| 456 |
return reversed_text
|
| 457 |
|
| 458 |
return stripped
|
|
|
|
| 535 |
if not answer or len(answer.strip()) < 1:
|
| 536 |
return False
|
| 537 |
|
| 538 |
+
# If answer is too long, it's probably not a direct answer
|
| 539 |
+
if len(answer) > 200:
|
| 540 |
+
print(f" β οΈ Answer too long ({len(answer)} chars), likely not a direct answer")
|
| 541 |
+
return False
|
| 542 |
+
|
| 543 |
invalid_phrases = [
|
| 544 |
"i don't know", "i dont know", "i do not know",
|
| 545 |
+
"unknown", "n/a", "error",
|
| 546 |
"i cannot", "i can't", "i cant",
|
| 547 |
"not available", "no answer", "unable to",
|
| 548 |
"i'm not sure", "im not sure", "i am not sure",
|
| 549 |
"no image", "cannot determine", "insufficient information",
|
| 550 |
+
"not provided", "cannot access", "i'm unable", "i am unable",
|
| 551 |
+
"not able to", "i am not able", "however", "based on typical",
|
| 552 |
+
"without access", "no transcript", "no information"
|
| 553 |
]
|
| 554 |
|
| 555 |
answer_lower = answer.lower()
|
|
|
|
| 560 |
# MAIN SOLVER
|
| 561 |
# ==========================================
|
| 562 |
|
| 563 |
+
SYSTEM_PROMPT = """You are solving GAIA benchmark questions. Give ONLY the exact answer.
|
| 564 |
+
|
| 565 |
+
RULES:
|
| 566 |
+
1. Output ONLY the answer - no explanations, no "The answer is", no reasoning
|
| 567 |
+
2. Numbers: just the number (42 or 3.14)
|
| 568 |
+
3. Names: just the name (Marie Curie)
|
| 569 |
+
4. Lists: comma-separated (apple, banana, cherry)
|
| 570 |
+
5. If asked for the opposite of a word, give just that opposite word
|
| 571 |
+
6. If asked to count something, give just the count number
|
| 572 |
+
7. Never say "I don't know" - always attempt an answer based on context or knowledge
|
| 573 |
+
|
| 574 |
+
Examples of correct output format:
|
| 575 |
+
- Q: "What is 2+2?" A: "4"
|
| 576 |
+
- Q: "Write the opposite of left" A: "right"
|
| 577 |
+
- Q: "Who painted the Mona Lisa?" A: "Leonardo da Vinci"
|
| 578 |
+
- Q: "How many legs does a spider have?" A: "8\""""
|
| 579 |
+
|
| 580 |
+
|
| 581 |
+
def is_simple_question(question: str) -> bool:
    """Check if question is simple enough to answer without web search.

    A question counts as simple when it is shorter than 200 characters and
    contains one of a small set of trigger phrases (opposites/antonyms or the
    literal "2+2" arithmetic example).

    Matching is case-insensitive and whitespace-insensitive: runs of spaces,
    tabs and newlines are collapsed to a single space before the phrases are
    searched, so "opposite\\nof" or "2  +  2" are recognised just like their
    single-space forms.

    Args:
        question: The (already preprocessed) question text.

    Returns:
        True if the question matches a simple pattern and is short enough,
        otherwise False.
    """
    # Long questions are never treated as simple; check this first so the
    # normalisation and pattern scan are skipped for them.
    if len(question) >= 200:
        return False

    # Simple questions about opposites, basic facts, math
    simple_patterns = (
        "opposite of", "antonym of", "what is the opposite",
        "write the opposite", "2+2", "2 + 2",
    )

    # Collapse every whitespace run to one space so multi-line or oddly
    # spaced questions still match the single-space patterns above.
    normalized = " ".join(question.lower().split())
    return any(pattern in normalized for pattern in simple_patterns)
|
| 590 |
|
| 591 |
|
| 592 |
def solve_question(question: str, task_id: str, groq_key: str) -> str:
|
|
|
|
| 597 |
processed_q = preprocess_question(question)
|
| 598 |
context_parts = []
|
| 599 |
|
| 600 |
+
# Check if it's a simple question that doesn't need web search
|
| 601 |
+
if is_simple_question(processed_q):
|
| 602 |
+
print(" β‘ Simple question detected, answering directly")
|
| 603 |
+
answer_raw = ask_groq([
|
| 604 |
+
{"role": "system", "content": SYSTEM_PROMPT},
|
| 605 |
+
{"role": "user", "content": f"Answer this directly: {processed_q}"}
|
| 606 |
+
], groq_key, max_tokens=50, temperature=0.0)
|
| 607 |
+
answer = clean_answer(answer_raw) if answer_raw else ""
|
| 608 |
+
if answer and is_valid_answer(answer):
|
| 609 |
+
print(f" β
Direct answer: {answer}")
|
| 610 |
+
return answer
|
| 611 |
+
|
| 612 |
# 1. Check for attached files
|
| 613 |
file_content, file_type = fetch_task_file(task_id)
|
| 614 |
if file_content and file_type != "none":
|