emanuelediluzio commited on
Commit
7179605
Β·
verified Β·
1 Parent(s): 4319e54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -23
app.py CHANGED
@@ -435,14 +435,24 @@ def preprocess_question(question: str) -> str:
435
  # Keywords that indicate proper English text
436
  keywords = ["answer", "what", "who", "how", "find", "list", "which", "where",
437
  "when", "the", "is", "are", "was", "were", "has", "have", "this",
438
- "that", "from", "with", "about", "question", "video", "image"]
 
439
 
440
  orig_score = sum(1 for w in keywords if w in stripped.lower())
441
  rev_score = sum(1 for w in keywords if w in reversed_text.lower())
442
 
443
- # If reversed text has significantly more keywords, use it
444
- if rev_score > orig_score + 2 and len(stripped) > 30:
445
- print(f" πŸ”„ Detected reversed text, using reversed version")
 
 
 
 
 
 
 
 
 
446
  return reversed_text
447
 
448
  return stripped
@@ -525,14 +535,21 @@ def is_valid_answer(answer: str) -> bool:
525
  if not answer or len(answer.strip()) < 1:
526
  return False
527
 
 
 
 
 
 
528
  invalid_phrases = [
529
  "i don't know", "i dont know", "i do not know",
530
- "unknown", "n/a", "none", "error",
531
  "i cannot", "i can't", "i cant",
532
  "not available", "no answer", "unable to",
533
  "i'm not sure", "im not sure", "i am not sure",
534
  "no image", "cannot determine", "insufficient information",
535
- "not provided", "cannot access", "i'm unable", "i am unable"
 
 
536
  ]
537
 
538
  answer_lower = answer.lower()
@@ -543,23 +560,33 @@ def is_valid_answer(answer: str) -> bool:
543
  # MAIN SOLVER
544
  # ==========================================
545
 
546
- SYSTEM_PROMPT = """You are an expert AI assistant solving GAIA benchmark questions.
547
-
548
- CRITICAL RULES - Follow these EXACTLY:
549
- 1. Output ONLY the final answer - no explanations, no reasoning, no "The answer is"
550
- 2. Numbers: output just the number (e.g., "42" or "3.14")
551
- 3. Names: output just the name (e.g., "Marie Curie" or "Paris")
552
- 4. Lists: use comma-separated format (e.g., "apple, banana, cherry")
553
- 5. Dates: use the format requested or standard format
554
- 6. Do NOT add a period at the end
555
- 7. If data is provided (CSV, Excel, etc.), analyze it carefully and compute any needed calculations
556
- 8. For math/counting questions, show your work internally but output only the final number
557
-
558
- SPECIAL CASES:
559
- - For reversed/scrambled questions: the question has been corrected for you
560
- - For video questions without transcript: answer based on any description provided
561
- - For image questions: answer based on any text description of the image
562
- - When asked about specific facts, be precise and concise"""
 
 
 
 
 
 
 
 
 
 
563
 
564
 
565
  def solve_question(question: str, task_id: str, groq_key: str) -> str:
@@ -570,6 +597,18 @@ def solve_question(question: str, task_id: str, groq_key: str) -> str:
570
  processed_q = preprocess_question(question)
571
  context_parts = []
572
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  # 1. Check for attached files
574
  file_content, file_type = fetch_task_file(task_id)
575
  if file_content and file_type != "none":
 
435
  # Keywords that indicate proper English text
436
  keywords = ["answer", "what", "who", "how", "find", "list", "which", "where",
437
  "when", "the", "is", "are", "was", "were", "has", "have", "this",
438
+ "that", "from", "with", "about", "question", "video", "image",
439
+ "write", "opposite", "sentence", "if", "you", "understand"]
440
 
441
  orig_score = sum(1 for w in keywords if w in stripped.lower())
442
  rev_score = sum(1 for w in keywords if w in reversed_text.lower())
443
 
444
+ print(f" πŸ“Š Text analysis: orig_keywords={orig_score}, rev_keywords={rev_score}")
445
+
446
+ # If reversed text has more keywords, use it
447
+ if rev_score > orig_score + 1 and len(stripped) > 20:
448
+ print(f" πŸ”„ Detected reversed text!")
449
+ print(f" πŸ“ Reversed: {reversed_text[:100]}...")
450
+ return reversed_text
451
+
452
+ # Also check if text starts with punctuation (common in reversed text)
453
+ if stripped and stripped[0] in '.!?,;:' and rev_score >= orig_score:
454
+ print(f" πŸ”„ Text starts with punctuation, trying reversed")
455
+ print(f" πŸ“ Reversed: {reversed_text[:100]}...")
456
  return reversed_text
457
 
458
  return stripped
 
535
  if not answer or len(answer.strip()) < 1:
536
  return False
537
 
538
+ # If answer is too long, it's probably not a direct answer
539
+ if len(answer) > 200:
540
+ print(f" ⚠️ Answer too long ({len(answer)} chars), likely not a direct answer")
541
+ return False
542
+
543
  invalid_phrases = [
544
  "i don't know", "i dont know", "i do not know",
545
+ "unknown", "n/a", "error",
546
  "i cannot", "i can't", "i cant",
547
  "not available", "no answer", "unable to",
548
  "i'm not sure", "im not sure", "i am not sure",
549
  "no image", "cannot determine", "insufficient information",
550
+ "not provided", "cannot access", "i'm unable", "i am unable",
551
+ "not able to", "i am not able", "however", "based on typical",
552
+ "without access", "no transcript", "no information"
553
  ]
554
 
555
  answer_lower = answer.lower()
 
560
  # MAIN SOLVER
561
  # ==========================================
562
 
563
+ SYSTEM_PROMPT = """You are solving GAIA benchmark questions. Give ONLY the exact answer.
564
+
565
+ RULES:
566
+ 1. Output ONLY the answer - no explanations, no "The answer is", no reasoning
567
+ 2. Numbers: just the number (42 or 3.14)
568
+ 3. Names: just the name (Marie Curie)
569
+ 4. Lists: comma-separated (apple, banana, cherry)
570
+ 5. If asked for the opposite of a word, give just that opposite word
571
+ 6. If asked to count something, give just the count number
572
+ 7. Never say "I don't know" - always attempt an answer based on context or knowledge
573
+
574
+ Examples of correct output format:
575
+ - Q: "What is 2+2?" A: "4"
576
+ - Q: "Write the opposite of left" A: "right"
577
+ - Q: "Who painted the Mona Lisa?" A: "Leonardo da Vinci"
578
+ - Q: "How many legs does a spider have?" A: "8\""""
579
+
580
+
581
+ def is_simple_question(question: str) -> bool:
582
+ """Check if question is simple enough to answer without web search."""
583
+ q_lower = question.lower()
584
+ # Simple questions about opposites, basic facts, math
585
+ simple_patterns = [
586
+ "opposite of", "antonym of", "what is the opposite",
587
+ "write the opposite", "2+2", "2 + 2",
588
+ ]
589
+ return any(p in q_lower for p in simple_patterns) and len(question) < 200
590
 
591
 
592
  def solve_question(question: str, task_id: str, groq_key: str) -> str:
 
597
  processed_q = preprocess_question(question)
598
  context_parts = []
599
 
600
+ # Check if it's a simple question that doesn't need web search
601
+ if is_simple_question(processed_q):
602
+ print(" ⚑ Simple question detected, answering directly")
603
+ answer_raw = ask_groq([
604
+ {"role": "system", "content": SYSTEM_PROMPT},
605
+ {"role": "user", "content": f"Answer this directly: {processed_q}"}
606
+ ], groq_key, max_tokens=50, temperature=0.0)
607
+ answer = clean_answer(answer_raw) if answer_raw else ""
608
+ if answer and is_valid_answer(answer):
609
+ print(f" βœ… Direct answer: {answer}")
610
+ return answer
611
+
612
  # 1. Check for attached files
613
  file_content, file_type = fetch_task_file(task_id)
614
  if file_content and file_type != "none":