iitmbs24f committed on
Commit
093e58a
·
verified ·
1 Parent(s): 8c80842

Upload 9 files

Browse files
Files changed (1) hide show
  1. app/solver.py +122 -14
app/solver.py CHANGED
@@ -918,17 +918,41 @@ def solve_project2_entry(text: str, email: str) -> str:
918
  return email
919
 
920
  def solve_project2_uv(text: str, email: str, page_content: Dict[str, Any]) -> str:
921
- """Q2: /project2-uv - Return user-agent from JSON response"""
922
  try:
923
- url = f"https://tds-llm-analysis.s-anand.net/project2/uv.json?email={email}"
924
- response = requests.get(url, headers={"Accept": "application/json"}, timeout=10)
925
- response.raise_for_status()
926
- data = response.json()
927
- user_agent = data.get("user-agent", "")
928
- logger.info(f"Extracted user-agent: {user_agent}")
929
- return user_agent
 
 
 
 
 
 
 
 
 
 
 
 
 
930
  except Exception as e:
931
  logger.error(f"Error in project2-uv: {e}")
 
 
 
 
 
 
 
 
 
 
 
932
  return ""
933
 
934
  def solve_project2_git(text: str, email: str) -> str:
@@ -1285,6 +1309,37 @@ class QuizSolver:
1285
 
1286
  # Ensure answer is in the correct format (string or simple JSON-serializable)
1287
  answer = self._normalize_answer(answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1288
  logger.info(f"Answer computed: {str(answer)[:200]}...")
1289
 
1290
  # Store answer for final quiz
@@ -1757,14 +1812,15 @@ class QuizSolver:
1757
  logger.warning(f"Error handling GitHub API: {e}")
1758
  # Continue with other strategies
1759
 
1760
- # Strategy 7: Use LLM to solve (only if we have enough time)
1761
  remaining = self._check_time_remaining()
1762
  # For audio passphrase questions, use LLM even with less time
1763
  is_audio_question = 'transcribe' in question.lower() or 'passphrase' in question.lower() or 'spoken phrase' in question.lower()
1764
- min_time_needed = 15.0 if is_audio_question else 25.0 # Lower threshold for audio questions
 
1765
 
1766
- # Only use LLM if we have enough time AND haven't found answer yet
1767
- # Reserve at least 10s for submission
1768
  if remaining >= min_time_needed:
1769
  logger.info("Attempting to solve with LLM...")
1770
  try:
@@ -1800,8 +1856,57 @@ class QuizSolver:
1800
  logger.info("Extracted simple answer from question")
1801
  return simple_answer
1802
 
1803
- # Strategy 9: Last resort - return a default answer
1804
- logger.warning("Could not solve question, using default answer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1805
  return "answer"
1806
 
1807
  async def _extract_secret_from_scrape_task(self, question: str, page_content: Dict[str, Any]) -> Optional[str]:
@@ -2398,6 +2503,9 @@ class QuizSolver:
2398
  # If it's very long, truncate
2399
  if len(answer) > 1000:
2400
  answer = answer[:1000]
 
 
 
2401
  return answer
2402
 
2403
  # For other types, convert to string
 
918
  return email
919
 
920
  def solve_project2_uv(text: str, email: str, page_content: Dict[str, Any]) -> str:
921
+ """Q2: /project2-uv - Return the command string (not the output)"""
922
  try:
923
+ # The question asks for the command string, not the user-agent value
924
+ # Construct the command: uv http get <url> -H "Accept: application/json"
925
+ from urllib.parse import urlencode, urlparse
926
+
927
+ base_url = page_content.get('url', '')
928
+ # Extract the base domain from the current URL
929
+ if 'tds-llm-analysis.s-anand.net' in base_url:
930
+ domain = 'https://tds-llm-analysis.s-anand.net'
931
+ else:
932
+ # Fallback: construct from current URL
933
+ parsed = urlparse(base_url)
934
+ domain = f"{parsed.scheme}://{parsed.netloc}"
935
+
936
+ # URL encode the email parameter
937
+ params = urlencode({'email': email})
938
+ api_url = f"{domain}/project2/uv.json?{params}"
939
+
940
+ command = f'uv http get {api_url} -H "Accept: application/json"'
941
+ logger.info(f"Constructed command string: {command}")
942
+ return command
943
  except Exception as e:
944
  logger.error(f"Error in project2-uv: {e}")
945
+ # Fallback: try to extract from question text
946
+ if 'uv http get' in text.lower():
947
+ # Try to find the command in the text
948
+ import re
949
+ cmd_match = re.search(r'(uv\s+http\s+get\s+[^\n<>"]+(?:\s+-H\s+"[^"]+")?)', text, re.IGNORECASE)
950
+ if cmd_match:
951
+ cmd = cmd_match.group(1).strip()
952
+ # Replace email placeholder if present
953
+ if email and ('<your email>' in cmd or '<email>' in cmd):
954
+ cmd = cmd.replace('<your email>', email).replace('<email>', email)
955
+ return cmd
956
  return ""
957
 
958
  def solve_project2_git(text: str, email: str) -> str:
 
1309
 
1310
  # Ensure answer is in the correct format (string or simple JSON-serializable)
1311
  answer = self._normalize_answer(answer)
1312
+
1313
+ # Validate answer is not empty - try to extract from page if empty
1314
+ if not answer or (isinstance(answer, str) and not answer.strip()):
1315
+ logger.warning("Answer is empty, attempting to extract from page content")
1316
+ # Try one more time to extract answer from page
1317
+ text = page_content.get('all_text', page_content.get('text', ''))
1318
+ if text:
1319
+ # Try to find any meaningful content
1320
+ simple_answer = self._extract_simple_answer(question_text, page_content)
1321
+ if simple_answer and simple_answer.strip():
1322
+ answer = simple_answer
1323
+ logger.info(f"Extracted answer from page: {answer[:100]}...")
1324
+ else:
1325
+ # Use LLM as last resort if we have time
1326
+ remaining = self._check_time_remaining()
1327
+ if remaining >= 10.0:
1328
+ try:
1329
+ available_data = self._extract_data_from_page(page_content)
1330
+ available_data['email'] = email
1331
+ llm_answer = await solve_with_llm(question_text, available_data)
1332
+ if llm_answer and llm_answer.strip():
1333
+ answer = llm_answer.strip()
1334
+ logger.info(f"LLM provided answer: {answer[:100]}...")
1335
+ except Exception as e:
1336
+ logger.warning(f"LLM retry failed: {e}")
1337
+
1338
+ # Only use fallback if still empty
1339
+ if not answer or (isinstance(answer, str) and not answer.strip()):
1340
+ logger.warning("Still empty after retry, using minimal fallback")
1341
+ answer = "answer" # Fallback to prevent empty submission
1342
+
1343
  logger.info(f"Answer computed: {str(answer)[:200]}...")
1344
 
1345
  # Store answer for final quiz
 
1812
  logger.warning(f"Error handling GitHub API: {e}")
1813
  # Continue with other strategies
1814
 
1815
+ # Strategy 7: Use LLM to solve (be more aggressive - use it earlier)
1816
  remaining = self._check_time_remaining()
1817
  # For audio passphrase questions, use LLM even with less time
1818
  is_audio_question = 'transcribe' in question.lower() or 'passphrase' in question.lower() or 'spoken phrase' in question.lower()
1819
+ # Lower thresholds to use LLM more often
1820
+ min_time_needed = 10.0 if is_audio_question else 15.0 # Reduced from 15/25 to 10/15
1821
 
1822
+ # Use LLM if we have enough time AND haven't found answer yet
1823
+ # Reserve at least 5s for submission (reduced from 10s)
1824
  if remaining >= min_time_needed:
1825
  logger.info("Attempting to solve with LLM...")
1826
  try:
 
1856
  logger.info("Extracted simple answer from question")
1857
  return simple_answer
1858
 
1859
+ # Strategy 9: Final LLM attempt - use LLM even with limited time if we haven't found an answer
1860
+ remaining = self._check_time_remaining()
1861
+ if remaining >= 10.0: # Try LLM if we have at least 10 seconds
1862
+ logger.info("Final attempt: Using LLM to solve question")
1863
+ try:
1864
+ llm_answer = await solve_with_llm(question, available_data)
1865
+ if llm_answer and llm_answer.strip():
1866
+ # Try to parse as JSON if it looks like JSON
1867
+ json_answer = extract_json_from_text(llm_answer)
1868
+ if json_answer:
1869
+ return json_answer
1870
+ # Clean up the answer
1871
+ llm_answer = llm_answer.strip()
1872
+ if len(llm_answer) > 0:
1873
+ logger.info("LLM provided answer in final attempt")
1874
+ return llm_answer
1875
+ except Exception as e:
1876
+ logger.warning(f"Final LLM attempt failed: {e}")
1877
+
1878
+ # Strategy 10: Extract any meaningful text from page as last resort
1879
+ text = page_content.get('all_text', page_content.get('text', ''))
1880
+ # Try to find any substantial content that might be the answer
1881
+ if text:
1882
+ # Look for any quoted strings, numbers, or substantial text
1883
+ # Extract first substantial sentence or phrase
1884
+ sentences = re.split(r'[.!?]\s+', text)
1885
+ for sentence in sentences:
1886
+ sentence = sentence.strip()
1887
+ # Skip if it's too short, too long, or looks like instructions
1888
+ if 5 <= len(sentence) <= 200:
1889
+ # Skip common instruction phrases
1890
+ if not any(phrase in sentence.lower() for phrase in [
1891
+ 'submit', 'answer', 'question', 'click', 'enter', 'provide',
1892
+ 'please', 'note:', 'important', 'remember'
1893
+ ]):
1894
+ logger.info(f"Extracted potential answer from page text: {sentence[:100]}...")
1895
+ return sentence
1896
+
1897
+ # Last resort: Try to extract any URL, email, or code from the page
1898
+ url_match = re.search(r'https?://[^\s<>"\'\)]+', text)
1899
+ if url_match:
1900
+ logger.info(f"Extracted URL as answer: {url_match.group(0)}")
1901
+ return url_match.group(0)
1902
+
1903
+ email_match = re.search(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', text)
1904
+ if email_match:
1905
+ logger.info(f"Extracted email as answer: {email_match.group(0)}")
1906
+ return email_match.group(0)
1907
+
1908
+ # Only use fallback if absolutely nothing found
1909
+ logger.warning("Could not solve question after all strategies, using minimal fallback")
1910
  return "answer"
1911
 
1912
  async def _extract_secret_from_scrape_task(self, question: str, page_content: Dict[str, Any]) -> Optional[str]:
 
2503
  # If it's very long, truncate
2504
  if len(answer) > 1000:
2505
  answer = answer[:1000]
2506
+ # Ensure we don't return empty string
2507
+ if not answer:
2508
+ return "answer" # Fallback
2509
  return answer
2510
 
2511
  # For other types, convert to string