quantumbit committed on
Commit
2667439
·
verified ·
1 Parent(s): 7f91338

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -35,7 +35,7 @@ class ChallengeResponse(BaseModel):
35
  # Scraper
36
  # -------------------------
37
  def scrape_with_requests(url: str) -> Dict[str, Any]:
38
- """Scrape a webpage and extract visible + hidden info."""
39
  try:
40
  headers = {
41
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
@@ -77,16 +77,33 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
77
  if k.startswith("data-") and isinstance(v, str) and v.strip():
78
  hidden_values.append(f"{k}={v.strip()}")
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  return {
81
  "title": title,
82
  "visible_text": visible_text,
83
- "hidden_values": hidden_values[:200],
84
  }
 
85
  except Exception as e:
86
  logger.error(f"Request scraping failed for {url}: {e}")
87
  return {}
88
 
89
 
 
90
  def answer_question(question: str, content: Dict[str, Any]) -> str:
91
  """Simple rule-based extraction for Round 5 questions."""
92
  ql = question.lower()
 
35
  # Scraper
36
  # -------------------------
37
  def scrape_with_requests(url: str) -> Dict[str, Any]:
38
+ """Scrape a webpage and extract visible + hidden info (expanded)."""
39
  try:
40
  headers = {
41
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
 
77
  if k.startswith("data-") and isinstance(v, str) and v.strip():
78
  hidden_values.append(f"{k}={v.strip()}")
79
 
80
+ # Script tags (look for JSON-like challenge info)
81
+ for script in soup.find_all("script"):
82
+ txt = script.get_text(" ", strip=True)
83
+ if txt:
84
+ # Look for "challengeId", "completionCode", etc.
85
+ matches = re.findall(r"(challenge\w*|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)", txt, flags=re.I)
86
+ for k, v in matches:
87
+ hidden_values.append(f"script {k}={v}")
88
+
89
+ # Regex tokens (catch suspicious long strings)
90
+ tokens = re.findall(r"[A-Za-z0-9_\-]{8,}", html)
91
+ for t in tokens:
92
+ if any(x in t.lower() for x in ["chall", "code", "id"]):
93
+ hidden_values.append(f"token {t}")
94
+
95
  return {
96
  "title": title,
97
  "visible_text": visible_text,
98
+ "hidden_values": hidden_values[:500], # keep cap
99
  }
100
+
101
  except Exception as e:
102
  logger.error(f"Request scraping failed for {url}: {e}")
103
  return {}
104
 
105
 
106
+
107
  def answer_question(question: str, content: Dict[str, Any]) -> str:
108
  """Simple rule-based extraction for Round 5 questions."""
109
  ql = question.lower()