Spaces:

quantumbit
/

check

Sleeping

App Files Community

quantumbit committed on Aug 22, 2025

Commit

2667439

verified ·

1 Parent(s): 7f91338

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -2

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ class ChallengeResponse(BaseModel):
 # Scraper
 # -------------------------
 def scrape_with_requests(url: str) -> Dict[str, Any]:
-    """Scrape a webpage and extract visible + hidden info."""
     try:
         headers = {
             "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
@@ -77,16 +77,33 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
                 if k.startswith("data-") and isinstance(v, str) and v.strip():
                     hidden_values.append(f"{k}={v.strip()}")
         return {
             "title": title,
             "visible_text": visible_text,
-            "hidden_values": hidden_values[:200],
         }
     except Exception as e:
         logger.error(f"Request scraping failed for {url}: {e}")
         return {}
 def answer_question(question: str, content: Dict[str, Any]) -> str:
     """Simple rule-based extraction for Round 5 questions."""
     ql = question.lower()

 # Scraper
 # -------------------------
 def scrape_with_requests(url: str) -> Dict[str, Any]:
+    """Scrape a webpage and extract visible + hidden info (expanded)."""
     try:
         headers = {
             "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
                 if k.startswith("data-") and isinstance(v, str) and v.strip():
                     hidden_values.append(f"{k}={v.strip()}")
+        # Script tags (look for JSON-like challenge info)
+        for script in soup.find_all("script"):
+            txt = script.get_text(" ", strip=True)
+            if txt:
+                # Look for "challengeId", "completionCode", etc.
+                matches = re.findall(r"(challenge\w*|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)", txt, flags=re.I)
+                for k, v in matches:
+                    hidden_values.append(f"script {k}={v}")
+        # Regex tokens (catch suspicious long strings)
+        tokens = re.findall(r"[A-Za-z0-9_\-]{8,}", html)
+        for t in tokens:
+            if any(x in t.lower() for x in ["chall", "code", "id"]):
+                hidden_values.append(f"token {t}")
         return {
             "title": title,
             "visible_text": visible_text,
+            "hidden_values": hidden_values[:500],  # keep cap
         }
     except Exception as e:
         logger.error(f"Request scraping failed for {url}: {e}")
         return {}
 def answer_question(question: str, content: Dict[str, Any]) -> str:
     """Simple rule-based extraction for Round 5 questions."""
     ql = question.lower()