Spaces:

quantumbit
/

check

Sleeping

App Files Files Community

quantumbit committed on Aug 22, 2025

Commit

10aba13

verified ·

1 Parent(s): 1c457e1

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -26

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import os
 import re
 import logging
 from typing import List, Dict, Any
 from fastapi import FastAPI
@@ -31,6 +33,22 @@ class ChallengeResponse(BaseModel):
     answers: List[str]
 # -------------------------
 # Scraper
 # -------------------------
@@ -81,18 +99,20 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
         for script in soup.find_all("script"):
             txt = script.get_text(" ", strip=True)
             if txt:
-                # Look for "challengeId", "completionCode", etc.
                 matches = re.findall(r"(challenge\w*|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)", txt, flags=re.I)
                 for k, v in matches:
                     hidden_values.append(f"script {k}={v}")
-        # ✅ Direct regex for challengeID inside HTML/JS blobs
-        challengeid_match = re.search(r'"?challengeID"?\s*[:=]\s*"([^"]+)"', html, re.I)
-        if challengeid_match:
-            hidden_values.append(f"challengeID={challengeid_match.group(1)}")
         # Regex tokens (catch suspicious long strings)
-        tokens = re.findall(r"[A-Za-z0-9_\-]{8,}", html)
         for t in tokens:
             if any(x in t.lower() for x in ["chall", "code", "id"]):
                 hidden_values.append(f"token {t}")
@@ -100,7 +120,7 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
         return {
             "title": title,
             "visible_text": visible_text,
-            "hidden_values": hidden_values[:500],  # keep cap
         }
     except Exception as e:
@@ -108,39 +128,27 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
         return {}
 def answer_question(question: str, content: Dict[str, Any]) -> str:
     """Simple rule-based extraction for Round 5 questions."""
     ql = question.lower()
     title = content.get("title", "")
-    visible = content.get("visible_text", "")
     hidden = content.get("hidden_values", [])
-    # Look into hidden values
     for h in hidden:
-        if "challenge" in ql and "name" in ql and "challenge" in h.lower():
             return h.split("=", 1)[-1].strip()
-        if "challenge id" in ql and "id" in h.lower():
             return h.split("=", 1)[-1].strip()
-        if "completion code" in ql or "hidden code" in ql or "code" in ql:
-            if "code" in h.lower():
-                return h.split("=", 1)[-1].strip()
     # Fallbacks
     if "challenge name" in ql and title:
         return title.strip()
-    if "challenge id" in ql:
-        m = re.search(r"challenge\s*id\s*[:\-]\s*([A-Za-z0-9\-_]+)", visible, flags=re.I)
-        if m:
-            return m.group(1)
-    if "completion code" in ql or "hidden code" in ql:
-        m = re.search(r"(?:completion|hidden)\s*code\s*[:\-]\s*([A-Za-z0-9\-_]+)", visible, flags=re.I)
-        if m:
-            return m.group(1)
     return "Challenge information not found"

 import os
 import re
+import json
+import base64
 import logging
 from typing import List, Dict, Any
 from fastapi import FastAPI
     answers: List[str]
+# -------------------------
+# Helpers
+# -------------------------
def try_decode_jwt(token: str) -> Dict[str, Any]:
    """Decode the payload segment of a JWT-shaped string without verifying it.

    The token is split on "."; the middle segment is base64url-decoded
    (padding is restored first) and parsed as JSON. The signature is never
    checked, so the result must be treated as untrusted data.

    Args:
        token: Candidate string of the form ``header.payload.signature``.

    Returns:
        The decoded payload when it is a JSON object; otherwise ``{}``.
        ``{}`` is also returned when the input is not a string, does not have
        exactly three dot-separated parts, or the payload is not valid
        base64url / UTF-8 / JSON.
    """
    if not isinstance(token, str):
        return {}
    parts = token.split(".")
    if len(parts) != 3:
        return {}
    # base64url segments in JWTs are stored unpadded; restore "=" so the
    # decoder accepts them.
    payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4)
    try:
        payload_json = base64.urlsafe_b64decode(payload_b64).decode("utf-8")
        payload = json.loads(payload_json)
    except (ValueError, RecursionError):
        # binascii.Error, UnicodeDecodeError and json.JSONDecodeError are all
        # ValueError subclasses; this is a best-effort probe, so fail quietly.
        return {}
    # Valid JSON is not necessarily an object (e.g. "MTIz" decodes to 123).
    # Callers iterate .items(), so only a dict is an acceptable payload.
    return payload if isinstance(payload, dict) else {}
 # -------------------------
 # Scraper
 # -------------------------
         for script in soup.find_all("script"):
             txt = script.get_text(" ", strip=True)
             if txt:
                 matches = re.findall(r"(challenge\w*|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)", txt, flags=re.I)
                 for k, v in matches:
                     hidden_values.append(f"script {k}={v}")
+        # ✅ Look for JWT tokens in HTML and decode
+        jwt_matches = re.findall(r"[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+", html)
+        for token in jwt_matches:
+            data = try_decode_jwt(token)
+            if data:
+                for k, v in data.items():
+                    hidden_values.append(f"jwt {k}={v}")
         # Regex tokens (catch suspicious long strings)
+        tokens = re.findall(r"[A-Za-z0-9_\-]{12,}", html)
         for t in tokens:
             if any(x in t.lower() for x in ["chall", "code", "id"]):
                 hidden_values.append(f"token {t}")
         return {
             "title": title,
             "visible_text": visible_text,
+            "hidden_values": hidden_values[:500],
         }
     except Exception as e:
         return {}
+# -------------------------
+# Answer extractor
+# -------------------------
 def answer_question(question: str, content: Dict[str, Any]) -> str:
     """Answer a challenge question from scraped content using keyword rules.

     Each entry of ``content["hidden_values"]`` is scanned in order; the first
     entry whose text matches the kind of question asked wins, and the text
     after its first "=" is returned (the whole entry if it has no "=").

     Args:
         question: Natural-language question; matching is case-insensitive.
         content: Scraper result; the optional keys read here are "title"
             and "hidden_values" (a list of strings).

     Returns:
         The extracted value, the stripped page title as a fallback for
         "challenge name" questions, or "Challenge information not found".
     """
     ql = question.lower()
     # Collapse spaces, underscores and hyphens so that "challenge ID",
     # "challenge_id" and "challengeID" are all recognised as the same ask.
     ql_compact = "".join(ql.replace("_", " ").replace("-", " ").split())
     title = content.get("title", "")
     # A missing or None list is treated as "nothing scraped".
     hidden = content.get("hidden_values") or []
     asks_id = "challengeid" in ql_compact
     asks_code = "completion" in ql and "code" in ql
     asks_name = "challenge name" in ql
     for entry in hidden:
         entry_lc = entry.lower()
         if (
             (asks_id and "challengeid" in entry_lc)
             or (asks_code and "code" in entry_lc)
             or (asks_name and "challenge" in entry_lc)
         ):
             return entry.split("=", 1)[-1].strip()
     # Fallbacks
     if asks_name and title:
         return title.strip()
     return "Challenge information not found"