Spaces:

quantumbit
/

check

Sleeping

App Files Files Community

quantumbit committed on Aug 22, 2025

Commit

a526fb5

verified ·

1 Parent(s): 10aba13

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -24

app.py CHANGED Viewed

@@ -44,8 +44,11 @@ def try_decode_jwt(token: str) -> Dict[str, Any]:
             return {}
         payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4)  # pad
         payload_json = base64.urlsafe_b64decode(payload_b64).decode("utf-8")
-        return json.loads(payload_json)
-    except Exception:
         return {}
@@ -69,6 +72,7 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
         visible_text = soup.get_text(separator=" ", strip=True)[:6000]
         hidden_values: List[str] = []
         # Hidden inputs
         for inp in soup.find_all("input", {"type": "hidden"}):
@@ -95,32 +99,63 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
                 if k.startswith("data-") and isinstance(v, str) and v.strip():
                     hidden_values.append(f"{k}={v.strip()}")
-        # Script tags (look for JSON-like challenge info)
         for script in soup.find_all("script"):
             txt = script.get_text(" ", strip=True)
             if txt:
-                matches = re.findall(r"(challenge\w*|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)", txt, flags=re.I)
                 for k, v in matches:
                     hidden_values.append(f"script {k}={v}")
-        # ✅ Look for JWT tokens in HTML and decode
-        jwt_matches = re.findall(r"[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+", html)
-        for token in jwt_matches:
-            data = try_decode_jwt(token)
-            if data:
-                for k, v in data.items():
-                    hidden_values.append(f"jwt {k}={v}")
-        # Regex tokens (catch suspicious long strings)
         tokens = re.findall(r"[A-Za-z0-9_\-]{12,}", html)
         for t in tokens:
-            if any(x in t.lower() for x in ["chall", "code", "id"]):
                 hidden_values.append(f"token {t}")
         return {
             "title": title,
             "visible_text": visible_text,
             "hidden_values": hidden_values[:500],
         }
     except Exception as e:
@@ -132,23 +167,59 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
 # Answer extractor
 # -------------------------
 def answer_question(question: str, content: Dict[str, Any]) -> str:
-    """Simple rule-based extraction for Round 5 questions."""
     ql = question.lower()
     title = content.get("title", "")
     hidden = content.get("hidden_values", [])
-    for h in hidden:
-        if "challengeid" in ql and "challengeid" in h.lower():
-            return h.split("=", 1)[-1].strip()
-        if "completion" in ql and "code" in ql and "code" in h.lower():
-            return h.split("=", 1)[-1].strip()
-        if "challenge name" in ql and "challenge" in h.lower():
-            return h.split("=", 1)[-1].strip()
     # Fallbacks
     if "challenge name" in ql and title:
         return title.strip()
     return "Challenge information not found"
@@ -180,4 +251,6 @@ def challenge(req: ChallengeRequest):
         ans = answer_question(q, content)
         answers.append(ans)
         logger.info(f"Q: {q} → A: {ans}")
-    return ChallengeResponse(answers=answers)

             return {}
         payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4)  # pad
         payload_json = base64.urlsafe_b64decode(payload_b64).decode("utf-8")
+        decoded_payload = json.loads(payload_json)
+        logger.info(f"Decoded JWT payload: {decoded_payload}")
+        return decoded_payload
+    except Exception as e:
+        logger.error(f"JWT decode error: {e}")
         return {}
         visible_text = soup.get_text(separator=" ", strip=True)[:6000]
         hidden_values: List[str] = []
+        jwt_data: Dict[str, Any] = {}
         # Hidden inputs
         for inp in soup.find_all("input", {"type": "hidden"}):
                 if k.startswith("data-") and isinstance(v, str) and v.strip():
                     hidden_values.append(f"{k}={v.strip()}")
+        # Script tags (look for JSON-like challenge info and completion codes)
         for script in soup.find_all("script"):
             txt = script.get_text(" ", strip=True)
             if txt:
+                # Look for completion codes or challenge codes
+                completion_matches = re.findall(r"(completion[_\s]*code|challenge[_\s]*code|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]{6,})['\"]?", txt, flags=re.I)
+                for k, v in completion_matches:
+                    hidden_values.append(f"script completion_code={v}")
+                # General matches for challenge info
+                matches = re.findall(r"(challenge\w*|code|completion)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)['\"]?", txt, flags=re.I)
                 for k, v in matches:
                     hidden_values.append(f"script {k}={v}")
+        # ✅ Enhanced JWT token detection and decoding
+        # Look for JWT patterns in the entire HTML content
+        jwt_patterns = [
+            r"eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+",  # Standard JWT
+            r"[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}"  # Generic three-part tokens
+        ]
+        for pattern in jwt_patterns:
+            jwt_matches = re.findall(pattern, html)
+            for token in jwt_matches:
+                logger.info(f"Found potential JWT: {token[:50]}...")
+                data = try_decode_jwt(token)
+                if data:
+                    jwt_data.update(data)
+                    for k, v in data.items():
+                        hidden_values.append(f"jwt {k}={v}")
+        # Look for completion codes in various formats
+        completion_patterns = [
+            r"completion[_\s]*code[:\s]*([A-Za-z0-9\-_]{6,})",
+            r"challenge[_\s]*complete[_\s]*code[:\s]*([A-Za-z0-9\-_]{6,})",
+            r"code[:\s]*([A-Za-z0-9\-_]{10,})",
+        ]
+        for pattern in completion_patterns:
+            matches = re.findall(pattern, html, flags=re.I)
+            for match in matches:
+                hidden_values.append(f"completion_code {match}")
+        # Enhanced token detection
         tokens = re.findall(r"[A-Za-z0-9_\-]{12,}", html)
         for t in tokens:
+            if any(x in t.lower() for x in ["chall", "code", "id", "completion"]):
                 hidden_values.append(f"token {t}")
+        logger.info(f"Found {len(hidden_values)} hidden values")
+        logger.info(f"JWT data: {jwt_data}")
         return {
             "title": title,
             "visible_text": visible_text,
             "hidden_values": hidden_values[:500],
+            "jwt_data": jwt_data,
         }
     except Exception as e:
 # Answer extractor
 # -------------------------
 def answer_question(question: str, content: Dict[str, Any]) -> str:
     """Answer a challenge question from scraped page content using keyword rules.

     Args:
         question: Natural-language question. It is lower-cased and matched
             against the phrases "challenge id"/"challengeid",
             "completion" together with "code", and "challenge name".
         content: Scrape result. The keys read here are ``title``,
             ``hidden_values`` (a list of strings shaped either
             ``"label key=value"`` or ``"label value"``) and ``jwt_data``
             (a decoded JWT payload dict). Missing or ``None`` entries are
             treated as empty.

     Returns:
         The extracted answer as a string, or the literal
         "Challenge information not found" when no rule matches.
     """
     def hidden_value(entry: str) -> str:
         # Hidden entries come in two shapes: "label key=value" and
         # "label value" (no "="). Splitting only on "=" would hand back the
         # whole entry, label included, for the second shape.
         if "=" in entry:
             return entry.split("=", 1)[1].strip()
         parts = entry.split(None, 1)
         return parts[-1].strip() if parts else ""

     ql = question.lower()
     title = content.get("title") or ""
     # Tolerate an explicit None and drop anything that is not a string.
     hidden = [h for h in (content.get("hidden_values") or []) if isinstance(h, str)]
     jwt_data = content.get("jwt_data") or {}

     if "challenge id" in ql or "challengeid" in ql:
         # The decoded JWT payload wins over values scraped from markup.
         if "challengeID" in jwt_data:
             return str(jwt_data["challengeID"])
         for entry in hidden:
             if "challengeid" in entry.lower():
                 return hidden_value(entry)

     if "completion" in ql and "code" in ql:
         for entry in hidden:
             lowered = entry.lower()
             value = hidden_value(entry)
             if "completion_code" in lowered:
                 return value
             # Generic "code" entries only count when the value is long
             # enough to look like a real code rather than a short flag.
             if "code" in lowered and len(value) > 10:
                 return value
         # Otherwise take the first long string field of the JWT payload,
         # skipping the e-mail claim.
         for key, value in jwt_data.items():
             if isinstance(value, str) and len(value) > 10 and key.lower() != "email":
                 return value

     if "challenge name" in ql:
         # NOTE(review): "coolGuy" is the payload key this code reads for the
         # challenge name; presumably specific to the target page's JWT.
         if "coolGuy" in jwt_data:
             return str(jwt_data["coolGuy"])
         for entry in hidden:
             lowered = entry.lower()
             if "challenge" in lowered and "name" in lowered:
                 return hidden_value(entry)
         # Fall back to the page title.
         if title:
             return title.strip()

     if jwt_data:
         # Catches ID questions phrased without the exact "challenge id" wording.
         if "challenge" in ql and "id" in ql and "challengeID" in jwt_data:
             return str(jwt_data["challengeID"])
         # Last resort: first string claim that is not a standard/e-mail field.
         for key, value in jwt_data.items():
             if key not in ("iat", "exp", "email") and isinstance(value, str):
                 return value

     return "Challenge information not found"
         ans = answer_question(q, content)
         answers.append(ans)
         logger.info(f"Q: {q} → A: {ans}")
+    logger.info(f"Final answers: {answers}")
+    return ChallengeResponse(answers=answers)