quantumbit committed on
Commit
10aba13
·
verified ·
1 Parent(s): 1c457e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -26
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import os
2
  import re
 
 
3
  import logging
4
  from typing import List, Dict, Any
5
  from fastapi import FastAPI
@@ -31,6 +33,22 @@ class ChallengeResponse(BaseModel):
31
  answers: List[str]
32
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # -------------------------
35
  # Scraper
36
  # -------------------------
@@ -81,18 +99,20 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
81
  for script in soup.find_all("script"):
82
  txt = script.get_text(" ", strip=True)
83
  if txt:
84
- # Look for "challengeId", "completionCode", etc.
85
  matches = re.findall(r"(challenge\w*|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)", txt, flags=re.I)
86
  for k, v in matches:
87
  hidden_values.append(f"script {k}={v}")
88
 
89
- # ✅ Direct regex for challengeID inside HTML/JS blobs
90
- challengeid_match = re.search(r'"?challengeID"?\s*[:=]\s*"([^"]+)"', html, re.I)
91
- if challengeid_match:
92
- hidden_values.append(f"challengeID={challengeid_match.group(1)}")
 
 
 
93
 
94
  # Regex tokens (catch suspicious long strings)
95
- tokens = re.findall(r"[A-Za-z0-9_\-]{8,}", html)
96
  for t in tokens:
97
  if any(x in t.lower() for x in ["chall", "code", "id"]):
98
  hidden_values.append(f"token {t}")
@@ -100,7 +120,7 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
100
  return {
101
  "title": title,
102
  "visible_text": visible_text,
103
- "hidden_values": hidden_values[:500], # keep cap
104
  }
105
 
106
  except Exception as e:
@@ -108,39 +128,27 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
108
  return {}
109
 
110
 
111
-
112
-
 
113
  def answer_question(question: str, content: Dict[str, Any]) -> str:
114
  """Simple rule-based extraction for Round 5 questions."""
115
  ql = question.lower()
116
  title = content.get("title", "")
117
- visible = content.get("visible_text", "")
118
  hidden = content.get("hidden_values", [])
119
 
120
- # Look into hidden values
121
  for h in hidden:
122
- if "challenge" in ql and "name" in ql and "challenge" in h.lower():
123
  return h.split("=", 1)[-1].strip()
124
- if "challenge id" in ql and "id" in h.lower():
 
 
125
  return h.split("=", 1)[-1].strip()
126
- if "completion code" in ql or "hidden code" in ql or "code" in ql:
127
- if "code" in h.lower():
128
- return h.split("=", 1)[-1].strip()
129
 
130
  # Fallbacks
131
  if "challenge name" in ql and title:
132
  return title.strip()
133
 
134
- if "challenge id" in ql:
135
- m = re.search(r"challenge\s*id\s*[:\-]\s*([A-Za-z0-9\-_]+)", visible, flags=re.I)
136
- if m:
137
- return m.group(1)
138
-
139
- if "completion code" in ql or "hidden code" in ql:
140
- m = re.search(r"(?:completion|hidden)\s*code\s*[:\-]\s*([A-Za-z0-9\-_]+)", visible, flags=re.I)
141
- if m:
142
- return m.group(1)
143
-
144
  return "Challenge information not found"
145
 
146
 
 
1
  import os
2
  import re
3
+ import json
4
+ import base64
5
  import logging
6
  from typing import List, Dict, Any
7
  from fastapi import FastAPI
 
33
  answers: List[str]
34
 
35
 
36
+ # -------------------------
37
+ # Helpers
38
+ # -------------------------
def try_decode_jwt(token: str) -> Dict[str, Any]:
    """Decode the payload segment of a JWT-shaped string without verifying it.

    The signature is NOT checked, so the returned claims must be treated as
    untrusted data.

    Args:
        token: Candidate string of the form ``header.payload.signature``.

    Returns:
        The payload as a dict when the middle segment is base64url-encoded
        UTF-8 JSON whose top-level value is an object; otherwise ``{}``.
        ``{}`` is also returned for non-string input or any decoding failure,
        so callers can always iterate ``.items()`` on the result.
    """
    if not isinstance(token, str):
        return {}

    parts = token.split(".")
    if len(parts) != 3:
        return {}

    segment = parts[1]
    # JWTs strip base64 padding; restore it so the decoder accepts the segment.
    padded = segment + "=" * (-len(segment) % 4)

    try:
        payload = json.loads(base64.urlsafe_b64decode(padded).decode("utf-8"))
    except (ValueError, RecursionError):
        # ValueError covers binascii.Error, UnicodeDecodeError and
        # json.JSONDecodeError; RecursionError guards against absurdly nested
        # JSON. Decoding is best-effort, so every failure maps to "no claims".
        return {}

    # Dotted strings such as "a.MTIz.c" decode to valid JSON that is not an
    # object (here the number 123). Returning that would break callers that
    # iterate over the claims with .items().
    return payload if isinstance(payload, dict) else {}
50
+
51
+
52
  # -------------------------
53
  # Scraper
54
  # -------------------------
 
99
  for script in soup.find_all("script"):
100
  txt = script.get_text(" ", strip=True)
101
  if txt:
 
102
  matches = re.findall(r"(challenge\w*|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)", txt, flags=re.I)
103
  for k, v in matches:
104
  hidden_values.append(f"script {k}={v}")
105
 
106
+ # ✅ Look for JWT tokens in HTML and decode
107
+ jwt_matches = re.findall(r"[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+", html)
108
+ for token in jwt_matches:
109
+ data = try_decode_jwt(token)
110
+ if data:
111
+ for k, v in data.items():
112
+ hidden_values.append(f"jwt {k}={v}")
113
 
114
  # Regex tokens (catch suspicious long strings)
115
+ tokens = re.findall(r"[A-Za-z0-9_\-]{12,}", html)
116
  for t in tokens:
117
  if any(x in t.lower() for x in ["chall", "code", "id"]):
118
  hidden_values.append(f"token {t}")
 
120
  return {
121
  "title": title,
122
  "visible_text": visible_text,
123
+ "hidden_values": hidden_values[:500],
124
  }
125
 
126
  except Exception as e:
 
128
  return {}
129
 
130
 
131
+ # -------------------------
132
+ # Answer extractor
133
+ # -------------------------
# Matches "challengeid", "challenge id", "challenge_id" and "challenge-id".
_CHALLENGE_ID_RE = re.compile(r"challenge[\s_\-]*id", re.IGNORECASE)


def answer_question(question: str, content: Dict[str, Any]) -> str:
    """Answer a challenge question from scraped page content using fixed rules.

    Args:
        question: Natural-language question; matched case-insensitively.
        content: Scrape result. Only the ``"title"`` (str) and
            ``"hidden_values"`` (list of ``"<label> key=value"`` strings) keys
            are read; both are optional, so an empty dict is accepted.

    Returns:
        The text after the first ``=`` of the first hidden value that matches
        the question's rule, stripped of surrounding whitespace. For a
        "challenge name" question with no matching hidden value, the stripped
        page title. Otherwise the literal ``"Challenge information not found"``.
    """
    ql = question.lower()
    title = content.get("title", "")
    hidden = content.get("hidden_values", [])

    # Classify the question once instead of re-testing it for every entry.
    # The ID rule tolerates a separator so that "challenge ID" (the natural
    # phrasing) is recognised as well as the compact "challengeID".
    wants_id = _CHALLENGE_ID_RE.search(ql) is not None
    wants_code = "completion" in ql and "code" in ql
    wants_name = "challenge name" in ql

    for h in hidden:
        hl = h.lower()
        if wants_id and _CHALLENGE_ID_RE.search(hl):
            return h.split("=", 1)[-1].strip()
        if wants_code and "code" in hl:
            return h.split("=", 1)[-1].strip()
        # NOTE(review): this accepts any entry mentioning "challenge", so a
        # challengeID entry can be returned for a name question — confirm
        # whether a stricter key match is wanted.
        if wants_name and "challenge" in hl:
            return h.split("=", 1)[-1].strip()

    # Fallbacks
    if wants_name and title:
        return title.strip()

    return "Challenge information not found"
153
 
154