quantumbit committed on
Commit
a526fb5
·
verified ·
1 Parent(s): 10aba13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -24
app.py CHANGED
@@ -44,8 +44,11 @@ def try_decode_jwt(token: str) -> Dict[str, Any]:
44
  return {}
45
  payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4) # pad
46
  payload_json = base64.urlsafe_b64decode(payload_b64).decode("utf-8")
47
- return json.loads(payload_json)
48
- except Exception:
 
 
 
49
  return {}
50
 
51
 
@@ -69,6 +72,7 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
69
  visible_text = soup.get_text(separator=" ", strip=True)[:6000]
70
 
71
  hidden_values: List[str] = []
 
72
 
73
  # Hidden inputs
74
  for inp in soup.find_all("input", {"type": "hidden"}):
@@ -95,32 +99,63 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
95
  if k.startswith("data-") and isinstance(v, str) and v.strip():
96
  hidden_values.append(f"{k}={v.strip()}")
97
 
98
- # Script tags (look for JSON-like challenge info)
99
  for script in soup.find_all("script"):
100
  txt = script.get_text(" ", strip=True)
101
  if txt:
102
- matches = re.findall(r"(challenge\w*|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)", txt, flags=re.I)
 
 
 
 
 
 
103
  for k, v in matches:
104
  hidden_values.append(f"script {k}={v}")
105
 
106
- # ✅ Look for JWT tokens in HTML and decode
107
- jwt_matches = re.findall(r"[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+", html)
108
- for token in jwt_matches:
109
- data = try_decode_jwt(token)
110
- if data:
111
- for k, v in data.items():
112
- hidden_values.append(f"jwt {k}={v}")
113
-
114
- # Regex tokens (catch suspicious long strings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  tokens = re.findall(r"[A-Za-z0-9_\-]{12,}", html)
116
  for t in tokens:
117
- if any(x in t.lower() for x in ["chall", "code", "id"]):
118
  hidden_values.append(f"token {t}")
119
 
 
 
 
120
  return {
121
  "title": title,
122
  "visible_text": visible_text,
123
  "hidden_values": hidden_values[:500],
 
124
  }
125
 
126
  except Exception as e:
@@ -132,23 +167,59 @@ def scrape_with_requests(url: str) -> Dict[str, Any]:
132
  # Answer extractor
133
  # -------------------------
134
  def answer_question(question: str, content: Dict[str, Any]) -> str:
135
- """Simple rule-based extraction for Round 5 questions."""
136
  ql = question.lower()
137
  title = content.get("title", "")
138
  hidden = content.get("hidden_values", [])
139
-
140
- for h in hidden:
141
- if "challengeid" in ql and "challengeid" in h.lower():
142
- return h.split("=", 1)[-1].strip()
143
- if "completion" in ql and "code" in ql and "code" in h.lower():
144
- return h.split("=", 1)[-1].strip()
145
- if "challenge name" in ql and "challenge" in h.lower():
146
- return h.split("=", 1)[-1].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  # Fallbacks
149
  if "challenge name" in ql and title:
150
  return title.strip()
151
 
 
 
 
 
 
 
 
 
 
 
 
152
  return "Challenge information not found"
153
 
154
 
@@ -180,4 +251,6 @@ def challenge(req: ChallengeRequest):
180
  ans = answer_question(q, content)
181
  answers.append(ans)
182
  logger.info(f"Q: {q} → A: {ans}")
183
- return ChallengeResponse(answers=answers)
 
 
 
44
  return {}
45
  payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4) # pad
46
  payload_json = base64.urlsafe_b64decode(payload_b64).decode("utf-8")
47
+ decoded_payload = json.loads(payload_json)
48
+ logger.info(f"Decoded JWT payload: {decoded_payload}")
49
+ return decoded_payload
50
+ except Exception as e:
51
+ logger.error(f"JWT decode error: {e}")
52
  return {}
53
 
54
 
 
72
  visible_text = soup.get_text(separator=" ", strip=True)[:6000]
73
 
74
  hidden_values: List[str] = []
75
+ jwt_data: Dict[str, Any] = {}
76
 
77
  # Hidden inputs
78
  for inp in soup.find_all("input", {"type": "hidden"}):
 
99
  if k.startswith("data-") and isinstance(v, str) and v.strip():
100
  hidden_values.append(f"{k}={v.strip()}")
101
 
102
+ # Script tags (look for JSON-like challenge info and completion codes)
103
  for script in soup.find_all("script"):
104
  txt = script.get_text(" ", strip=True)
105
  if txt:
106
+ # Look for completion codes or challenge codes
107
+ completion_matches = re.findall(r"(completion[_\s]*code|challenge[_\s]*code|code)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]{6,})['\"]?", txt, flags=re.I)
108
+ for k, v in completion_matches:
109
+ hidden_values.append(f"script completion_code={v}")
110
+
111
+ # General matches for challenge info
112
+ matches = re.findall(r"(challenge\w*|code|completion)\s*[:=]\s*['\"]?([A-Za-z0-9\-_]+)['\"]?", txt, flags=re.I)
113
  for k, v in matches:
114
  hidden_values.append(f"script {k}={v}")
115
 
116
+ # ✅ Enhanced JWT token detection and decoding
117
+ # Look for JWT patterns in the entire HTML content
118
+ jwt_patterns = [
119
+ r"eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+", # Standard JWT
120
+ r"[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}" # Generic three-part tokens
121
+ ]
122
+
123
+ for pattern in jwt_patterns:
124
+ jwt_matches = re.findall(pattern, html)
125
+ for token in jwt_matches:
126
+ logger.info(f"Found potential JWT: {token[:50]}...")
127
+ data = try_decode_jwt(token)
128
+ if data:
129
+ jwt_data.update(data)
130
+ for k, v in data.items():
131
+ hidden_values.append(f"jwt {k}={v}")
132
+
133
+ # Look for completion codes in various formats
134
+ completion_patterns = [
135
+ r"completion[_\s]*code[:\s]*([A-Za-z0-9\-_]{6,})",
136
+ r"challenge[_\s]*complete[_\s]*code[:\s]*([A-Za-z0-9\-_]{6,})",
137
+ r"code[:\s]*([A-Za-z0-9\-_]{10,})",
138
+ ]
139
+
140
+ for pattern in completion_patterns:
141
+ matches = re.findall(pattern, html, flags=re.I)
142
+ for match in matches:
143
+ hidden_values.append(f"completion_code {match}")
144
+
145
+ # Enhanced token detection
146
  tokens = re.findall(r"[A-Za-z0-9_\-]{12,}", html)
147
  for t in tokens:
148
+ if any(x in t.lower() for x in ["chall", "code", "id", "completion"]):
149
  hidden_values.append(f"token {t}")
150
 
151
+ logger.info(f"Found {len(hidden_values)} hidden values")
152
+ logger.info(f"JWT data: {jwt_data}")
153
+
154
  return {
155
  "title": title,
156
  "visible_text": visible_text,
157
  "hidden_values": hidden_values[:500],
158
+ "jwt_data": jwt_data,
159
  }
160
 
161
  except Exception as e:
 
167
  # Answer extractor
168
  # -------------------------
169
  def answer_question(question: str, content: Dict[str, Any]) -> str:
170
+ """Enhanced rule-based extraction for Round 5 questions."""
171
  ql = question.lower()
172
  title = content.get("title", "")
173
  hidden = content.get("hidden_values", [])
174
+ jwt_data = content.get("jwt_data", {})
175
+
176
+ # Direct JWT data extraction
177
+ if "challenge id" in ql or "challengeid" in ql:
178
+ # First check JWT data directly
179
+ if "challengeID" in jwt_data:
180
+ return str(jwt_data["challengeID"])
181
+ # Then check hidden values
182
+ for h in hidden:
183
+ if "challengeid" in h.lower():
184
+ return h.split("=", 1)[-1].strip()
185
+
186
+ if "completion" in ql and "code" in ql:
187
+ # Look for completion codes in various formats
188
+ for h in hidden:
189
+ if "completion_code" in h.lower():
190
+ return h.split("=", 1)[-1].strip()
191
+ if "code" in h.lower() and len(h.split("=", 1)[-1].strip()) > 10:
192
+ return h.split("=", 1)[-1].strip()
193
+
194
+ # Check JWT data for any field that might be a completion code
195
+ for key, value in jwt_data.items():
196
+ if isinstance(value, str) and len(value) > 10 and key.lower() != "email":
197
+ return str(value)
198
+
199
+ if "challenge name" in ql:
200
+ # Check JWT data first
201
+ if "coolGuy" in jwt_data:
202
+ return str(jwt_data["coolGuy"])
203
+ # Then check hidden values
204
+ for h in hidden:
205
+ if "challenge" in h.lower() and "name" in h.lower():
206
+ return h.split("=", 1)[-1].strip()
207
 
208
  # Fallbacks
209
  if "challenge name" in ql and title:
210
  return title.strip()
211
 
212
+ # If we have JWT data, return the most likely candidate
213
+ if jwt_data:
214
+ # For challenge ID questions, return challengeID if present
215
+ if "challenge" in ql and "id" in ql and "challengeID" in jwt_data:
216
+ return str(jwt_data["challengeID"])
217
+
218
+ # For other questions, return the first non-standard field
219
+ for key, value in jwt_data.items():
220
+ if key not in ["iat", "exp", "email"] and isinstance(value, str):
221
+ return str(value)
222
+
223
  return "Challenge information not found"
224
 
225
 
 
251
  ans = answer_question(q, content)
252
  answers.append(ans)
253
  logger.info(f"Q: {q} → A: {ans}")
254
+
255
+ logger.info(f"Final answers: {answers}")
256
+ return ChallengeResponse(answers=answers)