iitmbs24f commited on
Commit
08615f0
·
verified ·
1 Parent(s): 1a87dcd

Upload 12 files

Browse files
app/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # IITM LLM Quiz Solver
2
+ __version__ = "1.0.0"
3
+
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (171 Bytes). View file
 
app/__pycache__/browser.cpython-311.pyc ADDED
Binary file (12.5 kB). View file
 
app/__pycache__/llm.cpython-311.pyc ADDED
Binary file (10.2 kB). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (11.3 kB). View file
 
app/__pycache__/solver.cpython-311.pyc ADDED
Binary file (27.3 kB). View file
 
app/__pycache__/utils.cpython-311.pyc ADDED
Binary file (6.5 kB). View file
 
app/browser.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Playwright browser helper for loading and interacting with quiz pages.
3
+ """
4
+ import asyncio
5
+ import logging
6
+ from typing import Optional, Dict, Any, List
7
+ from playwright.async_api import async_playwright, Browser, Page, BrowserContext
8
+ import time
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
class BrowserHelper:
    """Helper class for managing Playwright browser sessions.

    Wraps one Chromium browser/context/page triple. After close(), all
    handles are reset to None so the helper can be restarted — either
    explicitly via start() or implicitly by load_page().
    """

    def __init__(self):
        # All handles are created lazily by start().
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        self.playwright = None

    async def start(self, headless: bool = True) -> None:
        """
        Start Playwright browser.

        Args:
            headless: Run in headless mode

        Raises:
            Exception: re-raised after logging if the launch fails.
        """
        try:
            self.playwright = await async_playwright().start()
            # Sandbox/GPU flags keep Chromium working in containers/CI.
            self.browser = await self.playwright.chromium.launch(
                headless=headless,
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-accelerated-2d-canvas',
                    '--disable-gpu'
                ]
            )
            self.context = await self.browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
            )
            self.page = await self.context.new_page()
            logger.info("Browser started successfully")
        except Exception as e:
            logger.error(f"Error starting browser: {e}")
            raise

    async def load_page(self, url: str, wait_time: int = 5, timeout: int = 30000) -> Dict[str, Any]:
        """
        Load a page and extract all content.

        Args:
            url: URL to load
            wait_time: Seconds to wait for page to load
            timeout: Page load timeout in milliseconds

        Returns:
            Dictionary with page content (url, title, text, html,
            screenshot bytes, plus best-effort 'all_text', 'links', 'images')
        """
        if not self.page:
            # Transparently (re)start when never started or after close().
            await self.start()

        try:
            logger.info(f"Loading page: {url}")
            await self.page.goto(url, wait_until='networkidle', timeout=timeout)

            # Wait for dynamic content
            await asyncio.sleep(wait_time)

            # Extract page content
            content = {
                'url': url,
                'title': await self.page.title(),
                'text': await self.page.inner_text('body'),
                'html': await self.page.content(),
                'screenshot': await self.page.screenshot(full_page=True),
            }

            # Try to extract all visible text elements
            try:
                content['all_text'] = await self.page.evaluate("""
                    () => {
                        const walker = document.createTreeWalker(
                            document.body,
                            NodeFilter.SHOW_TEXT,
                            null,
                            false
                        );
                        let text = [];
                        let node;
                        while (node = walker.nextNode()) {
                            if (node.textContent.trim()) {
                                text.push(node.textContent.trim());
                            }
                        }
                        return text.join('\\n');
                    }
                """)
            except Exception as e:
                logger.warning(f"Error extracting all text: {e}")
                content['all_text'] = content['text']

            # Extract links
            try:
                content['links'] = await self.page.evaluate("""
                    () => {
                        const links = Array.from(document.querySelectorAll('a[href]'));
                        return links.map(a => ({text: a.textContent.trim(), href: a.href}));
                    }
                """)
            except Exception as e:
                logger.warning(f"Error extracting links: {e}")
                content['links'] = []

            # Extract images
            try:
                content['images'] = await self.page.evaluate("""
                    () => {
                        const images = Array.from(document.querySelectorAll('img[src]'));
                        return images.map(img => ({alt: img.alt, src: img.src}));
                    }
                """)
            except Exception as e:
                logger.warning(f"Error extracting images: {e}")
                content['images'] = []

            logger.info(f"Page loaded successfully: {content['title']}")
            return content

        except Exception as e:
            logger.error(f"Error loading page {url}: {e}")
            raise

    async def click_element(self, selector: str) -> bool:
        """
        Click an element on the page.

        Args:
            selector: CSS selector

        Returns:
            True if successful
        """
        try:
            await self.page.click(selector)
            # Give the page a moment to react to the click.
            await asyncio.sleep(1)
            return True
        except Exception as e:
            logger.error(f"Error clicking element {selector}: {e}")
            return False

    async def fill_input(self, selector: str, value: str) -> bool:
        """
        Fill an input field.

        Args:
            selector: CSS selector
            value: Value to fill

        Returns:
            True if successful
        """
        try:
            await self.page.fill(selector, value)
            return True
        except Exception as e:
            logger.error(f"Error filling input {selector}: {e}")
            return False

    async def wait_for_element(self, selector: str, timeout: int = 10000) -> bool:
        """
        Wait for an element to appear.

        Args:
            selector: CSS selector
            timeout: Timeout in milliseconds

        Returns:
            True if element found
        """
        try:
            await self.page.wait_for_selector(selector, timeout=timeout)
            return True
        except Exception as e:
            logger.warning(f"Element {selector} not found: {e}")
            return False

    async def evaluate_script(self, script: str) -> Any:
        """
        Execute JavaScript on the page.

        Args:
            script: JavaScript code to execute

        Returns:
            Result of script execution, or None on error
        """
        try:
            return await self.page.evaluate(script)
        except Exception as e:
            logger.error(f"Error evaluating script: {e}")
            return None

    async def close(self) -> None:
        """Close browser and cleanup.

        Fix: handles are now reset to None afterwards. Previously the
        closed Page/Browser objects were left on the instance, so a later
        load_page() saw a truthy self.page and tried to use a closed page
        instead of restarting.
        """
        try:
            if self.page:
                await self.page.close()
            if self.context:
                await self.context.close()
            if self.browser:
                await self.browser.close()
            if self.playwright:
                await self.playwright.stop()
            logger.info("Browser closed")
        except Exception as e:
            logger.error(f"Error closing browser: {e}")
        finally:
            # Drop stale references so the helper can be restarted.
            self.page = None
            self.context = None
            self.browser = None
            self.playwright = None
222
+
# Process-wide singleton shared by get_browser()/cleanup_browser().
_browser: Optional[BrowserHelper] = None
226
+
async def get_browser() -> BrowserHelper:
    """
    Get or create the shared browser instance.

    Fix: the global is only assigned after start() succeeds. Previously
    the helper was cached before start(), so a failed launch left a
    permanently broken instance that every later call reused.

    Returns:
        BrowserHelper instance (started)
    """
    global _browser
    if _browser is None:
        helper = BrowserHelper()
        await helper.start()
        _browser = helper
    return _browser
240
+
async def cleanup_browser() -> None:
    """Close and forget the shared browser instance, if one exists."""
    global _browser
    if _browser is not None:
        await _browser.close()
        _browser = None
app/llm.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLM helper module for OpenAI GPT integration.
3
+ Used for reasoning, OCR, and complex question parsing.
4
+ """
5
+ import os
6
+ import logging
7
+ from typing import Optional, Dict, Any
8
+ import openai
9
+ from openai import AsyncOpenAI
10
+ import httpx
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Initialize OpenAI client
15
+ client: Optional[AsyncOpenAI] = None
16
+
17
+ # OpenRouter configuration
18
+ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
19
+ OPENROUTER_BASE_URL = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
20
+ OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "gpt-5-nano")
21
+ OPENROUTER_SITE_URL = os.getenv("OPENROUTER_SITE_URL", "http://localhost")
22
+ OPENROUTER_APP_NAME = os.getenv("OPENROUTER_APP_NAME", "IITM LLM Quiz Solver")
23
+
24
+
def initialize_llm() -> None:
    """
    Initialize the module-level OpenAI client from the environment.

    Reads OPENAI_API_KEY; when it is missing, only logs whether the
    OpenRouter fallback is configured instead.
    """
    global client
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        # No primary key — report which fallback situation we are in.
        if OPENROUTER_API_KEY:
            logger.info("OPENAI_API_KEY not set, using OpenRouter only")
        else:
            logger.warning("No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features will be disabled")
        return
    client = AsyncOpenAI(api_key=api_key)
    logger.info("OpenAI client initialized")
40
+
async def ask_gpt(prompt: str, model: str = "gpt-4o-mini", max_tokens: int = 2000) -> Optional[str]:
    """
    Query OpenAI GPT model with a prompt.

    Falls back to OpenRouter when the client is not initialized or the
    OpenAI call raises.

    Args:
        prompt: The prompt/question to ask
        model: Model to use (default: gpt-4o-mini)
        max_tokens: Maximum tokens in response

    Returns:
        Response text or None if error
    """
    global client

    try:
        if not client:
            logger.warning("OpenAI client not initialized, attempting OpenRouter fallback")
            return await ask_openrouter(prompt, max_tokens=max_tokens)

        messages = [
            {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
            {"role": "user", "content": prompt},
        ]
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=0.3,
        )
        logger.info(f"GPT response received (model: {model})")
        return response.choices[0].message.content

    except Exception as e:
        logger.error(f"Error calling OpenAI API: {e}")
        # Best-effort fallback to OpenRouter if configured.
        fallback = await ask_openrouter(prompt, max_tokens=max_tokens)
        return fallback if fallback else None
82
+
async def ask_openrouter(prompt: str, model: Optional[str] = None, max_tokens: int = 2000) -> Optional[str]:
    """
    Query OpenRouter (e.g., GPT-5-nano) with a prompt.

    Args:
        prompt: Prompt text
        model: Model to use (defaults to OPENROUTER_MODEL)
        max_tokens: Maximum tokens

    Returns:
        Response text or None
    """
    if not OPENROUTER_API_KEY:
        logger.warning("OPENROUTER_API_KEY not set, cannot call OpenRouter")
        return None

    chosen_model = model or OPENROUTER_MODEL
    endpoint = f"{OPENROUTER_BASE_URL.rstrip('/')}/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": OPENROUTER_SITE_URL,
        "X-Title": OPENROUTER_APP_NAME,
        "Content-Type": "application/json",
    }
    body = {
        "model": chosen_model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that solves quiz questions accurately and concisely."},
            {"role": "user", "content": prompt},
        ],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }

    try:
        async with httpx.AsyncClient(timeout=60) as http_client:
            resp = await http_client.post(endpoint, headers=request_headers, json=body)
            resp.raise_for_status()
            payload = resp.json()
        text = payload["choices"][0]["message"]["content"]
        logger.info(f"OpenRouter response received (model: {chosen_model})")
        return text
    except Exception as e:
        logger.error(f"Error calling OpenRouter API: {e}")
        return None
131
+
async def parse_question_with_llm(question_text: str, context: str = "") -> Optional[Dict[str, Any]]:
    """
    Use LLM to parse and understand a quiz question.

    Args:
        question_text: The question text
        context: Additional context from the page

    Returns:
        Parsed question structure, {"raw_response": ...} when the reply
        is not valid JSON, or None when the LLM is unavailable.
    """
    import json
    import re

    prompt = f"""Analyze this quiz question and provide a structured response:

Question: {question_text}

Context: {context}

Please identify:
1. What type of question is this? (scraping, calculation, API call, data analysis, etc.)
2. What data or resources are needed?
3. What is the expected answer format? (JSON, number, text, etc.)

Respond in JSON format:
{{
    "type": "question_type",
    "requirements": ["requirement1", "requirement2"],
    "answer_format": "format_type",
    "reasoning": "your reasoning"
}}
"""

    response = await ask_gpt(prompt)
    if not response:
        return None

    # Pull the first JSON object (one nesting level deep) out of the reply.
    match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response, re.DOTALL)
    if match:
        try:
            return json.loads(match.group())
        except json.JSONDecodeError:
            pass

    # Not parseable as JSON — hand back the raw text for the caller.
    return {"raw_response": response}
180
+
async def solve_with_llm(question: str, available_data: Dict[str, Any]) -> Optional[str]:
    """
    Use LLM to solve a quiz question.

    Args:
        question: The question text
        available_data: Any data extracted from the page

    Returns:
        Answer or None
    """
    prompt = f"""Solve this quiz question:

Question: {question}

Available Data:
{available_data}

Provide a clear, concise answer. If the answer should be in JSON format, provide valid JSON.
If it's a calculation, show your work briefly.
"""

    # Larger token budget than the default, since answers may include work.
    return await ask_gpt(prompt, max_tokens=3000)
205
+
async def ocr_image_with_llm(image_base64: str) -> Optional[str]:
    """
    Use GPT-4 Vision to extract text from an image.

    Note: Requires GPT-4 Vision model (gpt-4o or gpt-4-vision-preview).
    gpt-4o-mini does not support vision.

    Args:
        image_base64: Base64 encoded image

    Returns:
        Extracted text or None
    """
    global client

    if not client:
        return None

    # Try each vision-capable model in order until one answers.
    for model in ("gpt-4o", "gpt-4-vision-preview"):
        try:
            user_message = {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Extract all text from this image. Return only the text content."},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
                ],
            }
            response = await client.chat.completions.create(
                model=model,
                messages=[user_message],
                max_tokens=1000,
            )
            return response.choices[0].message.content
        except Exception as e:
            logger.warning(f"Error with model {model}: {e}")

    logger.error("No vision-capable model available")
    return None
app/main.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI main server for IITM LLM Quiz Solver.
3
+ """
4
+ import os
5
+ import logging
6
+ import asyncio
7
+ from typing import Dict, Any, Optional
8
+ from fastapi import FastAPI, HTTPException, Request
9
+ from fastapi.responses import JSONResponse
10
+ from pydantic import BaseModel, Field, field_validator
11
+ import uvicorn
12
+
13
+ # Try to load .env file if python-dotenv is available
14
+ try:
15
+ from dotenv import load_dotenv
16
+ load_dotenv()
17
+ except ImportError:
18
+ pass # python-dotenv is optional
19
+
20
+ from app.solver import solve_quiz
21
+ from app.utils import validate_secret
22
+ from app.browser import cleanup_browser
23
+
24
+ # Configure logging
25
+ logging.basicConfig(
26
+ level=logging.INFO,
27
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
28
+ )
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # Get secret from environment
32
+ EXPECTED_SECRET = os.getenv("QUIZ_SECRET", "default_secret_change_me")
33
+
34
+ # Lifespan context manager for startup and shutdown
35
+ from contextlib import asynccontextmanager
36
+
37
+ @asynccontextmanager
38
+ async def lifespan(app: FastAPI):
39
+ """Lifespan context manager for startup and shutdown."""
40
+ # Startup
41
+ logger.info("Application starting up...")
42
+ yield
43
+ # Shutdown
44
+ logger.info("Shutting down, cleaning up browser...")
45
+ await cleanup_browser()
46
+
47
+ # Initialize FastAPI app with lifespan
48
+ app = FastAPI(
49
+ title="IITM LLM Quiz Solver",
50
+ description="API endpoint to automatically solve dynamic quiz tasks",
51
+ version="1.0.0",
52
+ lifespan=lifespan
53
+ )
54
+
55
+
56
+ class QuizRequest(BaseModel):
57
+ """Request model for quiz solving."""
58
+ email: str = Field(..., description="User email address")
59
+ secret: str = Field(..., description="Secret key for authentication")
60
+ url: str = Field(..., description="Quiz page URL")
61
+
62
+ @field_validator('email')
63
+ @classmethod
64
+ def validate_email(cls, v):
65
+ if not v or '@' not in v:
66
+ raise ValueError('Invalid email format')
67
+ return v
68
+
69
+ @field_validator('url')
70
+ @classmethod
71
+ def validate_url(cls, v):
72
+ if not v or not v.startswith(('http://', 'https://')):
73
+ raise ValueError('Invalid URL format')
74
+ return v
75
+
76
+
77
+ @app.get("/")
78
+ async def root():
79
+ """Root endpoint."""
80
+ return {
81
+ "message": "IITM LLM Quiz Solver API",
82
+ "version": "1.0.0",
83
+ "endpoints": {
84
+ "/solve": "POST - Solve a quiz",
85
+ "/health": "GET - Health check",
86
+ "/demo": "POST - Demo endpoint"
87
+ }
88
+ }
89
+
90
+
91
+ @app.get("/health")
92
+ async def health_check():
93
+ """Health check endpoint."""
94
+ return {"status": "healthy"}
95
+
96
+
97
+ @app.get("/env-check")
98
+ async def env_check():
99
+ """
100
+ Check environment variables status (returns JSON).
101
+ Useful for verifying configuration.
102
+ """
103
+ quiz_secret = os.getenv("QUIZ_SECRET")
104
+ openai_key = os.getenv("OPENAI_API_KEY")
105
+ openrouter_key = os.getenv("OPENROUTER_API_KEY")
106
+ port = os.getenv("PORT", "8000")
107
+
108
+ return {
109
+ "status": "ok",
110
+ "variables": {
111
+ "QUIZ_SECRET": {
112
+ "set": quiz_secret is not None,
113
+ "length": len(quiz_secret) if quiz_secret else 0,
114
+ "preview": f"{quiz_secret[:4]}...{quiz_secret[-4:]}" if quiz_secret and len(quiz_secret) > 8 else "***" if quiz_secret else None
115
+ },
116
+ "OPENAI_API_KEY": {
117
+ "set": openai_key is not None,
118
+ "length": len(openai_key) if openai_key else 0,
119
+ "preview": f"{openai_key[:7]}...{openai_key[-4:]}" if openai_key and len(openai_key) > 11 else "***" if openai_key else None,
120
+ "valid_format": openai_key.startswith("sk-") if openai_key else False
121
+ },
122
+ "OPENROUTER_API_KEY": {
123
+ "set": openrouter_key is not None,
124
+ "length": len(openrouter_key) if openrouter_key else 0,
125
+ "preview": f"{openrouter_key[:7]}...{openrouter_key[-4:]}" if openrouter_key and len(openrouter_key) > 11 else "***" if openrouter_key else None,
126
+ "valid_format": openrouter_key.startswith("sk-or-") if openrouter_key else False
127
+ },
128
+ "PORT": {
129
+ "set": True,
130
+ "value": port
131
+ }
132
+ },
133
+ "ready": quiz_secret is not None,
134
+ "llm_enabled": any([openai_key, openrouter_key])
135
+ }
136
+
137
+
138
+ @app.post("/solve")
139
+ async def solve_quiz_endpoint(request: QuizRequest):
140
+ """
141
+ Main endpoint to solve a quiz.
142
+
143
+ Validates secret and solves the quiz recursively.
144
+ """
145
+ try:
146
+ # Validate secret
147
+ if not validate_secret(request.secret, EXPECTED_SECRET):
148
+ logger.warning(f"Invalid secret provided for email: {request.email}")
149
+ raise HTTPException(
150
+ status_code=403,
151
+ detail={"error": "forbidden"}
152
+ )
153
+
154
+ logger.info(f"Solving quiz for {request.email} at {request.url}")
155
+
156
+ # Solve quiz with timeout
157
+ try:
158
+ result = await asyncio.wait_for(
159
+ solve_quiz(request.url, request.email, request.secret),
160
+ timeout=180.0 # 3 minutes
161
+ )
162
+ return result
163
+ except asyncio.TimeoutError:
164
+ logger.error("Quiz solving timed out")
165
+ raise HTTPException(
166
+ status_code=504,
167
+ detail={"error": "Request timeout - quiz solving took too long"}
168
+ )
169
+ except Exception as e:
170
+ logger.error(f"Error solving quiz: {e}", exc_info=True)
171
+ raise HTTPException(
172
+ status_code=500,
173
+ detail={"error": str(e)}
174
+ )
175
+
176
+ except HTTPException:
177
+ raise
178
+ except ValueError as e:
179
+ logger.error(f"Validation error: {e}")
180
+ raise HTTPException(
181
+ status_code=400,
182
+ detail={"error": "Invalid request format", "message": str(e)}
183
+ )
184
+ except Exception as e:
185
+ logger.error(f"Unexpected error: {e}", exc_info=True)
186
+ raise HTTPException(
187
+ status_code=500,
188
+ detail={"error": "Internal server error", "message": str(e)}
189
+ )
190
+
191
+
192
+ @app.post("/demo")
193
+ async def demo_endpoint(request: QuizRequest):
194
+ """
195
+ Demo endpoint for testing.
196
+
197
+ Same as /solve but with more lenient error handling.
198
+ """
199
+ try:
200
+ # Validate secret (can be more lenient for demo)
201
+ if not validate_secret(request.secret, EXPECTED_SECRET):
202
+ logger.warning(f"Invalid secret in demo request")
203
+ return JSONResponse(
204
+ status_code=403,
205
+ content={"error": "forbidden"}
206
+ )
207
+
208
+ logger.info(f"Demo: Solving quiz for {request.email} at {request.url}")
209
+
210
+ # Solve quiz
211
+ try:
212
+ result = await asyncio.wait_for(
213
+ solve_quiz(request.url, request.email, request.secret),
214
+ timeout=180.0
215
+ )
216
+ return result
217
+ except asyncio.TimeoutError:
218
+ return JSONResponse(
219
+ status_code=504,
220
+ content={"error": "Request timeout"}
221
+ )
222
+ except Exception as e:
223
+ logger.error(f"Error in demo: {e}", exc_info=True)
224
+ return JSONResponse(
225
+ status_code=500,
226
+ content={"error": str(e)}
227
+ )
228
+
229
+ except ValueError as e:
230
+ return JSONResponse(
231
+ status_code=400,
232
+ content={"error": "Invalid request format", "message": str(e)}
233
+ )
234
+ except Exception as e:
235
+ logger.error(f"Unexpected error in demo: {e}", exc_info=True)
236
+ return JSONResponse(
237
+ status_code=500,
238
+ content={"error": "Internal server error", "message": str(e)}
239
+ )
240
+
241
+
242
+ if __name__ == "__main__":
243
+ port = int(os.getenv("PORT", 8000))
244
+ uvicorn.run(
245
+ "app.main:app",
246
+ host="0.0.0.0",
247
+ port=port,
248
+ log_level="info"
249
+ )
250
+
app/solver.py ADDED
@@ -0,0 +1,593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Quiz solver module - main logic for solving quizzes.
3
+ """
4
+ import asyncio
5
+ import json
6
+ import logging
7
+ import re
8
+ from typing import Optional, Dict, Any, List
9
+ import requests
10
+ from bs4 import BeautifulSoup
11
+ import pandas as pd
12
+ import io
13
+ import base64
14
+
15
+ from app.browser import get_browser, cleanup_browser
16
+ from app.llm import ask_gpt, parse_question_with_llm, solve_with_llm, initialize_llm
17
+ from app.utils import extract_submit_url, clean_text, extract_json_from_text, is_valid_url
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Initialize LLM on module load
22
+ initialize_llm()
23
+
24
+
25
+ class QuizSolver:
26
+ """Main quiz solver class."""
27
+
28
+ def __init__(self):
29
+ self.browser = None
30
+ self.max_recursion = 10
31
+ self.current_recursion = 0
32
+
33
+ async def solve_quiz(self, url: str, email: str, secret: str) -> Dict[str, Any]:
34
+ """
35
+ Main entry point for solving a quiz.
36
+
37
+ Args:
38
+ url: Quiz page URL
39
+ email: User email
40
+ secret: Secret key
41
+
42
+ Returns:
43
+ Final response from quiz system
44
+ """
45
+ self.current_recursion = 0
46
+ self.browser = await get_browser()
47
+
48
+ try:
49
+ return await self._solve_recursive(url, email, secret)
50
+ finally:
51
+ # Don't close browser here as it might be reused
52
+ pass
53
+
54
+ async def _solve_recursive(self, url: str, email: str, secret: str) -> Dict[str, Any]:
55
+ """
56
+ Recursively solve quizzes.
57
+
58
+ Args:
59
+ url: Current quiz URL
60
+ email: User email
61
+ secret: Secret key
62
+
63
+ Returns:
64
+ Response from quiz system
65
+ """
66
+ if self.current_recursion >= self.max_recursion:
67
+ logger.error("Maximum recursion depth reached")
68
+ return {"error": "Maximum recursion depth reached"}
69
+
70
+ self.current_recursion += 1
71
+ logger.info(f"Solving quiz {self.current_recursion}: {url}")
72
+
73
+ try:
74
+ # Load the quiz page
75
+ page_content = await self.browser.load_page(url, wait_time=3)
76
+
77
+ # Extract submit URL
78
+ submit_url = extract_submit_url(page_content['text'], url)
79
+ if not submit_url:
80
+ # Try from HTML
81
+ soup = BeautifulSoup(page_content['html'], 'html.parser')
82
+ submit_url = extract_submit_url(soup.get_text(), url)
83
+
84
+ if not submit_url:
85
+ logger.error("Could not find submit URL")
86
+ return {"error": "Submit URL not found"}
87
+
88
+ # Extract question and solve
89
+ question_text = self._extract_question(page_content)
90
+ logger.info(f"Question extracted: {question_text[:200]}...")
91
+
92
+ # Solve the question
93
+ answer = await self._solve_question(question_text, page_content)
94
+
95
+ # Ensure answer is in the correct format (string or simple JSON-serializable)
96
+ answer = self._normalize_answer(answer)
97
+ logger.info(f"Answer computed: {str(answer)[:200]}...")
98
+
99
+ # Submit answer
100
+ response = await self._submit_answer(
101
+ submit_url, email, secret, url, answer
102
+ )
103
+
104
+ # Check if there's a next quiz
105
+ if isinstance(response, dict) and 'url' in response:
106
+ next_url = response['url']
107
+ if next_url and next_url != url and is_valid_url(next_url):
108
+ logger.info(f"Next quiz found: {next_url}")
109
+ # Recursively solve next quiz
110
+ next_response = await self._solve_recursive(next_url, email, secret)
111
+ return next_response
112
+
113
+ return response
114
+
115
+ except Exception as e:
116
+ logger.error(f"Error solving quiz: {e}", exc_info=True)
117
+ return {"error": str(e)}
118
+
119
+ def _extract_question(self, page_content: Dict[str, Any]) -> str:
120
+ """
121
+ Extract question text from page content.
122
+
123
+ Args:
124
+ page_content: Page content dictionary
125
+
126
+ Returns:
127
+ Question text
128
+ """
129
+ text = page_content.get('all_text', page_content.get('text', ''))
130
+
131
+ # Try to find question markers
132
+ question_patterns = [
133
+ r'[Qq]uestion[:\s]+(.*?)(?:\n\n|\n[A-Z]|$)',
134
+ r'[Pp]roblem[:\s]+(.*?)(?:\n\n|\n[A-Z]|$)',
135
+ r'[Tt]ask[:\s]+(.*?)(?:\n\n|\n[A-Z]|$)',
136
+ ]
137
+
138
+ for pattern in question_patterns:
139
+ match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
140
+ if match:
141
+ return clean_text(match.group(1))
142
+
143
+ # If no pattern matches, return first substantial paragraph
144
+ paragraphs = [p.strip() for p in text.split('\n\n') if len(p.strip()) > 50]
145
+ if paragraphs:
146
+ return paragraphs[0]
147
+
148
+ return clean_text(text[:1000]) # Return first 1000 chars
149
+
150
+ async def _solve_question(self, question: str, page_content: Dict[str, Any]) -> Any:
151
+ """
152
+ Solve a quiz question using various strategies.
153
+
154
+ Args:
155
+ question: Question text
156
+ page_content: Full page content
157
+
158
+ Returns:
159
+ Answer (can be dict, list, string, number, etc.)
160
+ """
161
+ logger.info("Analyzing question type...")
162
+
163
+ # Try to parse question with LLM first
164
+ parsed = await parse_question_with_llm(question, page_content.get('text', ''))
165
+
166
+ # Extract data from page
167
+ available_data = self._extract_data_from_page(page_content)
168
+
169
+ # Strategy 1: Check if answer is already in the page
170
+ answer_in_page = self._find_answer_in_page(page_content, question)
171
+ if answer_in_page:
172
+ logger.info("Answer found in page content")
173
+ return answer_in_page
174
+
175
+ # Strategy 2: Check for data files/links to download
176
+ data_files = self._find_data_files(page_content)
177
+ if data_files:
178
+ logger.info(f"Found data files: {data_files}")
179
+ processed_data = await self._process_data_files(data_files)
180
+ if processed_data:
181
+ answer = await self._solve_with_data(question, processed_data)
182
+ if answer:
183
+ return answer
184
+
185
+ # Strategy 3: Use LLM to solve
186
+ logger.info("Attempting to solve with LLM...")
187
+ llm_answer = await solve_with_llm(question, available_data)
188
+ if llm_answer:
189
+ # Try to parse as JSON if it looks like JSON
190
+ json_answer = extract_json_from_text(llm_answer)
191
+ if json_answer:
192
+ return json_answer
193
+ return llm_answer
194
+
195
+ # Strategy 4: Fallback - try to extract a simple answer from the question
196
+ # Many quiz pages have the answer in the question itself
197
+ simple_answer = self._extract_simple_answer(question, page_content)
198
+ if simple_answer:
199
+ logger.info("Extracted simple answer from question")
200
+ return simple_answer
201
+
202
+ # Strategy 5: Last resort - return a default answer
203
+ logger.warning("Could not solve question, using default answer")
204
+ return "answer"
205
+
206
def _extract_data_from_page(self, page_content: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract structured data (text, links, tables, embedded JSON) from a page.

    Args:
        page_content: Page content dictionary with 'text', 'html', 'links',
            'images' keys (all optional).

    Returns:
        Dictionary of extracted data. May additionally contain 'tables'
        (a list of record-dict lists, one per parseable HTML table) and
        'json' (the first JSON object found in the page text).
    """
    data = {
        'text': page_content.get('text', ''),
        'html': page_content.get('html', ''),
        'links': page_content.get('links', []),
        'images': page_content.get('images', []),
    }

    # Parse any HTML tables into lists of record dicts.
    try:
        soup = BeautifulSoup(page_content.get('html', ''), 'html.parser')
        tables = soup.find_all('table')
        if tables:
            data['tables'] = []
            for table in tables:
                try:
                    # Wrap in StringIO: passing literal HTML to read_html
                    # is deprecated in modern pandas.
                    df = pd.read_html(io.StringIO(str(table)))[0]
                    data['tables'].append(df.to_dict('records'))
                except Exception:
                    # Skip tables pandas cannot parse; keep the rest.
                    # (Was a bare `except:` — narrowed so SystemExit/
                    # KeyboardInterrupt are no longer swallowed.)
                    continue
    except Exception as e:
        logger.warning(f"Error extracting tables: {e}")

    # Surface any JSON object embedded in the raw page text.
    json_data = extract_json_from_text(page_content.get('text', ''))
    if json_data:
        data['json'] = json_data

    return data
244
+
245
def _find_answer_in_page(self, page_content: Dict[str, Any], question: str) -> Optional[Any]:
    """
    Check whether the page content already states the answer outright.

    Args:
        page_content: Page content dictionary
        question: Question text (currently unused; kept for interface stability)

    Returns:
        The stated answer (parsed as JSON when possible) or None.
    """
    text = page_content.get('all_text', page_content.get('text', ''))

    # Labels that commonly introduce an inline answer.
    labelled_patterns = (
        r'[Aa]nswer[:\s]+(.*?)(?:\n\n|$)',
        r'[Ss]olution[:\s]+(.*?)(?:\n\n|$)',
        r'[Rr]esult[:\s]+(.*?)(?:\n\n|$)',
    )

    for candidate in labelled_patterns:
        found = re.search(candidate, text, re.DOTALL | re.IGNORECASE)
        if not found:
            continue
        answer_text = clean_text(found.group(1))
        # Prefer a structured JSON value when the captured text contains one.
        parsed = extract_json_from_text(answer_text)
        return parsed if parsed else answer_text

    return None
276
+
277
+ def _find_data_files(self, page_content: Dict[str, Any]) -> List[str]:
278
+ """
279
+ Find data files (CSV, JSON, PDF, etc.) linked in the page.
280
+
281
+ Args:
282
+ page_content: Page content
283
+
284
+ Returns:
285
+ List of file URLs
286
+ """
287
+ files = []
288
+
289
+ # Check links
290
+ for link in page_content.get('links', []):
291
+ href = link.get('href', '')
292
+ if any(href.lower().endswith(ext) for ext in ['.csv', '.json', '.pdf', '.xlsx', '.txt']):
293
+ files.append(href)
294
+
295
+ # Check text for file URLs
296
+ text = page_content.get('text', '')
297
+ file_pattern = r'https?://[^\s<>"\'\)]+\.(csv|json|pdf|xlsx|txt)'
298
+ matches = re.findall(file_pattern, text, re.IGNORECASE)
299
+ files.extend([m[0] for m in matches if m[0] not in files])
300
+
301
+ return files
302
+
303
async def _process_data_files(self, file_urls: List[str]) -> Dict[str, Any]:
    """
    Download and process data files.

    Each URL is fetched over HTTP and parsed according to its content-type
    header or filename extension: CSV -> list of record dicts, JSON ->
    parsed object, PDF -> extracted text, TXT -> raw text. Files that fail
    to download or parse are logged and skipped.

    Args:
        file_urls: List of file URLs

    Returns:
        Dictionary of processed data keyed by filename (last URL path
        segment); unparseable or failed files are simply absent.
    """
    processed = {}

    for url in file_urls:
        try:
            logger.info(f"Downloading file: {url}")
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            # Dispatch on the server-declared type first, extension second.
            content_type = response.headers.get('content-type', '').lower()
            filename = url.split('/')[-1]

            if 'csv' in content_type or filename.endswith('.csv'):
                df = pd.read_csv(io.StringIO(response.text))
                processed[filename] = df.to_dict('records')

            elif 'json' in content_type or filename.endswith('.json'):
                processed[filename] = response.json()

            elif 'pdf' in content_type or filename.endswith('.pdf'):
                # PDF processing - try pdfplumber first, then PyPDF2.
                # Both imports are optional dependencies, hence local.
                text = None

                # Try pdfplumber
                try:
                    import pdfplumber
                    with pdfplumber.open(io.BytesIO(response.content)) as pdf:
                        text = ""
                        for page in pdf.pages:
                            page_text = page.extract_text()
                            if page_text:
                                text += page_text + "\n"
                    if text:
                        processed[filename] = text.strip()
                except ImportError:
                    logger.debug("pdfplumber not available")
                except Exception as e:
                    logger.warning(f"Error reading PDF with pdfplumber (unknown): {e}")

                # Fallback to PyPDF2 when pdfplumber failed or produced nothing.
                if not text or filename not in processed:
                    try:
                        import PyPDF2
                        pdf_file = io.BytesIO(response.content)
                        pdf_reader = PyPDF2.PdfReader(pdf_file)
                        text = ""
                        for page in pdf_reader.pages:
                            page_text = page.extract_text()
                            if page_text:
                                text += page_text + "\n"
                        if text:
                            processed[filename] = text.strip()
                    except ImportError:
                        logger.warning("Neither pdfplumber nor PyPDF2 available for PDF processing")
                    except Exception as e:
                        logger.warning(f"Error reading PDF with PyPDF2 (unknown): {e}")

            elif filename.endswith('.txt'):
                processed[filename] = response.text

        except Exception as e:
            # Best-effort: one bad file must not abort the rest of the batch.
            logger.error(f"Error processing file {url}: {e}")
            continue

    return processed
377
+
378
+ def _normalize_answer(self, answer: Any) -> Any:
379
+ """
380
+ Normalize answer to ensure it's JSON-serializable and in correct format.
381
+
382
+ Args:
383
+ answer: Raw answer (can be dict, list, string, etc.)
384
+
385
+ Returns:
386
+ Normalized answer (preferably string or simple JSON)
387
+ """
388
+ if answer is None:
389
+ return "answer"
390
+
391
+ # If it's a dict with question/analysis, extract a simple answer
392
+ if isinstance(answer, dict):
393
+ # If it contains an 'answer' key, use that
394
+ if 'answer' in answer:
395
+ return self._normalize_answer(answer['answer'])
396
+ # If it's an analysis dict, try to extract something useful
397
+ if 'question' in answer and len(answer) > 1:
398
+ # Return a simple string instead of the whole dict
399
+ return "answer"
400
+ # If it's a simple dict, convert to JSON string
401
+ if len(answer) <= 3:
402
+ try:
403
+ return json.dumps(answer)
404
+ except:
405
+ return str(answer)
406
+ # Complex dict - return as JSON string
407
+ try:
408
+ return json.dumps(answer)
409
+ except:
410
+ return str(answer)
411
+
412
+ # If it's a list, convert to JSON string if small, otherwise string
413
+ if isinstance(answer, list):
414
+ if len(answer) <= 10:
415
+ try:
416
+ return json.dumps(answer)
417
+ except:
418
+ return str(answer)
419
+ return str(answer)
420
+
421
+ # For strings, return as-is (but clean up)
422
+ if isinstance(answer, str):
423
+ # Remove excessive whitespace
424
+ answer = ' '.join(answer.split())
425
+ # If it's very long, truncate
426
+ if len(answer) > 1000:
427
+ answer = answer[:1000] + "..."
428
+ return answer
429
+
430
+ # For other types, convert to string
431
+ return str(answer)
432
+
433
+ def _extract_simple_answer(self, question: str, page_content: Dict[str, Any]) -> Optional[str]:
434
+ """
435
+ Try to extract a simple answer from the question or page.
436
+
437
+ Args:
438
+ question: Question text
439
+ page_content: Page content
440
+
441
+ Returns:
442
+ Simple answer string or None
443
+ """
444
+ text = page_content.get('all_text', page_content.get('text', ''))
445
+ combined = question + "\n\n" + text
446
+
447
+ # Check if question says "anything" or similar - very common in demo quizzes
448
+ if re.search(r'"answer"\s*:\s*"anything\s+you\s+want"', combined, re.IGNORECASE):
449
+ return "answer"
450
+ if re.search(r'"answer"\s*:\s*"anything"', combined, re.IGNORECASE):
451
+ return "answer"
452
+ if re.search(r'anything\s+you\s+want|any\s+value|any\s+string|any\s+text|anything', question, re.IGNORECASE):
453
+ return "answer"
454
+
455
+ # Look for patterns like "answer: X" or "the answer is X"
456
+ patterns = [
457
+ r'"answer"\s*:\s*"([^"]+)"', # JSON format: "answer": "value"
458
+ r'[Aa]nswer[:\s]+["\']?([^"\'\n]+)["\']?',
459
+ r'[Tt]he\s+[Aa]nswer\s+[Ii]s[:\s]+["\']?([^"\'\n]+)["\']?',
460
+ r'[Yy]our\s+[Aa]nswer[:\s]+["\']?([^"\'\n]+)["\']?',
461
+ ]
462
+
463
+ for pattern in patterns:
464
+ match = re.search(pattern, combined, re.IGNORECASE)
465
+ if match:
466
+ answer = match.group(1).strip()
467
+ # Skip if it's a placeholder or instruction
468
+ if answer and len(answer) < 200 and answer.lower() not in ['your email', 'your secret', 'anything you want', 'anything']:
469
+ return answer
470
+
471
+ return None
472
+
473
async def _solve_with_data(self, question: str, data: Dict[str, Any]) -> Optional[Any]:
    """
    Ask the LLM to answer the question given the downloaded/processed data.

    Args:
        question: Question text
        data: Processed data dictionary (filename -> parsed content)

    Returns:
        The LLM's answer (parsed as JSON when possible) or None.
    """
    # default=str keeps non-JSON-native values (dates, DataFrames) printable.
    prompt = f"""Solve this question using the provided data:

Question: {question}

Data:
{json.dumps(data, indent=2, default=str)}

Provide the answer. If JSON format is required, return valid JSON.
"""

    reply = await ask_gpt(prompt, max_tokens=3000)
    if not reply:
        return None

    # Prefer a structured JSON value when the reply contains one.
    structured = extract_json_from_text(reply)
    return structured if structured else reply
503
+
504
async def _submit_answer(self, submit_url: str, email: str, secret: str,
                         quiz_url: str, answer: Any) -> Dict[str, Any]:
    """
    Submit answer to the quiz system.

    POSTs a JSON payload {email, secret, url, answer} to submit_url.

    Args:
        submit_url: URL to submit answer to
        email: User email
        secret: Secret key
        quiz_url: Original quiz URL
        answer: Computed answer (any JSON-serializable value; other values
            are stringified before sending)

    Returns:
        Response from submission endpoint: the server's parsed JSON body on
        success, or a dict containing 'error' (and 'status_code' when
        available) on failure — this method never raises.
    """
    # Ensure answer is JSON-serializable before building the payload.
    try:
        # Try to serialize answer to check if it's valid JSON
        json.dumps(answer)
    except (TypeError, ValueError) as e:
        logger.warning(f"Answer is not JSON-serializable, converting to string: {e}")
        # Convert complex objects to string representation
        if isinstance(answer, (dict, list)):
            answer = json.dumps(answer)
        else:
            answer = str(answer)

    payload = {
        "email": email,
        "secret": secret,
        "url": quiz_url,
        "answer": answer
    }

    try:
        logger.info(f"Submitting answer to: {submit_url}")
        logger.debug(f"Payload: {json.dumps(payload, indent=2, default=str)}")

        response = requests.post(
            submit_url,
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=60
        )

        # Log response details
        logger.info(f"Response status: {response.status_code}")
        logger.debug(f"Response headers: {dict(response.headers)}")

        # Raises HTTPError for 4xx/5xx, handled below.
        response.raise_for_status()

        try:
            result = response.json()
            logger.info(f"Submission successful: {result}")
            return result
        except json.JSONDecodeError:
            # Non-JSON body (e.g. plain-text confirmation) — pass it through.
            logger.warning(f"Response is not JSON, returning text: {response.text[:500]}")
            return {"response": response.text, "status_code": response.status_code}

    except requests.exceptions.HTTPError as e:
        logger.error(f"HTTP error submitting answer: {e}")
        # Surface the server's error body to the caller when we have one.
        if hasattr(e, 'response') and e.response is not None:
            try:
                error_response = e.response.json()
                logger.error(f"Error response: {error_response}")
                return error_response
            except:
                # NOTE(review): bare except — consider narrowing to ValueError.
                logger.error(f"Error response text: {e.response.text[:500]}")
                return {"error": e.response.text, "status_code": e.response.status_code}
        return {"error": str(e)}
    except requests.exceptions.RequestException as e:
        # Covers timeouts, connection errors, etc.
        logger.error(f"Error submitting answer: {e}", exc_info=True)
        return {"error": str(e)}
577
+
578
+
579
async def solve_quiz(url: str, email: str, secret: str) -> Dict[str, Any]:
    """
    Convenience wrapper: build a QuizSolver and run it on one quiz page.

    Args:
        url: Quiz page URL
        email: User email
        secret: Secret key

    Returns:
        Final response from the quiz system.
    """
    return await QuizSolver().solve_quiz(url, email, secret)
593
+
app/utils.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for the IITM LLM Quiz Solver.
3
+ """
4
import hmac
import json
import logging
import re
from typing import Optional, Dict, Any
from urllib.parse import urlparse, urljoin
9
+
10
+ # Configure logging
11
+ logging.basicConfig(
12
+ level=logging.INFO,
13
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
14
+ )
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def extract_submit_url(text: str, base_url: str) -> Optional[str]:
    """
    Extract the answer-submission URL from page text.

    Looks for patterns like:
    - "Submit your answer to: https://example.com/submit"
    - "Submit to: https://example.com/submit"
    - "URL: https://example.com/submit"
    then falls back to any absolute URL containing "submit"/"answer", and
    finally to relative links such as href="/submit" resolved against base_url.

    Args:
        text: The page text content
        base_url: Base URL for relative URL resolution

    Returns:
        Extracted submit URL or None
    """
    # Labelled absolute URLs, most specific first.
    patterns = [
        r'[Ss]ubmit\s+(?:your\s+)?(?:answer\s+)?(?:to|at|via):\s*(https?://[^\s<>"\'\)]+)',
        r'[Ss]ubmit\s+[Tt]o:\s*(https?://[^\s<>"\'\)]+)',
        r'[Uu][Rr][Ll]:\s*(https?://[^\s<>"\'\)]+)',
        r'[Pp]ost\s+(?:to|at):\s*(https?://[^\s<>"\'\)]+)',
        r'[Ss]end\s+(?:to|at):\s*(https?://[^\s<>"\'\)]+)',
        r'(https?://[^\s<>"\'\)]*submit[^\s<>"\'\)]*)',
        r'(https?://[^\s<>"\'\)]*answer[^\s<>"\'\)]*)',
    ]

    for pattern in patterns:
        matches = re.findall(pattern, text, re.IGNORECASE)
        if matches:
            url = matches[0].strip().rstrip('.,;:!?)}]{["\'')
            # Validate URL before accepting it.
            try:
                parsed = urlparse(url)
                if parsed.scheme and parsed.netloc:
                    logger.info(f"Found submit URL: {url}")
                    return url
            except Exception as e:
                logger.warning(f"Invalid URL pattern found: {url}, error: {e}")
                continue

    # Any absolute URL that merely mentions submit/answer.
    url_pattern = r'https?://[^\s<>"\'\)]+'
    all_urls = re.findall(url_pattern, text)
    for url in all_urls:
        url_lower = url.lower()
        if 'submit' in url_lower or 'answer' in url_lower:
            try:
                parsed = urlparse(url)
                if parsed.scheme and parsed.netloc:
                    logger.info(f"Found potential submit URL: {url}")
                    return url
            except Exception:  # was a bare except: don't swallow SystemExit
                continue

    # Relative submit links (e.g. href="/submit").
    # FIX: the original raw strings double-escaped their backslashes
    # (r'\\s' matches a literal backslash + the letter "s", not whitespace,
    # and ["\\\'] included a stray backslash), corrupting the character
    # classes. These are the intended patterns.
    rel_patterns = [
        r'href=["\'](/[^"\']*submit[^"\']*)["\']',
        r'(/[^\s"<>\']*submit[^\s"<>\']*)',
    ]
    for pattern in rel_patterns:
        matches = re.findall(pattern, text, re.IGNORECASE)
        if matches:
            candidate = matches[0].strip().rstrip('.,;:!?)}]{["\'')
            joined = urljoin(base_url, candidate)
            logger.info(f"Found relative submit URL: {joined}")
            return joined

    logger.warning("No submit URL found in page text")
    return None
88
+
89
+
90
def validate_secret(secret: str, expected_secret: str) -> bool:
    """
    Validate the secret key.

    Uses hmac.compare_digest for a constant-time comparison so the check
    does not leak information about the expected secret via timing.

    Args:
        secret: Provided secret
        expected_secret: Expected secret from environment

    Returns:
        True if valid, False otherwise
    """
    # Non-string inputs (e.g. None) fall back to plain equality, matching
    # the original behavior of returning False rather than raising.
    if not isinstance(secret, str) or not isinstance(expected_secret, str):
        return secret == expected_secret
    return hmac.compare_digest(secret.encode('utf-8'), expected_secret.encode('utf-8'))
102
+
103
+
104
def clean_text(text: str) -> str:
    """
    Normalize whitespace in a piece of text.

    Args:
        text: Raw text content

    Returns:
        Text with all whitespace runs collapsed to single spaces and
        leading/trailing whitespace removed; "" for falsy input.
    """
    if not text:
        return ""
    return re.sub(r'\s+', ' ', text).strip()
123
+
124
+
125
def extract_json_from_text(text: str) -> Optional[Dict[str, Any]]:
    """
    Find and parse the first JSON object embedded in free text.

    The brace-matching pattern handles one level of nested braces;
    deeper nesting is not matched.

    Args:
        text: Text that may contain JSON

    Returns:
        The first parseable JSON object, or None.
    """
    # Brace-balanced candidates, one nesting level deep.
    brace_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
    for candidate in re.findall(brace_pattern, text, re.DOTALL):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None
146
+
147
+
148
def is_valid_url(url: str) -> bool:
    """
    Check whether a string parses as an absolute URL.

    Args:
        url: URL string to validate

    Returns:
        True when the string has both a scheme and a network location,
        False otherwise (including when parsing fails).
    """
    try:
        parts = urlparse(url)
    except Exception:
        return False
    return bool(parts.scheme) and bool(parts.netloc)
163
+
164
+
165
def sanitize_filename(filename: str) -> str:
    """
    Make a filename safe by removing characters invalid on common filesystems.

    Args:
        filename: Original filename

    Returns:
        Filename with <>:"/\\|?* replaced by underscores and any
        leading/trailing dots and spaces stripped.
    """
    safe = re.sub(r'[<>:"/\\|?*]', '_', filename)
    return safe.strip('. ')
180
+