Vjay15 commited on
Commit
ff23105
·
verified ·
1 Parent(s): ee0d98c

TRYING IF IT WORKS

Browse files
Files changed (7) hide show
  1. Dockerfile +41 -0
  2. LICENSE +21 -0
  3. agent.py +46 -0
  4. main.py +42 -0
  5. requirements.txt +20 -0
  6. solver.py +300 -0
  7. tools.py +448 -0
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.11 slim image (stable for Playwright/Agno)
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies required for building python packages and playwright
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements file
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Install Playwright browsers and dependencies
# We set the path to a location accessible by the non-root user or install globally
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# chmod 777 makes the browser bundle readable/executable by the non-root
# "user" created below (browsers are installed here while still root)
RUN mkdir /ms-playwright && \
    playwright install --with-deps chromium && \
    chmod -R 777 /ms-playwright

# Create a non-root user for security (HF Spaces requirement)
RUN useradd -m -u 1000 user

# Copy application code and set ownership
COPY --chown=user:user . .

# Switch to non-root user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Expose port 7860 (standard for HF Spaces)
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 22f3000730
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
agent.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agno.agent import Agent
2
+ from agno.models.openai.chat import OpenAIChat
3
+ from agno.db.sqlite.sqlite import SqliteDb
4
+ import os
5
+ from dotenv import load_dotenv
6
+ from tools import fetch_page_text, fetch_page_scripts, run_python_code, transcribe_audio, understand_image, call_api, execute_python, read_pdf, read_zip, search_history
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ load_dotenv()
12
+
13
def get_agent():
    """Build and return a configured quiz-solving Agent.

    Reads the AI_TOKEN environment variable and wires up the
    OpenRouter-compatible model endpoint, the tool set, and
    SQLite-backed conversation memory.

    Raises:
        ValueError: if AI_TOKEN is not set in the environment.
    """
    api_key = os.getenv("AI_TOKEN")
    if not api_key:
        logger.error("AI_TOKEN is missing from environment variables!")
        raise ValueError("AI_TOKEN not found in environment variables")

    # SECURITY FIX: log only the key length, never any part of the secret
    # itself (the previous code logged the first 10 characters of the key).
    logger.info(f"AI_TOKEN loaded. Length: {len(api_key)}")

    # Set env var just in case downstream libraries look for it
    os.environ["OPENROUTER_API_KEY"] = api_key

    # Initialize the agent with OpenRouter model via custom endpoint
    agent = Agent(
        model=OpenAIChat(
            base_url="https://aipipe.org/openrouter/v1",
            api_key=api_key,
            id="google/gemini-2.0-flash-lite-001"
        ),
        description="You are a helpful assistant that solves data-related quiz tasks.",
        instructions=[
            "You will be given a task description, often involving data analysis or web scraping.",
            "You need to solve the task and provide the answer.",
            "The answer should be in the format requested by the task.",
            "If you need to download a file, write Python code to do it.",
            "Be concise and accurate."
        ],
        tools=[fetch_page_text, fetch_page_scripts, run_python_code, transcribe_audio, understand_image, call_api, execute_python, read_pdf, read_zip, search_history],
        markdown=True,
        debug_mode=True,
        db=SqliteDb(db_file="agent_memory.db"),
        add_history_to_context=True
    )
    return agent
main.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
2
+ from pydantic import BaseModel
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from solver import solve_quiz
6
+ import logging
7
+
8
+ # Configure logging
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
+ load_dotenv()
13
+
14
+ app = FastAPI()
15
+
16
class QuizRequest(BaseModel):
    """Incoming quiz-task request payload posted by the quiz server."""
    email: str   # participant email address
    secret: str  # shared secret; validated against QUIZ_SECRET env var
    url: str     # first quiz page to solve
    class Config:
        # Accept and retain any extra fields beyond the declared ones.
        extra = "allow"
22
+
23
@app.post("/", status_code=200)
async def solve_quiz_endpoint(request: QuizRequest, background_tasks: BackgroundTasks):
    """Accept a quiz task and launch the solver in the background.

    Returns immediately so the caller is not blocked while pages are
    scraped and answered; the actual work happens in a background task.

    Raises:
        HTTPException: 403 when the provided secret does not match QUIZ_SECRET.
    """
    import hmac

    logger.info(f"Received quiz request for URL: {request.url}")

    # Verify secret
    expected_secret = os.getenv("QUIZ_SECRET")
    if not expected_secret:
        logger.warning("QUIZ_SECRET not set in environment variables. Skipping secret validation.")
    elif not hmac.compare_digest(request.secret, expected_secret):
        # SECURITY FIX: constant-time comparison, and never write either
        # secret value into the logs (the old code logged both).
        logger.error("Invalid secret provided.")
        raise HTTPException(status_code=403, detail="Invalid secret")

    # Start solving in background
    background_tasks.add_task(solve_quiz, request.url, request.email, request.secret)

    return {"message": "Task received, solving started."}
39
+
40
@app.get("/health")
async def health_check():
    """Liveness probe: always reports the service as healthy."""
    return dict(status="ok")
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ requests
4
+ python-dotenv
5
+ agno
6
+ playwright
7
+ pandas
8
+ beautifulsoup4
9
+ lxml
10
+ openai
11
+ SpeechRecognition
12
+ pydub
13
+ scikit-learn
14
+ openai-whisper
15
+ matplotlib
16
+ seaborn
17
+ pypdf
18
+ duckdb
19
+ Pillow
20
+ sqlalchemy
solver.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import requests
5
+ from urllib.parse import urljoin
6
+ from playwright.async_api import async_playwright
7
+ from bs4 import BeautifulSoup
8
+ from agent import get_agent
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
async def solve_quiz(initial_url: str, email: str, secret: str):
    """Drive the full quiz-solving loop for one participant.

    Navigates to each quiz page with Playwright, extracts the task
    (text, links, media, optional screenshot), asks the LLM agent to
    solve it, submits the answer, and follows any next-URL returned by
    the grader until the quiz is complete.

    Args:
        initial_url: first quiz page to visit.
        email: participant email (filled into email inputs when found).
        secret: shared secret forwarded to the agent/grader.
    """
    # Hoisted imports: previously `uuid`, `re` and `execute_python` were
    # re-imported on every loop iteration.
    import re
    import uuid
    from tools import execute_python

    logger.info(f"Starting quiz solver workflow for {email}")

    current_url = initial_url

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()

        try:
            while current_url:
                # Generate a NEW session ID for each task/URL to keep memory clean
                session_id = str(uuid.uuid4())
                logger.info(f"Started new agent session for {current_url}: {session_id}")

                logger.info(f"Navigating to {current_url}")
                await page.goto(current_url)

                # Wait for content
                await page.wait_for_selector("body")

                # Check for email input and fill it if present
                # Many quizzes require entering the email to see the question
                try:
                    email_input = await page.query_selector("input[type='email'], input[name='email'], input[placeholder*='email']")
                    if email_input:
                        logger.info(f"Found email input, filling with {email}")
                        await email_input.fill(email)
                        await email_input.press("Enter")
                        # Wait for potential update/navigation
                        await page.wait_for_load_state("networkidle")
                        await asyncio.sleep(2)  # Extra buffer for JS updates
                except Exception as e:
                    logger.warning(f"Error handling email input: {e}")

                # Extract content: full HTML so we can parse links and media
                html_content = await page.content()
                soup = BeautifulSoup(html_content, 'html.parser')

                # Extract text
                text_content = soup.get_text(separator='\n', strip=True)

                # Honour a <base> tag when resolving relative URLs
                base_url = current_url
                base_tag = soup.find('base', href=True)
                if base_tag:
                    base_url = urljoin(current_url, base_tag['href'])
                    logger.info(f"Found <base> tag, using base URL: {base_url}")

                # Extract links and media to append to context
                links = []
                for a in soup.find_all('a', href=True):
                    href = a['href']
                    full_url = urljoin(base_url, href)
                    links.append(f"Link: [{a.get_text(strip=True)}]({full_url})")

                audio_sources = []
                for audio in soup.find_all('audio'):
                    if audio.get('src'):
                        src = audio['src']
                        full_src = urljoin(base_url, src)
                        audio_sources.append(f"Audio: {full_src}")
                    for source in audio.find_all('source', src=True):
                        src = source['src']
                        full_src = urljoin(base_url, src)
                        audio_sources.append(f"Audio: {full_src}")

                images = []
                for img in soup.find_all('img'):
                    src = img.get('src')
                    if src:
                        full_src = urljoin(base_url, src)
                        alt = img.get('alt', 'No description')
                        images.append(f"Image: [{alt}]({full_src})")

                # Conditional Screenshot Logic
                # If there are visual elements (canvas or images), capture the page state to a file.
                # This allows the agent to "see" the page if needed, without cluttering context with base64.
                try:
                    has_visuals = await page.evaluate("() => document.querySelectorAll('canvas, img').length > 0")
                    if has_visuals:
                        screenshot_path = f"/tmp/screenshot_{session_id}.jpg"
                        await page.screenshot(path=screenshot_path, full_page=True, type='jpeg', quality=50)
                        images.append(f"Image: [Page Screenshot]({screenshot_path})")
                        logger.info(f"Visual elements detected. Saved screenshot to {screenshot_path}")
                    else:
                        logger.info("No significant visual elements detected. Skipping screenshot.")
                except Exception as e:
                    logger.warning(f"Error handling screenshot: {e}")

                # Combine into a rich context
                content = text_content + "\n\n--- Extracted Links & Media ---\n" + "\n".join(links + audio_sources + images)

                # If the content is empty or loading, wait a bit
                if not content.strip():
                    await asyncio.sleep(1)
                    content = await page.evaluate("document.body.innerText")

                logger.info(f"Extracted content (first 100 chars): {content[:100]}")

                # Fresh agent per task; memory isolation comes from the per-URL session_id.
                agent = get_agent()

                # NOTE(review): the brace-doubling below inserts literal `{{`/`}}`
                # into the final prompt text (f-string interpolation does not
                # re-format) — kept as-is to preserve existing behavior.
                prompt = f"""
You are a highly capable Quiz Solver Agent.
Current Page URL: {current_url}

Page Content:
---
{content.replace("{", "{{").replace("}", "}}")}
---

**GOAL**
Solve the task on the current page.

**GUIDELINES**
- **Conciseness**: Plan and explain in **2-3 lines maximum**.
- **Action**: Respond **IMMEDIATELY** with a tool call or the final JSON. **DO NOT** output conversational text or plans like "I need to...". Just run the code.

**TOOL USAGE**
- **Secret Codes**: Return exactly as requested (no extra spaces).
- **Media**: (this is the order in which you should understand the contents of the page)
- Audio: Use `transcribe_audio(url)`.
- Images: Use `understand_image(url, prompt)`. (if /tmp is involved its stored locally and not in the url)
- PDF/ZIP: Use `read_pdf(url)` or `read_zip(url)`.
- History: Use `search_history(query)` with the previous quiz URL.
- API: Use `call_api(url)` only if explicitly mentioned to call an api and not otherwise.
- **Files**:
- **Local**: Paths starting with `/tmp/` are local. Pass them directly to tools.
- **Relative URLs**: Resolve against the Current Page URL using `urllib.parse.urljoin`.
- **Scripts**: Use `fetch_page_scripts(url)` **ONLY** if the page explicitly asks to "run javascript" or "embedded javascript".
- **Stuck?**: If content is unchanged or ambiguous, **FIRST** **CRITICAL** verify visually with `understand_image`. Only then consider scripts.

**DATA ANALYSIS**
- **Process**:
1. **Inspect**: Download/read the file and print `df.head()` and `df.columns`. **STOP**.
2. **Analyze**: Read the inspection output, *then* write new code to solve.
- **Downloads**: Always check `status_code == 200`. Save to `/tmp`.
- **CSV**: Use `header=None` for number lists.
- **Output**: Calculate the final answer in Python. Print **ONLY** the result. Do not print large dataframes.

**DIRECT ANSWER**
If the answer is in the text, provide it directly.
6. If sometimes the answer is wrong when previously you submitted it in some format (integer/string/float) try submitting it in another format.
7. If the task asks for email or uses an example email, then the email address is 22f3000730@ds.study.iitm.ac.in. You can use it if needed. But DO NOT confuse it for anything else based on your assumptions.
8. Email number for '22f3000730@ds.study.iitm.ac.in' is 45519 using sha1 hash. You can use it if needed. But DO NOT confuse it for anything else based on your assumptions. DO NOT assume that this is the answer unless the page explicitly states to submit it.
9. If the tasks ask for the secret, it is vjay@15
10. Screenshots are mentioned in the prompt like this: Image: [Page Screenshot](/tmp/screenshot_12345.jpg) use understand_image(/tmp/screenshot_12345.jpg, "") to understand the screenshot.
**OUTPUT**
When you have the final answer, return a JSON object with the following structure:
{{
"answer_payload": {{"email": "...", "secret": "...", "url": "...", "answer": "..."}},
"submit_url": "...",
"reasoning": "..."
}}
If submission url is not available, use https://tds-llm-analysis.s-anand.net/submit to submit.
"""

                # Run agent with session_id for memory
                max_retries = 3
                for attempt in range(max_retries):
                    response = agent.run(prompt, session_id=session_id)
                    logger.info(f"LLM Response: {response.content}")

                    # Parse response
                    try:
                        response_text = response.content
                        logger.info(f"Raw LLM Response: {response_text}")

                        # Robust JSON extraction using regex
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            response_text = json_match.group(0)

                        result = json.loads(response_text)

                        # Check if agent returned python_code instead of final answer
                        if "python_code" in result and "answer_payload" not in result:
                            python_code = result.get("python_code")
                            logger.info(f"Agent provided Python code to execute")

                            # Execute the code
                            code_output = execute_python(python_code)
                            logger.info(f"Python code executed, output: {code_output[:200]}...")

                            # Ask agent to format final JSON with code output
                            followup_prompt = f"""
The Python code executed successfully. Output:

{code_output.replace("{", "{{").replace("}", "}}")}

Now return the final JSON for submission:

{{
"answer_payload": {{"email": "{email}", "secret": "{secret}", "url": "{current_url}", "answer": <extract from output above>}},
"submit_url": <submit URL from original page>,
"reasoning": <brief explanation>
}}
"""
                            response = agent.run(followup_prompt, session_id=session_id)
                            logger.info(f"LLM Follow-up Response: {response.content}")

                            # Parse follow-up response
                            response_text = response.content
                            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                            if json_match:
                                response_text = json_match.group(0)
                            result = json.loads(response_text)

                        answer_payload = result.get("answer_payload")
                        submit_url = result.get("submit_url")

                        if not answer_payload or not submit_url:
                            logger.error("Agent failed to provide answer_payload or submit_url")
                            if attempt < max_retries - 1:
                                prompt = "Error: You must return a JSON object with 'answer_payload' and 'submit_url'. Do not return conversational text."
                                continue
                            break

                        # Resolve relative URL
                        submit_url = urljoin(current_url, submit_url)

                        logger.info(f"Solved. Submitting to {submit_url}")

                        # Submit answer (payload is trusted as the LLM produced it)
                        submission_response = submit_answer(submit_url, answer_payload)

                        logger.info(f"Submission Response: {json.dumps(submission_response, indent=2)}")

                        # Check for next URL first (priority over correctness for navigation)
                        next_url = submission_response.get("url")
                        is_correct = submission_response.get("correct")

                        if next_url:
                            logger.info(f"Received next URL: {next_url}")
                            if not is_correct:
                                logger.warning(f"Answer was incorrect, but moving to next URL as instructed.")
                            else:
                                logger.info("Answer correct! Moving to next URL.")

                            current_url = next_url
                            break  # Break retry loop to process new URL

                        # No new URL provided
                        if is_correct:
                            logger.info("Answer correct! No new URL provided. Quiz completed!")
                            current_url = None  # Break outer loop
                            break  # Break retry loop
                        else:
                            logger.warning(f"Answer incorrect: {submission_response.get('reason')}")
                            logger.info("No new URL provided. Retrying same URL in 2 seconds...")
                            await asyncio.sleep(2)
                            # Break inner loop to refresh page and try again
                            break

                    except json.JSONDecodeError:
                        logger.error(f"Failed to parse agent response: {response.content}")
                        if attempt < max_retries - 1:
                            prompt = "Error: Your response was not valid JSON. Please return ONLY a JSON object. Do not include any conversational text."
                            continue
                        break

        except Exception as e:
            logger.error(f"Error in solver loop: {e}")
        finally:
            await browser.close()
292
+
293
def submit_answer(submit_url, payload):
    """POST *payload* as JSON to *submit_url* and return the parsed JSON reply.

    On any failure (network error, non-JSON body) returns a dict shaped
    like a grading response: {"correct": False, "reason": <error>} so the
    caller's retry logic keeps working.
    """
    try:
        logger.info(f"Submitting answer to {submit_url} with payload: {json.dumps(payload, indent=2)}")
        # Timeout so a dead grading endpoint cannot hang the background task forever.
        response = requests.post(submit_url, json=payload, timeout=60)
        return response.json()
    except Exception as e:
        logger.error(f"Submission failed: {e}")
        return {"correct": False, "reason": str(e)}
tools.py ADDED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import base64
3
+ import logging
4
+ from playwright.sync_api import sync_playwright
5
+ import threading
6
+ import speech_recognition as sr
7
+ from pydub import AudioSegment
8
+ import io
9
+ import tempfile
10
+ import sys
11
+ import os
12
+ import pandas as pd
13
+ import numpy as np
14
+ import speech_recognition as sr
15
+ from bs4 import BeautifulSoup
16
+ import pydub
17
+ from pydub import AudioSegment
18
+ import pypdf
19
+ import zipfile
20
+ import duckdb
21
+ from PIL import Image
22
+ import json
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
def run_python_code(code: str) -> str:
    """Execute a Python snippet and return its captured stdout.

    Markdown code fences are stripped if present. A set of common
    libraries (pandas, numpy, requests, ...) is pre-loaded into the
    execution namespace. Returns the printed output, a reminder message
    when nothing was printed, or an "Error ..." string on failure.
    """
    try:
        logger.info("Executing Python code...")

        # Robustly extract code from markdown blocks if present
        if "```python" in code:
            code = code.split("```python")[1].split("```")[0].strip()
        elif "```" in code:
            code = code.split("```")[1].split("```")[0].strip()

        logger.info(f"Code:\n{code}")

        # Create a buffer to capture stdout
        old_stdout = sys.stdout
        redirected_output = io.StringIO()
        sys.stdout = redirected_output

        # Execution namespace pre-loaded with common libraries
        exec_scope = {
            "pd": pd,
            "np": np,
            "requests": requests,
            "io": io,
            "sr": sr,
            "pydub": pydub,
            "sys": sys,
            "os": os,
            "BeautifulSoup": BeautifulSoup,
            "pypdf": pypdf,
            "zipfile": zipfile,
            "duckdb": duckdb,
            "Image": Image
        }

        try:
            # BUG FIX: pass a single dict as globals. With the previous
            # exec(code, {}, local_scope), functions and comprehensions
            # defined by the snippet could not see the pre-loaded names,
            # because their name lookups go through globals (which was {}).
            exec(code, exec_scope)
        except Exception as exec_error:
            return f"Error executing code: {exec_error}"
        finally:
            sys.stdout = old_stdout

        output = redirected_output.getvalue()
        logger.info(f"Code Output:\n{output}")
        return output if output.strip() else "Code executed successfully but produced no output. Did you forget to print the result?"

    except Exception as e:
        logger.error(f"System error during code execution: {e}")
        return f"System error: {e}"
77
+
78
def execute_python(code: str) -> str:
    """Run a Python snippet and return its output.

    Thin alias over run_python_code, kept as a separately-named tool for
    math, data analysis (pandas, numpy, duckdb), and file processing.
    """
    result = run_python_code(code)
    return result
84
+
85
+
86
+
87
def read_pdf(url: str) -> str:
    """Download a PDF from *url* and return its extracted text.

    The text is truncated to 5000 characters to avoid context overflow.
    Returns an "Error ..." string on download or parse failure.
    """
    try:
        logger.info(f"Reading PDF from: {url}")
        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        # Timeout so a stalled download cannot hang the agent.
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            temp_pdf.write(response.content)
            temp_pdf_path = temp_pdf.name

        text = ""
        try:
            reader = pypdf.PdfReader(temp_pdf_path)
            for page in reader.pages:
                # BUG FIX: extract_text() may return None for image-only
                # pages, which previously raised TypeError on concatenation.
                text += (page.extract_text() or "") + "\n"
        except Exception as e:
            return f"Error reading PDF: {e}"
        finally:
            os.remove(temp_pdf_path)

        return text[:5000]  # Truncate if too long to avoid context overflow
    except Exception as e:
        logger.error(f"Error downloading PDF: {e}")
        return f"Error downloading PDF: {e}"
117
+
118
def read_zip(url: str) -> str:
    """Download a ZIP file from *url*, list its contents, and inline text.

    Small (<10 KB) non-image members are decoded best-effort and their
    first 500 characters appended. Returns an "Error ..." string on
    download or archive failure.
    """
    try:
        logger.info(f"Reading ZIP from: {url}")
        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        # Timeout so a stalled download cannot hang the agent.
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as temp_zip:
            temp_zip.write(response.content)
            temp_zip_path = temp_zip.name

        result = "ZIP Contents:\n"
        try:
            with zipfile.ZipFile(temp_zip_path, 'r') as zip_ref:
                for file_info in zip_ref.infolist():
                    result += f"- {file_info.filename} ({file_info.file_size} bytes)\n"
                    # If it's a small text file, try to read it
                    if file_info.file_size < 10000 and not file_info.filename.endswith(('.png', '.jpg', '.jpeg', '.gif')):
                        try:
                            with zip_ref.open(file_info) as f:
                                content = f.read().decode('utf-8', errors='ignore')
                                result += f" Content: {content[:500]}\n"
                        except Exception:
                            # Best-effort: unreadable member — keep listing the
                            # rest (was a bare `except:` which also swallowed
                            # KeyboardInterrupt/SystemExit).
                            pass
        except Exception as e:
            return f"Error reading ZIP: {e}"
        finally:
            os.remove(temp_zip_path)

        return result
    except Exception as e:
        logger.error(f"Error downloading ZIP: {e}")
        return f"Error downloading ZIP: {e}"
156
+
157
def search_history(query: str) -> str:
    """Search the history of solved quizzes for *query* (e.g., a previous URL).

    Scans history.jsonl line by line; malformed JSON lines are skipped.
    Returns matching entries joined by newlines, or a "not found" message.
    """
    try:
        history_file = "history.jsonl"
        if not os.path.exists(history_file):
            return "No history found."

        results = []
        with open(history_file, "r") as f:
            for line in f:
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    # Skip malformed lines; the previous bare `except:` also
                    # hid unrelated errors (and KeyboardInterrupt).
                    continue
                if query in str(entry):
                    results.append(str(entry))

        if results:
            return "\n".join(results)
        return "No matching history found."
    except Exception as e:
        return f"Error searching history: {e}"
182
+
183
+
184
+
185
+
186
def transcribe_audio(url: str) -> str:
    """Download an audio file from *url* and transcribe it locally.

    Converts the audio to WAV with pydub/ffmpeg, then runs the local
    Whisper "tiny" model (fully on-device, no external transcription
    API). Supports MP3, WAV, etc. Returns the transcript or an
    "Error ..." string.
    """
    try:
        logger.info(f"Transcribing audio from: {url}")

        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        # Timeout so a stalled download cannot hang the agent.
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        # Create a temporary file to hold the WAV-converted audio
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
            temp_wav_path = temp_wav.name

        try:
            # Convert to WAV if needed (using pydub)
            try:
                audio_content = io.BytesIO(response.content)
                audio = AudioSegment.from_file(audio_content)
                audio.export(temp_wav_path, format="wav")
            except Exception as e:
                logger.error(f"Error converting audio: {e}")
                return f"Error converting audio: {e}"

            # Transcribe using local Whisper (tiny model).
            # The model is downloaded once if not cached, then runs on-device.
            import whisper

            model = whisper.load_model("tiny")
            result = model.transcribe(temp_wav_path)
            text = result["text"]

            logger.info(f"WHISPER OUTPUT: {text}")
            return text
        finally:
            # BUG FIX: the temp file previously leaked when conversion or
            # transcription raised; always clean it up.
            if os.path.exists(temp_wav_path):
                os.remove(temp_wav_path)
    except Exception as e:
        logger.error(f"Error transcribing audio: {e}")
        return f"Error transcribing audio: {e}"
231
+
232
def understand_image(url: str, prompt: str = "Describe this image in detail") -> str:
    """Analyze an image with a vision model via an OpenRouter-compatible API.

    Accepts an http(s) URL, a data URI, or a local file path (local files
    are inlined as base64 data URIs). Returns the model's description or
    an "Error ..." string.
    """
    try:
        logger.info(f"Analyzing image from: {url}")

        if os.path.exists(url):
            # It's a local file, convert to data URI
            try:
                with open(url, "rb") as image_file:
                    encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
                # Determine mime type based on extension, default to jpeg
                mime_type = "image/jpeg"
                if url.lower().endswith(".png"):
                    mime_type = "image/png"
                elif url.lower().endswith(".gif"):
                    mime_type = "image/gif"
                elif url.lower().endswith(".webp"):
                    mime_type = "image/webp"

                url = f"data:{mime_type};base64,{encoded_string}"
            except Exception as e:
                return f"Error reading local image file: {e}"

        if not url.startswith("http") and not url.startswith("data:"):
            return f"Error: URL must be absolute (http/https), a data URI, or a valid local file path. Received: {url[:50]}..."

        api_key = os.getenv("AI_TOKEN") or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            return "Error: AI_TOKEN not found."

        # Use OpenRouter API to analyze the image
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "google/gemini-2.0-flash-lite-001",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": url}}
                    ]
                }
            ]
        }

        # ROBUSTNESS FIX: timeout so a stalled vision call cannot hang the
        # agent indefinitely (the old call had none).
        response = requests.post("https://aipipe.org/openrouter/v1/chat/completions", headers=headers, json=payload, timeout=120)
        response.raise_for_status()

        result = response.json()
        description = result['choices'][0]['message']['content']

        logger.info(f"IMAGE ANALYSIS OUTPUT: {description}")
        return description

    except Exception as e:
        logger.error(f"Error analyzing image: {e}")
        return f"Error analyzing image: {e}"
296
+
297
def call_api(url: str, method: str = "GET", headers: dict = None, json_data: dict = None) -> str:
    """Make an HTTP request to an external API and return the body as text.

    JSON responses are pretty-printed; anything else is returned raw.
    Useful for sourcing data from APIs as required by the quiz.
    """
    try:
        logger.info(f"Calling API: {method} {url}")

        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        # Timeout so a dead endpoint cannot hang the agent forever.
        response = requests.request(method, url, headers=headers, json=json_data, timeout=60)

        try:
            return json.dumps(response.json(), indent=2)
        except ValueError:
            # Body is not JSON (response.json() raises a ValueError
            # subclass); fall back to the raw text. The previous bare
            # `except:` also swallowed KeyboardInterrupt/SystemExit.
            return response.text

    except Exception as e:
        logger.error(f"Error calling API: {e}")
        return f"Error calling API: {e}"
318
+
319
+
320
+
321
+
322
def fetch_page_text(url: str) -> str:
    """
    Fetches the text content of a web page using Playwright.
    Also extracts links, audio sources, and takes a screenshot if visual elements are present.

    Args:
        url (str): The URL of the page to fetch.

    Returns:
        str: The page text plus extracted links/media, or an "Error ..." message.
        (BUG FIX: the annotation/docstring previously claimed dict, and the
        invalid-URL path actually returned a dict while every other path
        returned a string.)
    """
    result = {}

    try:
        logger.info(f"Fetching page text from: {url}")

        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        def run_playwright():
            # Runs sync Playwright on a separate thread so this tool can be
            # called from a thread that already hosts an asyncio event loop
            # (the sync API refuses to run inside a running loop).
            try:
                with sync_playwright() as p:
                    browser = p.chromium.launch(headless=True)
                    page = browser.new_page()
                    page.goto(url)
                    page.wait_for_load_state("networkidle")
                    html_content = page.content()

                    # Screenshot Logic: capture the page when it contains
                    # visual elements so the agent can inspect it later.
                    images = []
                    try:
                        has_visuals = page.evaluate("() => document.querySelectorAll('canvas, img').length > 0")
                        if has_visuals:
                            import uuid
                            unique_id = str(uuid.uuid4())[:8]
                            screenshot_path = f"/tmp/screenshot_{unique_id}.jpg"
                            page.screenshot(path=screenshot_path, full_page=True, type='jpeg', quality=50)
                            images.append(f"Image: [Page Screenshot]({screenshot_path})")
                            logger.info(f"Visual elements detected. Saved screenshot to {screenshot_path}")
                    except Exception as e:
                        logger.warning(f"Error handling screenshot in fetch_page_text: {e}")

                    browser.close()

                    # Parse with BS4
                    soup = BeautifulSoup(html_content, 'html.parser')
                    text_content = soup.get_text(separator='\n', strip=True)

                    links = []
                    for a in soup.find_all('a', href=True):
                        href = a['href']
                        links.append(f"Link: [{a.get_text(strip=True)}]({href})")

                    audio_sources = []
                    for audio in soup.find_all('audio'):
                        if audio.get('src'):
                            audio_sources.append(f"Audio: {audio['src']}")
                        for source in audio.find_all('source', src=True):
                            audio_sources.append(f"Audio: {source['src']}")

                    result["content"] = text_content + "\n\n--- Extracted Links & Media ---\n" + "\n".join(links + audio_sources + images)

            except Exception as e:
                result["error"] = str(e)

        thread = threading.Thread(target=run_playwright)
        thread.start()
        thread.join()

        if "error" in result:
            logger.error(f"Error fetching page: {result['error']}")
            return f"Error fetching page: {result['error']}"

        content = result.get("content", "")
        logger.info(f"Fetched content (first 100 chars): {content[:100]}")
        return content
    except Exception as e:
        logger.error(f"Error fetching page: {e}")
        return f"Error fetching page: {e}"
402
+
403
+
404
def fetch_page_scripts(url: str) -> str:
    """
    Fetches only the scripts (inline and src) from a web page.
    Useful when the page mentions embedded logic or hidden code.
    """
    outcome = {}
    try:
        logger.info(f"Fetching page scripts from: {url}")

        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        def worker():
            # Sync Playwright runs on its own thread to stay clear of any
            # asyncio loop on the caller's thread.
            try:
                with sync_playwright() as p:
                    browser = p.chromium.launch(headless=True)
                    page = browser.new_page()
                    page.goto(url)
                    page.wait_for_load_state("networkidle")
                    markup = page.content()
                    browser.close()

                    soup = BeautifulSoup(markup, 'html.parser')
                    collected = []
                    for script in soup.find_all('script'):
                        if script.get('src'):
                            collected.append(f"Script Source: {script['src']}")
                        else:
                            inline_body = script.string
                            if inline_body and inline_body.strip():
                                # Cap inline scripts to keep context manageable.
                                collected.append(f"Inline Script: {inline_body.strip()[:2000]}")

                    outcome["content"] = "--- Extracted Scripts ---\n" + "\n".join(collected)
            except Exception as e:
                outcome["error"] = str(e)

        t = threading.Thread(target=worker)
        t.start()
        t.join()

        if "error" in outcome:
            return f"Error fetching scripts: {outcome['error']}"

        return outcome.get("content", "No scripts found.")
    except Exception as e:
        return f"Error fetching scripts: {e}"
448
+