Vjay15 commited on
Commit
ff23105
·
verified ·
1 Parent(s): ee0d98c

TRYING IF IT WORKS

Browse files
Files changed (7) hide show
  1. Dockerfile +41 -0
  2. LICENSE +21 -0
  3. agent.py +46 -0
  4. main.py +42 -0
  5. requirements.txt +20 -0
  6. solver.py +300 -0
  7. tools.py +448 -0
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.11 slim image (stable for Playwright/Agno)
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies required for building python packages and playwright
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements file
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Install Playwright browsers and dependencies
# We set the path to a location accessible by the non-root user or install globally
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# chmod 777 makes the browser bundle readable/executable by the non-root
# "user" created below (browsers are installed here while still root)
RUN mkdir /ms-playwright && \
    playwright install --with-deps chromium && \
    chmod -R 777 /ms-playwright

# Create a non-root user for security (HF Spaces requirement)
RUN useradd -m -u 1000 user

# Copy application code and set ownership
COPY --chown=user:user . .

# Switch to non-root user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Expose port 7860 (standard for HF Spaces)
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 22f3000730
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
agent.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agno.agent import Agent
2
+ from agno.models.openai.chat import OpenAIChat
3
+ from agno.db.sqlite.sqlite import SqliteDb
4
+ import os
5
+ from dotenv import load_dotenv
6
+ from tools import fetch_page_text, fetch_page_scripts, run_python_code, transcribe_audio, understand_image, call_api, execute_python, read_pdf, read_zip, search_history
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ load_dotenv()
12
+
13
def get_agent():
    """Build and return a configured quiz-solving Agent.

    Reads the AI_TOKEN environment variable and wires up the
    OpenRouter-compatible model endpoint, the tool set, and
    SQLite-backed conversation memory.

    Raises:
        ValueError: if AI_TOKEN is not set in the environment.
    """
    api_key = os.getenv("AI_TOKEN")
    if not api_key:
        logger.error("AI_TOKEN is missing from environment variables!")
        raise ValueError("AI_TOKEN not found in environment variables")

    # SECURITY FIX: log only the key length, never any part of the secret
    # itself (the previous code logged the first 10 characters of the key).
    logger.info(f"AI_TOKEN loaded. Length: {len(api_key)}")

    # Set env var just in case downstream libraries look for it
    os.environ["OPENROUTER_API_KEY"] = api_key

    # Initialize the agent with OpenRouter model via custom endpoint
    agent = Agent(
        model=OpenAIChat(
            base_url="https://aipipe.org/openrouter/v1",
            api_key=api_key,
            id="google/gemini-2.0-flash-lite-001"
        ),
        description="You are a helpful assistant that solves data-related quiz tasks.",
        instructions=[
            "You will be given a task description, often involving data analysis or web scraping.",
            "You need to solve the task and provide the answer.",
            "The answer should be in the format requested by the task.",
            "If you need to download a file, write Python code to do it.",
            "Be concise and accurate."
        ],
        tools=[fetch_page_text, fetch_page_scripts, run_python_code, transcribe_audio, understand_image, call_api, execute_python, read_pdf, read_zip, search_history],
        markdown=True,
        debug_mode=True,
        db=SqliteDb(db_file="agent_memory.db"),
        add_history_to_context=True
    )
    return agent
main.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
2
+ from pydantic import BaseModel
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from solver import solve_quiz
6
+ import logging
7
+
8
+ # Configure logging
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
+ load_dotenv()
13
+
14
+ app = FastAPI()
15
+
16
class QuizRequest(BaseModel):
    """Incoming quiz-task request payload posted by the quiz server."""
    email: str   # participant email address
    secret: str  # shared secret; validated against QUIZ_SECRET env var
    url: str     # first quiz page to solve
    class Config:
        # Accept and retain any extra fields beyond the declared ones.
        extra = "allow"
22
+
23
@app.post("/", status_code=200)
async def solve_quiz_endpoint(request: QuizRequest, background_tasks: BackgroundTasks):
    """Accept a quiz task and launch the solver in the background.

    Returns immediately so the caller is not blocked while pages are
    scraped and answered; the actual work happens in a background task.

    Raises:
        HTTPException: 403 when the provided secret does not match QUIZ_SECRET.
    """
    import hmac

    logger.info(f"Received quiz request for URL: {request.url}")

    # Verify secret
    expected_secret = os.getenv("QUIZ_SECRET")
    if not expected_secret:
        logger.warning("QUIZ_SECRET not set in environment variables. Skipping secret validation.")
    elif not hmac.compare_digest(request.secret, expected_secret):
        # SECURITY FIX: constant-time comparison, and never write either
        # secret value into the logs (the old code logged both).
        logger.error("Invalid secret provided.")
        raise HTTPException(status_code=403, detail="Invalid secret")

    # Start solving in background
    background_tasks.add_task(solve_quiz, request.url, request.email, request.secret)

    return {"message": "Task received, solving started."}
39
+
40
@app.get("/health")
async def health_check():
    """Liveness probe: always reports the service as healthy."""
    return dict(status="ok")
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ requests
4
+ python-dotenv
5
+ agno
6
+ playwright
7
+ pandas
8
+ beautifulsoup4
9
+ lxml
10
+ openai
11
+ SpeechRecognition
12
+ pydub
13
+ scikit-learn
14
+ openai-whisper
15
+ matplotlib
16
+ seaborn
17
+ pypdf
18
+ duckdb
19
+ Pillow
20
+ sqlalchemy
solver.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import requests
5
+ from urllib.parse import urljoin
6
+ from playwright.async_api import async_playwright
7
+ from bs4 import BeautifulSoup
8
+ from agent import get_agent
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
async def solve_quiz(initial_url: str, email: str, secret: str):
    """Drive the full quiz-solving loop for one participant.

    Navigates to each quiz page with Playwright, extracts the task
    (text, links, media, optional screenshot), asks the LLM agent to
    solve it, submits the answer, and follows any next-URL returned by
    the grader until the quiz is complete.

    Args:
        initial_url: first quiz page to visit.
        email: participant email (filled into email inputs when found).
        secret: shared secret forwarded to the agent/grader.
    """
    # Hoisted imports: previously `uuid`, `re` and `execute_python` were
    # re-imported on every loop iteration.
    import re
    import uuid
    from tools import execute_python

    logger.info(f"Starting quiz solver workflow for {email}")

    current_url = initial_url

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()

        try:
            while current_url:
                # Generate a NEW session ID for each task/URL to keep memory clean
                session_id = str(uuid.uuid4())
                logger.info(f"Started new agent session for {current_url}: {session_id}")

                logger.info(f"Navigating to {current_url}")
                await page.goto(current_url)

                # Wait for content
                await page.wait_for_selector("body")

                # Check for email input and fill it if present
                # Many quizzes require entering the email to see the question
                try:
                    email_input = await page.query_selector("input[type='email'], input[name='email'], input[placeholder*='email']")
                    if email_input:
                        logger.info(f"Found email input, filling with {email}")
                        await email_input.fill(email)
                        await email_input.press("Enter")
                        # Wait for potential update/navigation
                        await page.wait_for_load_state("networkidle")
                        await asyncio.sleep(2)  # Extra buffer for JS updates
                except Exception as e:
                    logger.warning(f"Error handling email input: {e}")

                # Extract content: full HTML so we can parse links and media
                html_content = await page.content()
                soup = BeautifulSoup(html_content, 'html.parser')

                # Extract text
                text_content = soup.get_text(separator='\n', strip=True)

                # Honour a <base> tag when resolving relative URLs
                base_url = current_url
                base_tag = soup.find('base', href=True)
                if base_tag:
                    base_url = urljoin(current_url, base_tag['href'])
                    logger.info(f"Found <base> tag, using base URL: {base_url}")

                # Extract links and media to append to context
                links = []
                for a in soup.find_all('a', href=True):
                    href = a['href']
                    full_url = urljoin(base_url, href)
                    links.append(f"Link: [{a.get_text(strip=True)}]({full_url})")

                audio_sources = []
                for audio in soup.find_all('audio'):
                    if audio.get('src'):
                        src = audio['src']
                        full_src = urljoin(base_url, src)
                        audio_sources.append(f"Audio: {full_src}")
                    for source in audio.find_all('source', src=True):
                        src = source['src']
                        full_src = urljoin(base_url, src)
                        audio_sources.append(f"Audio: {full_src}")

                images = []
                for img in soup.find_all('img'):
                    src = img.get('src')
                    if src:
                        full_src = urljoin(base_url, src)
                        alt = img.get('alt', 'No description')
                        images.append(f"Image: [{alt}]({full_src})")

                # Conditional Screenshot Logic
                # If there are visual elements (canvas or images), capture the page state to a file.
                # This allows the agent to "see" the page if needed, without cluttering context with base64.
                try:
                    has_visuals = await page.evaluate("() => document.querySelectorAll('canvas, img').length > 0")
                    if has_visuals:
                        screenshot_path = f"/tmp/screenshot_{session_id}.jpg"
                        await page.screenshot(path=screenshot_path, full_page=True, type='jpeg', quality=50)
                        images.append(f"Image: [Page Screenshot]({screenshot_path})")
                        logger.info(f"Visual elements detected. Saved screenshot to {screenshot_path}")
                    else:
                        logger.info("No significant visual elements detected. Skipping screenshot.")
                except Exception as e:
                    logger.warning(f"Error handling screenshot: {e}")

                # Combine into a rich context
                content = text_content + "\n\n--- Extracted Links & Media ---\n" + "\n".join(links + audio_sources + images)

                # If the content is empty or loading, wait a bit
                if not content.strip():
                    await asyncio.sleep(1)
                    content = await page.evaluate("document.body.innerText")

                logger.info(f"Extracted content (first 100 chars): {content[:100]}")

                # Fresh agent per task; memory isolation comes from the per-URL session_id.
                agent = get_agent()

                # NOTE(review): the brace-doubling below inserts literal `{{`/`}}`
                # into the final prompt text (f-string interpolation does not
                # re-format) — kept as-is to preserve existing behavior.
                prompt = f"""
You are a highly capable Quiz Solver Agent.
Current Page URL: {current_url}

Page Content:
---
{content.replace("{", "{{").replace("}", "}}")}
---

**GOAL**
Solve the task on the current page.

**GUIDELINES**
- **Conciseness**: Plan and explain in **2-3 lines maximum**.
- **Action**: Respond **IMMEDIATELY** with a tool call or the final JSON. **DO NOT** output conversational text or plans like "I need to...". Just run the code.

**TOOL USAGE**
- **Secret Codes**: Return exactly as requested (no extra spaces).
- **Media**: (this is the order in which you should understand the contents of the page)
- Audio: Use `transcribe_audio(url)`.
- Images: Use `understand_image(url, prompt)`. (if /tmp is involved its stored locally and not in the url)
- PDF/ZIP: Use `read_pdf(url)` or `read_zip(url)`.
- History: Use `search_history(query)` with the previous quiz URL.
- API: Use `call_api(url)` only if explicitly mentioned to call an api and not otherwise.
- **Files**:
- **Local**: Paths starting with `/tmp/` are local. Pass them directly to tools.
- **Relative URLs**: Resolve against the Current Page URL using `urllib.parse.urljoin`.
- **Scripts**: Use `fetch_page_scripts(url)` **ONLY** if the page explicitly asks to "run javascript" or "embedded javascript".
- **Stuck?**: If content is unchanged or ambiguous, **FIRST** **CRITICAL** verify visually with `understand_image`. Only then consider scripts.

**DATA ANALYSIS**
- **Process**:
1. **Inspect**: Download/read the file and print `df.head()` and `df.columns`. **STOP**.
2. **Analyze**: Read the inspection output, *then* write new code to solve.
- **Downloads**: Always check `status_code == 200`. Save to `/tmp`.
- **CSV**: Use `header=None` for number lists.
- **Output**: Calculate the final answer in Python. Print **ONLY** the result. Do not print large dataframes.

**DIRECT ANSWER**
If the answer is in the text, provide it directly.
6. If sometimes the answer is wrong when previously you submitted it in some format (integer/string/float) try submitting it in another format.
7. If the task asks for email or uses an example email, then the email address is 22f3000730@ds.study.iitm.ac.in. You can use it if needed. But DO NOT confuse it for anything else based on your assumptions.
8. Email number for '22f3000730@ds.study.iitm.ac.in' is 45519 using sha1 hash. You can use it if needed. But DO NOT confuse it for anything else based on your assumptions. DO NOT assume that this is the answer unless the page explicitly states to submit it.
9. If the tasks ask for the secret, it is vjay@15
10. Screenshots are mentioned in the prompt like this: Image: [Page Screenshot](/tmp/screenshot_12345.jpg) use understand_image(/tmp/screenshot_12345.jpg, "") to understand the screenshot.
**OUTPUT**
When you have the final answer, return a JSON object with the following structure:
{{
"answer_payload": {{"email": "...", "secret": "...", "url": "...", "answer": "..."}},
"submit_url": "...",
"reasoning": "..."
}}
If submission url is not available, use https://tds-llm-analysis.s-anand.net/submit to submit.
"""

                # Run agent with session_id for memory
                max_retries = 3
                for attempt in range(max_retries):
                    response = agent.run(prompt, session_id=session_id)
                    logger.info(f"LLM Response: {response.content}")

                    # Parse response
                    try:
                        response_text = response.content
                        logger.info(f"Raw LLM Response: {response_text}")

                        # Robust JSON extraction using regex
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            response_text = json_match.group(0)

                        result = json.loads(response_text)

                        # Check if agent returned python_code instead of final answer
                        if "python_code" in result and "answer_payload" not in result:
                            python_code = result.get("python_code")
                            logger.info(f"Agent provided Python code to execute")

                            # Execute the code
                            code_output = execute_python(python_code)
                            logger.info(f"Python code executed, output: {code_output[:200]}...")

                            # Ask agent to format final JSON with code output
                            followup_prompt = f"""
The Python code executed successfully. Output:

{code_output.replace("{", "{{").replace("}", "}}")}

Now return the final JSON for submission:

{{
"answer_payload": {{"email": "{email}", "secret": "{secret}", "url": "{current_url}", "answer": <extract from output above>}},
"submit_url": <submit URL from original page>,
"reasoning": <brief explanation>
}}
"""
                            response = agent.run(followup_prompt, session_id=session_id)
                            logger.info(f"LLM Follow-up Response: {response.content}")

                            # Parse follow-up response
                            response_text = response.content
                            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                            if json_match:
                                response_text = json_match.group(0)
                            result = json.loads(response_text)

                        answer_payload = result.get("answer_payload")
                        submit_url = result.get("submit_url")

                        if not answer_payload or not submit_url:
                            logger.error("Agent failed to provide answer_payload or submit_url")
                            if attempt < max_retries - 1:
                                prompt = "Error: You must return a JSON object with 'answer_payload' and 'submit_url'. Do not return conversational text."
                                continue
                            break

                        # Resolve relative URL
                        submit_url = urljoin(current_url, submit_url)

                        logger.info(f"Solved. Submitting to {submit_url}")

                        # Submit answer (payload is trusted as the LLM produced it)
                        submission_response = submit_answer(submit_url, answer_payload)

                        logger.info(f"Submission Response: {json.dumps(submission_response, indent=2)}")

                        # Check for next URL first (priority over correctness for navigation)
                        next_url = submission_response.get("url")
                        is_correct = submission_response.get("correct")

                        if next_url:
                            logger.info(f"Received next URL: {next_url}")
                            if not is_correct:
                                logger.warning(f"Answer was incorrect, but moving to next URL as instructed.")
                            else:
                                logger.info("Answer correct! Moving to next URL.")

                            current_url = next_url
                            break  # Break retry loop to process new URL

                        # No new URL provided
                        if is_correct:
                            logger.info("Answer correct! No new URL provided. Quiz completed!")
                            current_url = None  # Break outer loop
                            break  # Break retry loop
                        else:
                            logger.warning(f"Answer incorrect: {submission_response.get('reason')}")
                            logger.info("No new URL provided. Retrying same URL in 2 seconds...")
                            await asyncio.sleep(2)
                            # Break inner loop to refresh page and try again
                            break

                    except json.JSONDecodeError:
                        logger.error(f"Failed to parse agent response: {response.content}")
                        if attempt < max_retries - 1:
                            prompt = "Error: Your response was not valid JSON. Please return ONLY a JSON object. Do not include any conversational text."
                            continue
                        break

        except Exception as e:
            logger.error(f"Error in solver loop: {e}")
        finally:
            await browser.close()
292
+
293
def submit_answer(submit_url, payload):
    """POST *payload* as JSON to *submit_url* and return the parsed JSON reply.

    On any failure (network error, non-JSON body) returns a dict shaped
    like a grading response: {"correct": False, "reason": <error>} so the
    caller's retry logic keeps working.
    """
    try:
        logger.info(f"Submitting answer to {submit_url} with payload: {json.dumps(payload, indent=2)}")
        # Timeout so a dead grading endpoint cannot hang the background task forever.
        response = requests.post(submit_url, json=payload, timeout=60)
        return response.json()
    except Exception as e:
        logger.error(f"Submission failed: {e}")
        return {"correct": False, "reason": str(e)}
tools.py ADDED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import base64
3
+ import logging
4
+ from playwright.sync_api import sync_playwright
5
+ import threading
6
+ import speech_recognition as sr
7
+ from pydub import AudioSegment
8
+ import io
9
+ import tempfile
10
+ import sys
11
+ import os
12
+ import pandas as pd
13
+ import numpy as np
14
+ import speech_recognition as sr
15
+ from bs4 import BeautifulSoup
16
+ import pydub
17
+ from pydub import AudioSegment
18
+ import pypdf
19
+ import zipfile
20
+ import duckdb
21
+ from PIL import Image
22
+ import json
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
def run_python_code(code: str) -> str:
    """Execute a Python snippet and return its captured stdout.

    Markdown code fences are stripped if present. A set of common
    libraries (pandas, numpy, requests, ...) is pre-loaded into the
    execution namespace. Returns the printed output, a reminder message
    when nothing was printed, or an "Error ..." string on failure.
    """
    try:
        logger.info("Executing Python code...")

        # Robustly extract code from markdown blocks if present
        if "```python" in code:
            code = code.split("```python")[1].split("```")[0].strip()
        elif "```" in code:
            code = code.split("```")[1].split("```")[0].strip()

        logger.info(f"Code:\n{code}")

        # Create a buffer to capture stdout
        old_stdout = sys.stdout
        redirected_output = io.StringIO()
        sys.stdout = redirected_output

        # Execution namespace pre-loaded with common libraries
        exec_scope = {
            "pd": pd,
            "np": np,
            "requests": requests,
            "io": io,
            "sr": sr,
            "pydub": pydub,
            "sys": sys,
            "os": os,
            "BeautifulSoup": BeautifulSoup,
            "pypdf": pypdf,
            "zipfile": zipfile,
            "duckdb": duckdb,
            "Image": Image
        }

        try:
            # BUG FIX: pass a single dict as globals. With the previous
            # exec(code, {}, local_scope), functions and comprehensions
            # defined by the snippet could not see the pre-loaded names,
            # because their name lookups go through globals (which was {}).
            exec(code, exec_scope)
        except Exception as exec_error:
            return f"Error executing code: {exec_error}"
        finally:
            sys.stdout = old_stdout

        output = redirected_output.getvalue()
        logger.info(f"Code Output:\n{output}")
        return output if output.strip() else "Code executed successfully but produced no output. Did you forget to print the result?"

    except Exception as e:
        logger.error(f"System error during code execution: {e}")
        return f"System error: {e}"
77
+
78
def execute_python(code: str) -> str:
    """Run a Python snippet and return its output.

    Thin alias over run_python_code, kept as a separately-named tool for
    math, data analysis (pandas, numpy, duckdb), and file processing.
    """
    result = run_python_code(code)
    return result
84
+
85
+
86
+
87
def read_pdf(url: str) -> str:
    """Download a PDF from *url* and return its extracted text.

    The text is truncated to 5000 characters to avoid context overflow.
    Returns an "Error ..." string on download or parse failure.
    """
    try:
        logger.info(f"Reading PDF from: {url}")
        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        # Timeout so a stalled download cannot hang the agent.
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            temp_pdf.write(response.content)
            temp_pdf_path = temp_pdf.name

        text = ""
        try:
            reader = pypdf.PdfReader(temp_pdf_path)
            for page in reader.pages:
                # BUG FIX: extract_text() may return None for image-only
                # pages, which previously raised TypeError on concatenation.
                text += (page.extract_text() or "") + "\n"
        except Exception as e:
            return f"Error reading PDF: {e}"
        finally:
            os.remove(temp_pdf_path)

        return text[:5000]  # Truncate if too long to avoid context overflow
    except Exception as e:
        logger.error(f"Error downloading PDF: {e}")
        return f"Error downloading PDF: {e}"
117
+
118
def read_zip(url: str) -> str:
    """Download a ZIP file from *url*, list its contents, and inline text.

    Small (<10 KB) non-image members are decoded best-effort and their
    first 500 characters appended. Returns an "Error ..." string on
    download or archive failure.
    """
    try:
        logger.info(f"Reading ZIP from: {url}")
        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        # Timeout so a stalled download cannot hang the agent.
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as temp_zip:
            temp_zip.write(response.content)
            temp_zip_path = temp_zip.name

        result = "ZIP Contents:\n"
        try:
            with zipfile.ZipFile(temp_zip_path, 'r') as zip_ref:
                for file_info in zip_ref.infolist():
                    result += f"- {file_info.filename} ({file_info.file_size} bytes)\n"
                    # If it's a small text file, try to read it
                    if file_info.file_size < 10000 and not file_info.filename.endswith(('.png', '.jpg', '.jpeg', '.gif')):
                        try:
                            with zip_ref.open(file_info) as f:
                                content = f.read().decode('utf-8', errors='ignore')
                                result += f" Content: {content[:500]}\n"
                        except Exception:
                            # Best-effort: unreadable member — keep listing the
                            # rest (was a bare `except:` which also swallowed
                            # KeyboardInterrupt/SystemExit).
                            pass
        except Exception as e:
            return f"Error reading ZIP: {e}"
        finally:
            os.remove(temp_zip_path)

        return result
    except Exception as e:
        logger.error(f"Error downloading ZIP: {e}")
        return f"Error downloading ZIP: {e}"
156
+
157
def search_history(query: str) -> str:
    """Search the history of solved quizzes for *query* (e.g., a previous URL).

    Scans history.jsonl line by line; malformed JSON lines are skipped.
    Returns matching entries joined by newlines, or a "not found" message.
    """
    try:
        history_file = "history.jsonl"
        if not os.path.exists(history_file):
            return "No history found."

        results = []
        with open(history_file, "r") as f:
            for line in f:
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    # Skip malformed lines; the previous bare `except:` also
                    # hid unrelated errors (and KeyboardInterrupt).
                    continue
                if query in str(entry):
                    results.append(str(entry))

        if results:
            return "\n".join(results)
        return "No matching history found."
    except Exception as e:
        return f"Error searching history: {e}"
182
+
183
+
184
+
185
+
186
def transcribe_audio(url: str) -> str:
    """Download an audio file from *url* and transcribe it locally.

    Converts the audio to WAV with pydub/ffmpeg, then runs the local
    Whisper "tiny" model (fully on-device, no external transcription
    API). Supports MP3, WAV, etc. Returns the transcript or an
    "Error ..." string.
    """
    try:
        logger.info(f"Transcribing audio from: {url}")

        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        # Timeout so a stalled download cannot hang the agent.
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        # Create a temporary file to hold the WAV-converted audio
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
            temp_wav_path = temp_wav.name

        try:
            # Convert to WAV if needed (using pydub)
            try:
                audio_content = io.BytesIO(response.content)
                audio = AudioSegment.from_file(audio_content)
                audio.export(temp_wav_path, format="wav")
            except Exception as e:
                logger.error(f"Error converting audio: {e}")
                return f"Error converting audio: {e}"

            # Transcribe using local Whisper (tiny model).
            # The model is downloaded once if not cached, then runs on-device.
            import whisper

            model = whisper.load_model("tiny")
            result = model.transcribe(temp_wav_path)
            text = result["text"]

            logger.info(f"WHISPER OUTPUT: {text}")
            return text
        finally:
            # BUG FIX: the temp file previously leaked when conversion or
            # transcription raised; always clean it up.
            if os.path.exists(temp_wav_path):
                os.remove(temp_wav_path)
    except Exception as e:
        logger.error(f"Error transcribing audio: {e}")
        return f"Error transcribing audio: {e}"
231
+
232
def understand_image(url: str, prompt: str = "Describe this image in detail") -> str:
    """Analyze an image with a vision model via an OpenRouter-compatible API.

    Accepts an http(s) URL, a data URI, or a local file path (local files
    are inlined as base64 data URIs). Returns the model's description or
    an "Error ..." string.
    """
    try:
        logger.info(f"Analyzing image from: {url}")

        if os.path.exists(url):
            # It's a local file, convert to data URI
            try:
                with open(url, "rb") as image_file:
                    encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
                # Determine mime type based on extension, default to jpeg
                mime_type = "image/jpeg"
                if url.lower().endswith(".png"):
                    mime_type = "image/png"
                elif url.lower().endswith(".gif"):
                    mime_type = "image/gif"
                elif url.lower().endswith(".webp"):
                    mime_type = "image/webp"

                url = f"data:{mime_type};base64,{encoded_string}"
            except Exception as e:
                return f"Error reading local image file: {e}"

        if not url.startswith("http") and not url.startswith("data:"):
            return f"Error: URL must be absolute (http/https), a data URI, or a valid local file path. Received: {url[:50]}..."

        api_key = os.getenv("AI_TOKEN") or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            return "Error: AI_TOKEN not found."

        # Use OpenRouter API to analyze the image
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "google/gemini-2.0-flash-lite-001",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": url}}
                    ]
                }
            ]
        }

        # ROBUSTNESS FIX: timeout so a stalled vision call cannot hang the
        # agent indefinitely (the old call had none).
        response = requests.post("https://aipipe.org/openrouter/v1/chat/completions", headers=headers, json=payload, timeout=120)
        response.raise_for_status()

        result = response.json()
        description = result['choices'][0]['message']['content']

        logger.info(f"IMAGE ANALYSIS OUTPUT: {description}")
        return description

    except Exception as e:
        logger.error(f"Error analyzing image: {e}")
        return f"Error analyzing image: {e}"
296
+
297
def call_api(url: str, method: str = "GET", headers: dict = None, json_data: dict = None) -> str:
    """Make an HTTP request to an external API and return the body as text.

    JSON responses are pretty-printed; anything else is returned raw.
    Useful for sourcing data from APIs as required by the quiz.
    """
    try:
        logger.info(f"Calling API: {method} {url}")

        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        # Timeout so a dead endpoint cannot hang the agent forever.
        response = requests.request(method, url, headers=headers, json=json_data, timeout=60)

        try:
            return json.dumps(response.json(), indent=2)
        except ValueError:
            # Body is not JSON (response.json() raises a ValueError
            # subclass); fall back to the raw text. The previous bare
            # `except:` also swallowed KeyboardInterrupt/SystemExit.
            return response.text

    except Exception as e:
        logger.error(f"Error calling API: {e}")
        return f"Error calling API: {e}"
318
+
319
+
320
+
321
+
322
def fetch_page_text(url: str) -> str:
    """
    Fetches the text content of a web page using Playwright.
    Also extracts links, audio sources, and takes a screenshot if visual elements are present.

    Args:
        url (str): The URL of the page to fetch.

    Returns:
        str: The page text plus extracted links/media, or an "Error ..." message.
        (BUG FIX: the annotation/docstring previously claimed dict, and the
        invalid-URL path actually returned a dict while every other path
        returned a string.)
    """
    result = {}

    try:
        logger.info(f"Fetching page text from: {url}")

        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        def run_playwright():
            # Runs sync Playwright on a separate thread so this tool can be
            # called from a thread that already hosts an asyncio event loop
            # (the sync API refuses to run inside a running loop).
            try:
                with sync_playwright() as p:
                    browser = p.chromium.launch(headless=True)
                    page = browser.new_page()
                    page.goto(url)
                    page.wait_for_load_state("networkidle")
                    html_content = page.content()

                    # Screenshot Logic: capture the page when it contains
                    # visual elements so the agent can inspect it later.
                    images = []
                    try:
                        has_visuals = page.evaluate("() => document.querySelectorAll('canvas, img').length > 0")
                        if has_visuals:
                            import uuid
                            unique_id = str(uuid.uuid4())[:8]
                            screenshot_path = f"/tmp/screenshot_{unique_id}.jpg"
                            page.screenshot(path=screenshot_path, full_page=True, type='jpeg', quality=50)
                            images.append(f"Image: [Page Screenshot]({screenshot_path})")
                            logger.info(f"Visual elements detected. Saved screenshot to {screenshot_path}")
                    except Exception as e:
                        logger.warning(f"Error handling screenshot in fetch_page_text: {e}")

                    browser.close()

                    # Parse with BS4
                    soup = BeautifulSoup(html_content, 'html.parser')
                    text_content = soup.get_text(separator='\n', strip=True)

                    links = []
                    for a in soup.find_all('a', href=True):
                        href = a['href']
                        links.append(f"Link: [{a.get_text(strip=True)}]({href})")

                    audio_sources = []
                    for audio in soup.find_all('audio'):
                        if audio.get('src'):
                            audio_sources.append(f"Audio: {audio['src']}")
                        for source in audio.find_all('source', src=True):
                            audio_sources.append(f"Audio: {source['src']}")

                    result["content"] = text_content + "\n\n--- Extracted Links & Media ---\n" + "\n".join(links + audio_sources + images)

            except Exception as e:
                result["error"] = str(e)

        thread = threading.Thread(target=run_playwright)
        thread.start()
        thread.join()

        if "error" in result:
            logger.error(f"Error fetching page: {result['error']}")
            return f"Error fetching page: {result['error']}"

        content = result.get("content", "")
        logger.info(f"Fetched content (first 100 chars): {content[:100]}")
        return content
    except Exception as e:
        logger.error(f"Error fetching page: {e}")
        return f"Error fetching page: {e}"
402
+
403
+
404
def fetch_page_scripts(url: str) -> str:
    """
    Fetches only the scripts (inline and src) from a web page.
    Useful when the page mentions embedded logic or hidden code.
    """
    outcome = {}
    try:
        logger.info(f"Fetching page scripts from: {url}")

        if not url.startswith("http"):
            return f"Error: URL must be absolute. Received: {url}"

        def worker():
            # Sync Playwright runs on its own thread to stay clear of any
            # asyncio loop on the caller's thread.
            try:
                with sync_playwright() as p:
                    browser = p.chromium.launch(headless=True)
                    page = browser.new_page()
                    page.goto(url)
                    page.wait_for_load_state("networkidle")
                    markup = page.content()
                    browser.close()

                    soup = BeautifulSoup(markup, 'html.parser')
                    collected = []
                    for script in soup.find_all('script'):
                        if script.get('src'):
                            collected.append(f"Script Source: {script['src']}")
                        else:
                            inline_body = script.string
                            if inline_body and inline_body.strip():
                                # Cap inline scripts to keep context manageable.
                                collected.append(f"Inline Script: {inline_body.strip()[:2000]}")

                    outcome["content"] = "--- Extracted Scripts ---\n" + "\n".join(collected)
            except Exception as e:
                outcome["error"] = str(e)

        t = threading.Thread(target=worker)
        t.start()
        t.join()

        if "error" in outcome:
            return f"Error fetching scripts: {outcome['error']}"

        return outcome.get("content", "No scripts found.")
    except Exception as e:
        return f"Error fetching scripts: {e}"
448
+