Navyssh committed on
Commit
29f4cdf
·
verified ·
1 Parent(s): fb78f9c

Upload 22 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.13-slim

# --- System deps required by Playwright browsers ---
# --no-install-recommends keeps the image slim; the apt list cleanup must be
# in the same RUN layer to actually reduce image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
    wget gnupg ca-certificates curl unzip \
    libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libxkbcommon0 \
    libgtk-3-0 libgbm1 libasound2 libxcomposite1 libxdamage1 libxrandr2 \
    libxfixes3 libpango-1.0-0 libcairo2 \
    && rm -rf /var/lib/apt/lists/*

# --- Install Playwright + Chromium ---
# --with-deps double-checks system libraries on top of the layer above.
RUN pip install --no-cache-dir playwright && playwright install --with-deps chromium

# --- Install uv package manager ---
RUN pip install --no-cache-dir uv

WORKDIR /app

# --- Copy dependency manifests first so the uv sync layer is cached until
# they change (previously `COPY . .` came first, so ANY source edit
# re-installed every dependency) ---
COPY pyproject.toml uv.lock ./
RUN uv sync --frozen

# --- Copy the rest of the app ---
COPY . .

ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=utf-8

# HuggingFace Spaces exposes port 7860
EXPOSE 7860

# --- Run your FastAPI app ---
# uvicorn must be in pyproject dependencies
CMD ["uv", "run", "main.py"]
agent.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, END, START
2
+ from langchain_core.rate_limiters import InMemoryRateLimiter
3
+ from langgraph.prebuilt import ToolNode
4
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
5
+ from tools import get_rendered_html, download_file, post_request, run_code, add_dependencies
6
+ from typing import TypedDict, Annotated, List, Any
7
+ from langchain.chat_models import init_chat_model
8
+ from langgraph.graph.message import add_messages
9
+ import os
10
+ from dotenv import load_dotenv
11
+ load_dotenv()
12
+
13
# --- Runtime configuration from .env ---
EMAIL = os.getenv("EMAIL")    # identity the agent submits when a quiz requires it
SECRET = os.getenv("SECRET")  # shared secret submitted alongside answers
# High ceiling so long multi-task quiz chains don't hit LangGraph's
# default recursion limit mid-run.
RECURSION_LIMIT = 5000
# -------------------------------------------------
# STATE
# -------------------------------------------------
class AgentState(TypedDict):
    """Graph state: running chat history merged via the add_messages reducer."""
    messages: Annotated[List, add_messages]


# Tools the LLM may call; ToolNode dispatches on these by name.
TOOLS = [run_code, get_rendered_html, download_file, post_request, add_dependencies]


# -------------------------------------------------
# GEMINI LLM
# -------------------------------------------------
# 9 requests per 60 s — stays under the Gemini free-tier rate quota.
rate_limiter = InMemoryRateLimiter(
    requests_per_second=9/60,
    check_every_n_seconds=1,
    max_bucket_size=9
)
llm = init_chat_model(
    model_provider="google_genai",
    model="gemini-2.5-flash",
    rate_limiter=rate_limiter
).bind_tools(TOOLS)
39
+
40
+
41
# -------------------------------------------------
# SYSTEM PROMPT
# -------------------------------------------------
# NOTE: f-string — EMAIL/SECRET from the environment are baked in at import
# time so the agent can present credentials when a quiz page demands them.
SYSTEM_PROMPT = f"""
You are an autonomous quiz-solving agent.

Your job is to:
1. Load the quiz page from the given URL.
2. Extract ALL instructions, required parameters, submission rules, and the submit endpoint.
3. Solve the task exactly as required.
4. Submit the answer ONLY to the endpoint specified on the current page (never make up URLs).
5. Read the server response and:
- If it contains a new quiz URL → fetch it immediately and continue.
- If no new URL is present → return "END".

STRICT RULES — FOLLOW EXACTLY:

GENERAL RULES:
- NEVER stop early. Continue solving tasks until no new URL is provided.
- NEVER hallucinate URLs, endpoints, fields, values, or JSON structure.
- NEVER shorten or modify URLs. Always submit the full URL.
- NEVER re-submit unless the server explicitly allows or it's within the 3-minute limit.
- ALWAYS inspect the server response before deciding what to do next.
- ALWAYS use the tools provided to fetch, scrape, download, render HTML, or send requests.

TIME LIMIT RULES:
- Each task has a hard 3-minute limit.
- The server response includes a "delay" field indicating elapsed time.
- If your answer is wrong retry again.

STOPPING CONDITION:
- Only return "END" when a server response explicitly contains NO new URL.
- DO NOT return END under any other condition.

ADDITIONAL INFORMATION YOU MUST INCLUDE WHEN REQUIRED:
- Email: {EMAIL}
- Secret: {SECRET}

YOUR JOB:
- Follow pages exactly.
- Extract data reliably.
- Never guess.
- Submit correct answers.
- Continue until no new URL.
- Then respond with: END
"""

# System prompt first, then the running history from AgentState.
prompt = ChatPromptTemplate.from_messages([
    ("system", SYSTEM_PROMPT),
    MessagesPlaceholder(variable_name="messages")
])

# Runnable pipeline: format prompt + history, then invoke Gemini.
llm_with_prompt = prompt | llm
94
+
95
+
96
# -------------------------------------------------
# AGENT NODE
# -------------------------------------------------
def agent_node(state: AgentState):
    """Run the LLM over the current history and emit its reply.

    Returns only the NEW message: AgentState.messages is annotated with the
    `add_messages` reducer, which merges the delta into the stored history.
    The previous code returned `state["messages"] + [result]`, redundantly
    pushing the entire history back through the reducer on every turn.
    """
    result = llm_with_prompt.invoke({"messages": state["messages"]})
    return {"messages": [result]}
102
+
103
+
104
# -------------------------------------------------
# GRAPH ROUTING
# -------------------------------------------------
def route(state):
    """Decide the next node after `agent`.

    Returns "tools" if the last message requests tool calls, END when the
    model emitted the literal sentinel "END", otherwise loops back to
    "agent" so the model can continue.
    """
    last = state["messages"][-1]
    # Support both message objects (attributes) and plain dicts.
    tool_calls = None
    if hasattr(last, "tool_calls"):
        tool_calls = getattr(last, "tool_calls", None)
    elif isinstance(last, dict):
        tool_calls = last.get("tool_calls")

    if tool_calls:
        return "tools"

    # Extract content robustly.
    content = None
    if hasattr(last, "content"):
        content = getattr(last, "content", None)
    elif isinstance(last, dict):
        content = last.get("content")

    if isinstance(content, str) and content.strip() == "END":
        return END
    # Multi-part content: guard against an empty list, a non-dict part, and
    # a part lacking "text" — the old code called .strip() on the result of
    # content[0].get("text") and crashed (AttributeError on None) in all
    # three cases.
    if isinstance(content, list) and content:
        first = content[0]
        text = first.get("text") if isinstance(first, dict) else None
        if isinstance(text, str) and text.strip() == "END":
            return END
    return "agent"
130
# Build the agent graph: agent ⇄ tools loop with a conditional exit.
graph = StateGraph(AgentState)

graph.add_node("agent", agent_node)
graph.add_node("tools", ToolNode(TOOLS))


# Control starts at the agent; every tool batch returns control to it.
graph.add_edge(START, "agent")
graph.add_edge("tools", "agent")
# route() chooses "tools", "agent" (self-loop to keep working), or END.
graph.add_conditional_edges(
    "agent",
    route
)

app = graph.compile()
145
+
146
+
147
# -------------------------------------------------
# ENTRY POINT
# -------------------------------------------------
def run_agent(url: str) -> None:
    """Drive the compiled graph on a quiz URL until the agent stops.

    Blocks until the graph reaches END (i.e. the agent found no new quiz
    URL). Runs for side effects only — the old `-> str` annotation was
    wrong, the function never returned a value.
    """
    app.invoke(
        {"messages": [{"role": "user", "content": url}]},
        config={"recursion_limit": RECURSION_LIMIT},
    )
    # Typo fix: "succesfully" -> "successfully".
    print("Tasks completed successfully")
156
+
main.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, BackgroundTasks
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.exceptions import HTTPException
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from agent import run_agent
6
+ from dotenv import load_dotenv
7
+ import uvicorn
8
+ import os
9
+ import time
10
+
11
load_dotenv()

EMAIL = os.getenv("EMAIL")    # NOTE(review): unused in this module — kept for parity with agent.py; confirm
SECRET = os.getenv("SECRET")  # shared secret checked by /solve

app = FastAPI()
# Wide-open CORS: the Space is invoked from arbitrary origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # or specific domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Process start time; /healthz reports uptime relative to this.
START_TIME = time.time()
25
@app.get("/healthz")
def healthz():
    """Liveness probe: report service status and seconds since startup."""
    uptime = int(time.time() - START_TIME)
    return {"status": "ok", "uptime_seconds": uptime}
32
+
33
@app.post("/solve")
async def solve(request: Request, background_tasks: BackgroundTasks):
    """Validate a quiz request and launch the agent in the background.

    Expects JSON: {"url": <quiz page URL>, "secret": <shared secret>}.
    Returns 200 immediately; run_agent executes as a background task.

    Raises:
        HTTPException 400: body is not JSON, empty, or missing fields.
        HTTPException 403: secret mismatch.
    """
    try:
        data = await request.json()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid JSON")
    # Distinct messages per failure mode — previously every 400 said
    # "Invalid JSON", which misled clients sending valid-but-incomplete JSON.
    if not data:
        raise HTTPException(status_code=400, detail="Empty request body")
    url = data.get("url")
    secret = data.get("secret")
    if not url or not secret:
        raise HTTPException(status_code=400, detail="Missing 'url' or 'secret'")

    # Constant-time comparison so the secret can't be probed via timing.
    import hmac
    if not isinstance(secret, str) or not hmac.compare_digest(secret, SECRET or ""):
        raise HTTPException(status_code=403, detail="Invalid secret")
    print("Verified starting the task...")
    background_tasks.add_task(run_agent, url)

    return JSONResponse(status_code=200, content={"status": "ok"})
52
+
53
+
54
if __name__ == "__main__":
    # Bind to all interfaces on 7860 — the port HuggingFace Spaces exposes.
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-doc==0.0.4
2
+ annotated-types==0.7.0
3
+ anyio==4.11.0
4
+ beautifulsoup4==4.14.2
5
+ cachetools==6.2.2
6
+ certifi==2025.11.12
7
+ charset-normalizer==3.4.4
8
+ click==8.3.1
9
+ cohere==5.20.0
10
+ contourpy==1.3.3
11
+ cycler==0.12.1
12
+ distro==1.9.0
13
+ fastapi==0.122.0
14
+ fastavro==1.12.1
15
+ filelock==3.20.0
16
+ filetype==1.2.0
17
+ fonttools==4.60.1
18
+ fsspec==2025.10.0
19
+ google-ai-generativelanguage==0.9.0
20
+ google-api-core==2.28.1
21
+ google-api-python-client==2.187.0
22
+ google-auth==2.43.0
23
+ google-auth-httplib2==0.2.1
24
+ google-genai==1.52.0
25
+ google-generativeai==0.8.5
26
+ googleapis-common-protos==1.72.0
27
+ greenlet==3.2.4
28
+ groq==0.36.0
29
+ grpcio==1.76.0
30
+ grpcio-status==1.71.2
31
+ h11==0.16.0
32
+ hf-xet==1.2.0
33
+ httpcore==1.0.9
34
+ httplib2==0.31.0
35
+ httpx==0.28.1
36
+ httpx-sse==0.4.0
37
+ huggingface_hub==1.1.5
38
+ idna==3.11
39
+ jiter==0.12.0
40
+ jsonpatch==1.33
41
+ jsonpointer==3.0.0
42
+ kiwisolver==1.4.9
43
+ langchain==1.1.0
44
+ langchain-core==1.1.0
45
+ langchain-google-genai==3.2.0
46
+ langgraph==1.0.4
47
+ langgraph-checkpoint==3.0.1
48
+ langgraph-prebuilt==1.0.5
49
+ langgraph-sdk==0.2.10
50
+ langsmith==0.4.49
51
+ markdownify==1.2.2
52
+ matplotlib==3.10.7
53
+ numpy==2.3.5
54
+ openai==2.8.1
55
+ orjson==3.11.4
56
+ ormsgpack==1.12.0
57
+ packaging==25.0
58
+ pandas==2.3.3
59
+ pillow==12.0.0
60
+ playwright==1.56.0
61
+ proto-plus==1.26.1
62
+ protobuf==5.29.5
63
+ pyasn1==0.6.1
64
+ pyasn1_modules==0.4.2
65
+ pydantic==2.12.5
66
+ pydantic_core==2.41.5
67
+ pyee==13.0.0
68
+ pyparsing==3.2.5
69
+ PyPDF2==3.0.1
70
+ python-dateutil==2.9.0.post0
71
+ python-dotenv==1.2.1
72
+ pytz==2025.2
73
+ PyYAML==6.0.3
74
+ requests==2.32.5
75
+ requests-toolbelt==1.0.0
76
+ rsa==4.9.1
77
+ shellingham==1.5.4
78
+ six==1.17.0
79
+ sniffio==1.3.1
80
+ soupsieve==2.8
81
+ starlette==0.50.0
82
+ tabula-py==2.10.0
83
+ tenacity==9.1.2
84
+ tokenizers==0.22.1
85
+ tqdm==4.67.1
86
+ typer-slim==0.20.0
87
+ types-requests==2.32.4.20250913
88
+ typing-inspection==0.4.2
89
+ typing_extensions==4.15.0
90
+ tzdata==2025.2
91
+ uritemplate==4.2.0
92
+ urllib3==2.5.0
93
+ uvicorn==0.38.0
94
+ websockets==15.0.1
95
+ xxhash==3.6.0
96
+ zstandard==0.25.0
tools/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
"""Aggregate the agent's LangChain tools into a single import surface."""
from .web_scraper import get_rendered_html
from .run_code import run_code
from .send_request import post_request
from .download_file import download_file
from .add_dependencies import add_dependencies

# Explicit public API for `from tools import *` and static analyzers.
__all__ = [
    "get_rendered_html",
    "run_code",
    "post_request",
    "download_file",
    "add_dependencies",
]
tools/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (383 Bytes). View file
 
tools/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (380 Bytes). View file
 
tools/__pycache__/add_dependencies.cpython-312.pyc ADDED
Binary file (1.64 kB). View file
 
tools/__pycache__/add_dependencies.cpython-313.pyc ADDED
Binary file (1.61 kB). View file
 
tools/__pycache__/download_file.cpython-312.pyc ADDED
Binary file (1.66 kB). View file
 
tools/__pycache__/download_file.cpython-313.pyc ADDED
Binary file (1.64 kB). View file
 
tools/__pycache__/run_code.cpython-312.pyc ADDED
Binary file (2.56 kB). View file
 
tools/__pycache__/run_code.cpython-313.pyc ADDED
Binary file (2.51 kB). View file
 
tools/__pycache__/send_request.cpython-312.pyc ADDED
Binary file (3.15 kB). View file
 
tools/__pycache__/send_request.cpython-313.pyc ADDED
Binary file (3.11 kB). View file
 
tools/__pycache__/web_scraper.cpython-312.pyc ADDED
Binary file (2.01 kB). View file
 
tools/__pycache__/web_scraper.cpython-313.pyc ADDED
Binary file (1.94 kB). View file
 
tools/add_dependencies.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from langchain_core.tools import tool
3
+ import subprocess
4
+
5
+
6
@tool
def add_dependencies(dependencies: List[str]) -> str:
    """
    Install the given Python packages into the environment.

    Parameters:
        dependencies (List[str]):
            A list of Python package names to install. Each name must match the
            corresponding package name on PyPI.

    Returns:
        str:
            A message indicating success or failure.
    """
    if not dependencies:
        return "No dependencies given; nothing to install."

    try:
        # subprocess.run with capture_output=True actually captures stderr;
        # the previous check_call(stdout=PIPE, stderr=PIPE) never did, so
        # CalledProcessError.stderr was always None and the failure branch
        # always reported "No error output."
        subprocess.run(
            ["uv", "add"] + dependencies,
            capture_output=True,
            text=True,
            check=True,
        )
        return "Successfully installed dependencies: " + ", ".join(dependencies)

    except subprocess.CalledProcessError as e:
        return (
            "Dependency installation failed.\n"
            f"Exit code: {e.returncode}\n"
            f"Error: {e.stderr or 'No error output.'}"
        )

    except Exception as e:
        return f"Unexpected error while installing dependencies: {e}"
tools/download_file.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ import requests
3
+ import os
4
+
5
@tool
def download_file(url: str, filename: str) -> str:
    """
    Download a file from a URL and save it with the given filename
    under the LLMFiles/ directory.

    Args:
        url (str): Direct URL to the file.
        filename (str): The filename to save the downloaded content as.

    Returns:
        str: Path to the saved file (relative to the working directory),
             or an error message on failure.
    """
    try:
        directory_name = "LLMFiles"
        os.makedirs(directory_name, exist_ok=True)
        path = os.path.join(directory_name, filename)
        # timeout: a stalled server must not block the agent forever;
        # `with` guarantees the streamed connection is released.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Return the saved path — the old code returned the bare filename,
        # contradicting its own docstring and hiding the LLMFiles/ prefix.
        return path
    except Exception as e:
        return f"Error downloading file: {str(e)}"
tools/run_code.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google import genai
2
+ import subprocess
3
+ from langchain_core.tools import tool
4
+ from dotenv import load_dotenv
5
+ import os
6
+ from google.genai import types
7
load_dotenv()
# NOTE(review): `client` (and the `genai`/`types` imports above) is never
# referenced anywhere in this module — looks like leftover scaffolding;
# confirm and remove, since genai.Client() may require credentials at import.
client = genai.Client()
9
+
10
def strip_code_fences(code: str) -> str:
    """Remove a surrounding Markdown code fence from *code*, if present.

    Handles ```python ... ``` and plain ``` ... ``` fences. A degenerate
    fence with no newline after the opening backticks no longer crashes
    (the old `code.split("\\n", 1)[1]` raised IndexError on that input).
    """
    code = code.strip()
    # Remove ```python ... ``` or ``` ... ```
    if code.startswith("```"):
        # Drop the opening fence line; partition yields "" (not an error)
        # when the fence has no body.
        _, _, code = code.partition("\n")
    if code.endswith("```"):
        # Drop the closing fence line.
        code = code.rsplit("\n", 1)[0]
    return code.strip()
19
+
20
@tool
def run_code(code: str) -> dict:
    """
    Execute a Python snippet in an isolated subprocess.

    This tool:
    1. Takes Python code as input.
    2. Writes it to LLMFiles/runner.py.
    3. Executes it via `uv run` with a hard time limit.
    4. Returns the captured output.

    Parameters
    ----------
    code : str
        Python source code to execute.

    Returns
    -------
    dict
        {
            "stdout": <program output>,
            "stderr": <errors if any>,
            "return_code": <exit code; -1 on unexpected error, -9 on timeout>
        }
    """
    try:
        filename = "runner.py"
        os.makedirs("LLMFiles", exist_ok=True)
        with open(os.path.join("LLMFiles", filename), "w") as f:
            f.write(code)

        proc = subprocess.Popen(
            ["uv", "run", filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd="LLMFiles"
        )
        try:
            # Bound execution: the old unconditional communicate() let a
            # hung or infinite-looping snippet stall the agent forever.
            stdout, stderr = proc.communicate(timeout=180)
        except subprocess.TimeoutExpired:
            proc.kill()
            # Drain pipes after kill so the process is fully reaped.
            stdout, stderr = proc.communicate()
            return {
                "stdout": stdout,
                "stderr": (stderr or "") + "\nProcess killed after 180s timeout.",
                "return_code": -9,
            }

        return {
            "stdout": stdout,
            "stderr": stderr,
            "return_code": proc.returncode
        }
    except Exception as e:
        return {
            "stdout": "",
            "stderr": str(e),
            "return_code": -1
        }
tools/send_request.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ import requests
3
+ import json
4
+ from typing import Any, Dict, Optional
5
+
6
@tool
def post_request(url: str, payload: Dict[str, Any], headers: Optional[Dict[str, str]] = None) -> Any:
    """
    Send an HTTP POST request to the given URL with the provided payload.

    This function is designed for LangGraph applications, where it can be wrapped
    as a Tool or used inside a Runnable to call external APIs, webhooks, or backend
    services during graph execution.
    REMEMBER: This is a blocking function so it may take a while to return. Wait for the response.
    Args:
        url (str): The endpoint to send the POST request to.
        payload (Dict[str, Any]): The JSON-serializable request body.
        headers (Optional[Dict[str, str]]): Optional HTTP headers to include
            in the request. If omitted, a default JSON header is applied.

    Returns:
        Any: The response body. If the server returns JSON, a parsed dict is
        returned. Otherwise, the raw text response is returned.
    """
    headers = headers or {"Content-Type": "application/json"}
    try:
        print(f"\nSending Answer \n{json.dumps(payload, indent=4)}\n to url: {url}")
        # timeout so a stalled quiz server can't block the agent indefinitely
        response = requests.post(url, json=payload, headers=headers, timeout=60)

        # Raise on 4xx/5xx
        response.raise_for_status()

        # Fall back to raw text on non-JSON bodies — the docstring always
        # promised this, but the old code crashed in response.json() instead.
        try:
            data = response.json()
        except ValueError:
            print("Got non-JSON response:\n", response.text, "\n")
            return response.text

        delay = data.get("delay", 0)
        delay = delay if isinstance(delay, (int, float)) else 0
        correct = data.get("correct")
        if not correct and delay < 180:
            # pop() instead of del: "url" may be absent on a wrong answer,
            # and del raised KeyError in that case.
            data.pop("url", None)
        if delay >= 180:
            # Out of time for this task — only the next-task URL matters.
            data = {
                "url": data.get("url")
            }
        print("Got the response: \n", json.dumps(data, indent=4), '\n')
        return data
    except requests.HTTPError as e:
        # Extract server's error response
        err_resp = e.response

        try:
            err_data = err_resp.json()
        except ValueError:
            err_data = err_resp.text

        print("HTTP Error Response:\n", err_data)
        return err_data

    except Exception as e:
        print("Unexpected error:", e)
        return str(e)
tools/web_scraper.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ from playwright.sync_api import sync_playwright
3
+ from bs4 import BeautifulSoup
4
+
5
@tool
def get_rendered_html(url: str) -> str:
    """
    Fetch and return the fully rendered HTML of a webpage.

    This function uses Playwright to load a webpage in a headless Chromium
    browser, allowing all JavaScript on the page to execute. Use this for
    dynamic websites that require rendering.

    IMPORTANT RESTRICTIONS:
    - ONLY use this for actual HTML webpages (articles, documentation, dashboards).
    - DO NOT use this for direct file links (URLs ending in .csv, .pdf, .zip, .png).
      Playwright cannot render these and will crash. Use the 'download_file' tool instead.

    Parameters
    ----------
    url : str
        The URL of the webpage to retrieve and render.

    Returns
    -------
    str
        The fully rendered HTML content, or an error message on failure.
    """
    print("\nFetching and rendering:", url)
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                # networkidle: wait for late XHR/JS so dynamic content is in the DOM
                page.goto(url, wait_until="networkidle")
                content = page.content()
            finally:
                # Always release Chromium — the old code leaked the browser
                # process whenever goto()/content() raised.
                browser.close()
        return content

    except Exception as e:
        return f"Error fetching/rendering page: {str(e)}"