Navyssh committed on
Commit
29f4cdf
·
verified ·
1 Parent(s): fb78f9c

Upload 22 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.13-slim

# --- System deps required by Playwright browsers ---
# --no-install-recommends keeps the image slim; the apt list cleanup must be
# in the same RUN layer to actually reduce image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
    wget gnupg ca-certificates curl unzip \
    libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libxkbcommon0 \
    libgtk-3-0 libgbm1 libasound2 libxcomposite1 libxdamage1 libxrandr2 \
    libxfixes3 libpango-1.0-0 libcairo2 \
    && rm -rf /var/lib/apt/lists/*

# --- Install Playwright + Chromium ---
# --with-deps double-checks system libraries on top of the layer above.
RUN pip install --no-cache-dir playwright && playwright install --with-deps chromium

# --- Install uv package manager ---
RUN pip install --no-cache-dir uv

WORKDIR /app

# --- Copy dependency manifests first so the uv sync layer is cached until
# they change (previously `COPY . .` came first, so ANY source edit
# re-installed every dependency) ---
COPY pyproject.toml uv.lock ./
RUN uv sync --frozen

# --- Copy the rest of the app ---
COPY . .

ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=utf-8

# HuggingFace Spaces exposes port 7860
EXPOSE 7860

# --- Run your FastAPI app ---
# uvicorn must be in pyproject dependencies
CMD ["uv", "run", "main.py"]
agent.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, END, START
2
+ from langchain_core.rate_limiters import InMemoryRateLimiter
3
+ from langgraph.prebuilt import ToolNode
4
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
5
+ from tools import get_rendered_html, download_file, post_request, run_code, add_dependencies
6
+ from typing import TypedDict, Annotated, List, Any
7
+ from langchain.chat_models import init_chat_model
8
+ from langgraph.graph.message import add_messages
9
+ import os
10
+ from dotenv import load_dotenv
11
+ load_dotenv()
12
+
13
# --- Runtime configuration from .env ---
EMAIL = os.getenv("EMAIL")    # identity the agent submits when a quiz requires it
SECRET = os.getenv("SECRET")  # shared secret submitted alongside answers
# High ceiling so long multi-task quiz chains don't hit LangGraph's
# default recursion limit mid-run.
RECURSION_LIMIT = 5000
# -------------------------------------------------
# STATE
# -------------------------------------------------
class AgentState(TypedDict):
    """Graph state: running chat history merged via the add_messages reducer."""
    messages: Annotated[List, add_messages]


# Tools the LLM may call; ToolNode dispatches on these by name.
TOOLS = [run_code, get_rendered_html, download_file, post_request, add_dependencies]


# -------------------------------------------------
# GEMINI LLM
# -------------------------------------------------
# 9 requests per 60 s — stays under the Gemini free-tier rate quota.
rate_limiter = InMemoryRateLimiter(
    requests_per_second=9/60,
    check_every_n_seconds=1,
    max_bucket_size=9
)
llm = init_chat_model(
    model_provider="google_genai",
    model="gemini-2.5-flash",
    rate_limiter=rate_limiter
).bind_tools(TOOLS)
39
+
40
+
41
# -------------------------------------------------
# SYSTEM PROMPT
# -------------------------------------------------
# NOTE: f-string — EMAIL/SECRET from the environment are baked in at import
# time so the agent can present credentials when a quiz page demands them.
SYSTEM_PROMPT = f"""
You are an autonomous quiz-solving agent.

Your job is to:
1. Load the quiz page from the given URL.
2. Extract ALL instructions, required parameters, submission rules, and the submit endpoint.
3. Solve the task exactly as required.
4. Submit the answer ONLY to the endpoint specified on the current page (never make up URLs).
5. Read the server response and:
- If it contains a new quiz URL → fetch it immediately and continue.
- If no new URL is present → return "END".

STRICT RULES — FOLLOW EXACTLY:

GENERAL RULES:
- NEVER stop early. Continue solving tasks until no new URL is provided.
- NEVER hallucinate URLs, endpoints, fields, values, or JSON structure.
- NEVER shorten or modify URLs. Always submit the full URL.
- NEVER re-submit unless the server explicitly allows or it's within the 3-minute limit.
- ALWAYS inspect the server response before deciding what to do next.
- ALWAYS use the tools provided to fetch, scrape, download, render HTML, or send requests.

TIME LIMIT RULES:
- Each task has a hard 3-minute limit.
- The server response includes a "delay" field indicating elapsed time.
- If your answer is wrong retry again.

STOPPING CONDITION:
- Only return "END" when a server response explicitly contains NO new URL.
- DO NOT return END under any other condition.

ADDITIONAL INFORMATION YOU MUST INCLUDE WHEN REQUIRED:
- Email: {EMAIL}
- Secret: {SECRET}

YOUR JOB:
- Follow pages exactly.
- Extract data reliably.
- Never guess.
- Submit correct answers.
- Continue until no new URL.
- Then respond with: END
"""

# System prompt first, then the running history from AgentState.
prompt = ChatPromptTemplate.from_messages([
    ("system", SYSTEM_PROMPT),
    MessagesPlaceholder(variable_name="messages")
])

# Runnable pipeline: format prompt + history, then invoke Gemini.
llm_with_prompt = prompt | llm
94
+
95
+
96
# -------------------------------------------------
# AGENT NODE
# -------------------------------------------------
def agent_node(state: AgentState):
    """Run the LLM over the current history and emit its reply.

    Returns only the NEW message: AgentState.messages is annotated with the
    `add_messages` reducer, which merges the delta into the stored history.
    The previous code returned `state["messages"] + [result]`, redundantly
    pushing the entire history back through the reducer on every turn.
    """
    result = llm_with_prompt.invoke({"messages": state["messages"]})
    return {"messages": [result]}
102
+
103
+
104
# -------------------------------------------------
# GRAPH ROUTING
# -------------------------------------------------
def route(state):
    """Decide the next node after `agent`.

    Returns "tools" if the last message requests tool calls, END when the
    model emitted the literal sentinel "END", otherwise loops back to
    "agent" so the model can continue.
    """
    last = state["messages"][-1]
    # Support both message objects (attributes) and plain dicts.
    tool_calls = None
    if hasattr(last, "tool_calls"):
        tool_calls = getattr(last, "tool_calls", None)
    elif isinstance(last, dict):
        tool_calls = last.get("tool_calls")

    if tool_calls:
        return "tools"

    # Extract content robustly.
    content = None
    if hasattr(last, "content"):
        content = getattr(last, "content", None)
    elif isinstance(last, dict):
        content = last.get("content")

    if isinstance(content, str) and content.strip() == "END":
        return END
    # Multi-part content: guard against an empty list, a non-dict part, and
    # a part lacking "text" — the old code called .strip() on the result of
    # content[0].get("text") and crashed (AttributeError on None) in all
    # three cases.
    if isinstance(content, list) and content:
        first = content[0]
        text = first.get("text") if isinstance(first, dict) else None
        if isinstance(text, str) and text.strip() == "END":
            return END
    return "agent"
130
# Build the agent graph: agent ⇄ tools loop with a conditional exit.
graph = StateGraph(AgentState)

graph.add_node("agent", agent_node)
graph.add_node("tools", ToolNode(TOOLS))


# Control starts at the agent; every tool batch returns control to it.
graph.add_edge(START, "agent")
graph.add_edge("tools", "agent")
# route() chooses "tools", "agent" (self-loop to keep working), or END.
graph.add_conditional_edges(
    "agent",
    route
)

app = graph.compile()
145
+
146
+
147
# -------------------------------------------------
# ENTRY POINT
# -------------------------------------------------
def run_agent(url: str) -> None:
    """Drive the compiled graph on a quiz URL until the agent stops.

    Blocks until the graph reaches END (i.e. the agent found no new quiz
    URL). Runs for side effects only — the old `-> str` annotation was
    wrong, the function never returned a value.
    """
    app.invoke(
        {"messages": [{"role": "user", "content": url}]},
        config={"recursion_limit": RECURSION_LIMIT},
    )
    # Typo fix: "succesfully" -> "successfully".
    print("Tasks completed successfully")
156
+
main.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, BackgroundTasks
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.exceptions import HTTPException
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from agent import run_agent
6
+ from dotenv import load_dotenv
7
+ import uvicorn
8
+ import os
9
+ import time
10
+
11
load_dotenv()

EMAIL = os.getenv("EMAIL")    # NOTE(review): unused in this module — kept for parity with agent.py; confirm
SECRET = os.getenv("SECRET")  # shared secret checked by /solve

app = FastAPI()
# Wide-open CORS: the Space is invoked from arbitrary origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # or specific domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Process start time; /healthz reports uptime relative to this.
START_TIME = time.time()
25
@app.get("/healthz")
def healthz():
    """Liveness probe: report service status and seconds since startup."""
    uptime = int(time.time() - START_TIME)
    return {"status": "ok", "uptime_seconds": uptime}
32
+
33
@app.post("/solve")
async def solve(request: Request, background_tasks: BackgroundTasks):
    """Validate a quiz request and launch the agent in the background.

    Expects JSON: {"url": <quiz page URL>, "secret": <shared secret>}.
    Returns 200 immediately; run_agent executes as a background task.

    Raises:
        HTTPException 400: body is not JSON, empty, or missing fields.
        HTTPException 403: secret mismatch.
    """
    try:
        data = await request.json()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid JSON")
    # Distinct messages per failure mode — previously every 400 said
    # "Invalid JSON", which misled clients sending valid-but-incomplete JSON.
    if not data:
        raise HTTPException(status_code=400, detail="Empty request body")
    url = data.get("url")
    secret = data.get("secret")
    if not url or not secret:
        raise HTTPException(status_code=400, detail="Missing 'url' or 'secret'")

    # Constant-time comparison so the secret can't be probed via timing.
    import hmac
    if not isinstance(secret, str) or not hmac.compare_digest(secret, SECRET or ""):
        raise HTTPException(status_code=403, detail="Invalid secret")
    print("Verified starting the task...")
    background_tasks.add_task(run_agent, url)

    return JSONResponse(status_code=200, content={"status": "ok"})
52
+
53
+
54
if __name__ == "__main__":
    # Bind to all interfaces on 7860 — the port HuggingFace Spaces exposes.
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-doc==0.0.4
2
+ annotated-types==0.7.0
3
+ anyio==4.11.0
4
+ beautifulsoup4==4.14.2
5
+ cachetools==6.2.2
6
+ certifi==2025.11.12
7
+ charset-normalizer==3.4.4
8
+ click==8.3.1
9
+ cohere==5.20.0
10
+ contourpy==1.3.3
11
+ cycler==0.12.1
12
+ distro==1.9.0
13
+ fastapi==0.122.0
14
+ fastavro==1.12.1
15
+ filelock==3.20.0
16
+ filetype==1.2.0
17
+ fonttools==4.60.1
18
+ fsspec==2025.10.0
19
+ google-ai-generativelanguage==0.9.0
20
+ google-api-core==2.28.1
21
+ google-api-python-client==2.187.0
22
+ google-auth==2.43.0
23
+ google-auth-httplib2==0.2.1
24
+ google-genai==1.52.0
25
+ google-generativeai==0.8.5
26
+ googleapis-common-protos==1.72.0
27
+ greenlet==3.2.4
28
+ groq==0.36.0
29
+ grpcio==1.76.0
30
+ grpcio-status==1.71.2
31
+ h11==0.16.0
32
+ hf-xet==1.2.0
33
+ httpcore==1.0.9
34
+ httplib2==0.31.0
35
+ httpx==0.28.1
36
+ httpx-sse==0.4.0
37
+ huggingface_hub==1.1.5
38
+ idna==3.11
39
+ jiter==0.12.0
40
+ jsonpatch==1.33
41
+ jsonpointer==3.0.0
42
+ kiwisolver==1.4.9
43
+ langchain==1.1.0
44
+ langchain-core==1.1.0
45
+ langchain-google-genai==3.2.0
46
+ langgraph==1.0.4
47
+ langgraph-checkpoint==3.0.1
48
+ langgraph-prebuilt==1.0.5
49
+ langgraph-sdk==0.2.10
50
+ langsmith==0.4.49
51
+ markdownify==1.2.2
52
+ matplotlib==3.10.7
53
+ numpy==2.3.5
54
+ openai==2.8.1
55
+ orjson==3.11.4
56
+ ormsgpack==1.12.0
57
+ packaging==25.0
58
+ pandas==2.3.3
59
+ pillow==12.0.0
60
+ playwright==1.56.0
61
+ proto-plus==1.26.1
62
+ protobuf==5.29.5
63
+ pyasn1==0.6.1
64
+ pyasn1_modules==0.4.2
65
+ pydantic==2.12.5
66
+ pydantic_core==2.41.5
67
+ pyee==13.0.0
68
+ pyparsing==3.2.5
69
+ PyPDF2==3.0.1
70
+ python-dateutil==2.9.0.post0
71
+ python-dotenv==1.2.1
72
+ pytz==2025.2
73
+ PyYAML==6.0.3
74
+ requests==2.32.5
75
+ requests-toolbelt==1.0.0
76
+ rsa==4.9.1
77
+ shellingham==1.5.4
78
+ six==1.17.0
79
+ sniffio==1.3.1
80
+ soupsieve==2.8
81
+ starlette==0.50.0
82
+ tabula-py==2.10.0
83
+ tenacity==9.1.2
84
+ tokenizers==0.22.1
85
+ tqdm==4.67.1
86
+ typer-slim==0.20.0
87
+ types-requests==2.32.4.20250913
88
+ typing-inspection==0.4.2
89
+ typing_extensions==4.15.0
90
+ tzdata==2025.2
91
+ uritemplate==4.2.0
92
+ urllib3==2.5.0
93
+ uvicorn==0.38.0
94
+ websockets==15.0.1
95
+ xxhash==3.6.0
96
+ zstandard==0.25.0
tools/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
"""Aggregate the agent's LangChain tools into a single import surface."""
from .web_scraper import get_rendered_html
from .run_code import run_code
from .send_request import post_request
from .download_file import download_file
from .add_dependencies import add_dependencies

# Explicit public API for `from tools import *` and static analyzers.
__all__ = [
    "get_rendered_html",
    "run_code",
    "post_request",
    "download_file",
    "add_dependencies",
]
tools/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (383 Bytes). View file
 
tools/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (380 Bytes). View file
 
tools/__pycache__/add_dependencies.cpython-312.pyc ADDED
Binary file (1.64 kB). View file
 
tools/__pycache__/add_dependencies.cpython-313.pyc ADDED
Binary file (1.61 kB). View file
 
tools/__pycache__/download_file.cpython-312.pyc ADDED
Binary file (1.66 kB). View file
 
tools/__pycache__/download_file.cpython-313.pyc ADDED
Binary file (1.64 kB). View file
 
tools/__pycache__/run_code.cpython-312.pyc ADDED
Binary file (2.56 kB). View file
 
tools/__pycache__/run_code.cpython-313.pyc ADDED
Binary file (2.51 kB). View file
 
tools/__pycache__/send_request.cpython-312.pyc ADDED
Binary file (3.15 kB). View file
 
tools/__pycache__/send_request.cpython-313.pyc ADDED
Binary file (3.11 kB). View file
 
tools/__pycache__/web_scraper.cpython-312.pyc ADDED
Binary file (2.01 kB). View file
 
tools/__pycache__/web_scraper.cpython-313.pyc ADDED
Binary file (1.94 kB). View file
 
tools/add_dependencies.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from langchain_core.tools import tool
3
+ import subprocess
4
+
5
+
6
@tool
def add_dependencies(dependencies: List[str]) -> str:
    """
    Install the given Python packages into the environment.

    Parameters:
        dependencies (List[str]):
            A list of Python package names to install. Each name must match the
            corresponding package name on PyPI.

    Returns:
        str:
            A message indicating success or failure.
    """
    if not dependencies:
        return "No dependencies given; nothing to install."

    try:
        # subprocess.run with capture_output=True actually captures stderr;
        # the previous check_call(stdout=PIPE, stderr=PIPE) never did, so
        # CalledProcessError.stderr was always None and the failure branch
        # always reported "No error output."
        subprocess.run(
            ["uv", "add"] + dependencies,
            capture_output=True,
            text=True,
            check=True,
        )
        return "Successfully installed dependencies: " + ", ".join(dependencies)

    except subprocess.CalledProcessError as e:
        return (
            "Dependency installation failed.\n"
            f"Exit code: {e.returncode}\n"
            f"Error: {e.stderr or 'No error output.'}"
        )

    except Exception as e:
        return f"Unexpected error while installing dependencies: {e}"
tools/download_file.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ import requests
3
+ import os
4
+
5
@tool
def download_file(url: str, filename: str) -> str:
    """
    Download a file from a URL and save it with the given filename
    under the LLMFiles/ directory.

    Args:
        url (str): Direct URL to the file.
        filename (str): The filename to save the downloaded content as.

    Returns:
        str: Path to the saved file (relative to the working directory),
             or an error message on failure.
    """
    try:
        directory_name = "LLMFiles"
        os.makedirs(directory_name, exist_ok=True)
        path = os.path.join(directory_name, filename)
        # timeout: a stalled server must not block the agent forever;
        # `with` guarantees the streamed connection is released.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Return the saved path — the old code returned the bare filename,
        # contradicting its own docstring and hiding the LLMFiles/ prefix.
        return path
    except Exception as e:
        return f"Error downloading file: {str(e)}"
tools/run_code.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google import genai
2
+ import subprocess
3
+ from langchain_core.tools import tool
4
+ from dotenv import load_dotenv
5
+ import os
6
+ from google.genai import types
7
load_dotenv()
# NOTE(review): `client` (and the `genai`/`types` imports above) is never
# referenced anywhere in this module — looks like leftover scaffolding;
# confirm and remove, since genai.Client() may require credentials at import.
client = genai.Client()
9
+
10
def strip_code_fences(code: str) -> str:
    """Remove a surrounding Markdown code fence from *code*, if present.

    Handles ```python ... ``` and plain ``` ... ``` fences. A degenerate
    fence with no newline after the opening backticks no longer crashes
    (the old `code.split("\\n", 1)[1]` raised IndexError on that input).
    """
    code = code.strip()
    # Remove ```python ... ``` or ``` ... ```
    if code.startswith("```"):
        # Drop the opening fence line; partition yields "" (not an error)
        # when the fence has no body.
        _, _, code = code.partition("\n")
    if code.endswith("```"):
        # Drop the closing fence line.
        code = code.rsplit("\n", 1)[0]
    return code.strip()
19
+
20
@tool
def run_code(code: str) -> dict:
    """
    Execute a Python snippet in an isolated subprocess.

    This tool:
    1. Takes Python code as input.
    2. Writes it to LLMFiles/runner.py.
    3. Executes it via `uv run` with a hard time limit.
    4. Returns the captured output.

    Parameters
    ----------
    code : str
        Python source code to execute.

    Returns
    -------
    dict
        {
            "stdout": <program output>,
            "stderr": <errors if any>,
            "return_code": <exit code; -1 on unexpected error, -9 on timeout>
        }
    """
    try:
        filename = "runner.py"
        os.makedirs("LLMFiles", exist_ok=True)
        with open(os.path.join("LLMFiles", filename), "w") as f:
            f.write(code)

        proc = subprocess.Popen(
            ["uv", "run", filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd="LLMFiles"
        )
        try:
            # Bound execution: the old unconditional communicate() let a
            # hung or infinite-looping snippet stall the agent forever.
            stdout, stderr = proc.communicate(timeout=180)
        except subprocess.TimeoutExpired:
            proc.kill()
            # Drain pipes after kill so the process is fully reaped.
            stdout, stderr = proc.communicate()
            return {
                "stdout": stdout,
                "stderr": (stderr or "") + "\nProcess killed after 180s timeout.",
                "return_code": -9,
            }

        return {
            "stdout": stdout,
            "stderr": stderr,
            "return_code": proc.returncode
        }
    except Exception as e:
        return {
            "stdout": "",
            "stderr": str(e),
            "return_code": -1
        }
tools/send_request.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ import requests
3
+ import json
4
+ from typing import Any, Dict, Optional
5
+
6
@tool
def post_request(url: str, payload: Dict[str, Any], headers: Optional[Dict[str, str]] = None) -> Any:
    """
    Send an HTTP POST request to the given URL with the provided payload.

    This function is designed for LangGraph applications, where it can be wrapped
    as a Tool or used inside a Runnable to call external APIs, webhooks, or backend
    services during graph execution.
    REMEMBER: This is a blocking function so it may take a while to return. Wait for the response.
    Args:
        url (str): The endpoint to send the POST request to.
        payload (Dict[str, Any]): The JSON-serializable request body.
        headers (Optional[Dict[str, str]]): Optional HTTP headers to include
            in the request. If omitted, a default JSON header is applied.

    Returns:
        Any: The response body. If the server returns JSON, a parsed dict is
        returned. Otherwise, the raw text response is returned.
    """
    headers = headers or {"Content-Type": "application/json"}
    try:
        print(f"\nSending Answer \n{json.dumps(payload, indent=4)}\n to url: {url}")
        # timeout so a stalled quiz server can't block the agent indefinitely
        response = requests.post(url, json=payload, headers=headers, timeout=60)

        # Raise on 4xx/5xx
        response.raise_for_status()

        # Fall back to raw text on non-JSON bodies — the docstring always
        # promised this, but the old code crashed in response.json() instead.
        try:
            data = response.json()
        except ValueError:
            print("Got non-JSON response:\n", response.text, "\n")
            return response.text

        delay = data.get("delay", 0)
        delay = delay if isinstance(delay, (int, float)) else 0
        correct = data.get("correct")
        if not correct and delay < 180:
            # pop() instead of del: "url" may be absent on a wrong answer,
            # and del raised KeyError in that case.
            data.pop("url", None)
        if delay >= 180:
            # Out of time for this task — only the next-task URL matters.
            data = {
                "url": data.get("url")
            }
        print("Got the response: \n", json.dumps(data, indent=4), '\n')
        return data
    except requests.HTTPError as e:
        # Extract server's error response
        err_resp = e.response

        try:
            err_data = err_resp.json()
        except ValueError:
            err_data = err_resp.text

        print("HTTP Error Response:\n", err_data)
        return err_data

    except Exception as e:
        print("Unexpected error:", e)
        return str(e)
tools/web_scraper.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ from playwright.sync_api import sync_playwright
3
+ from bs4 import BeautifulSoup
4
+
5
@tool
def get_rendered_html(url: str) -> str:
    """
    Fetch and return the fully rendered HTML of a webpage.

    This function uses Playwright to load a webpage in a headless Chromium
    browser, allowing all JavaScript on the page to execute. Use this for
    dynamic websites that require rendering.

    IMPORTANT RESTRICTIONS:
    - ONLY use this for actual HTML webpages (articles, documentation, dashboards).
    - DO NOT use this for direct file links (URLs ending in .csv, .pdf, .zip, .png).
      Playwright cannot render these and will crash. Use the 'download_file' tool instead.

    Parameters
    ----------
    url : str
        The URL of the webpage to retrieve and render.

    Returns
    -------
    str
        The fully rendered HTML content, or an error message on failure.
    """
    print("\nFetching and rendering:", url)
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                # networkidle: wait for late XHR/JS so dynamic content is in the DOM
                page.goto(url, wait_until="networkidle")
                content = page.content()
            finally:
                # Always release Chromium — the old code leaked the browser
                # process whenever goto()/content() raised.
                browser.close()
        return content

    except Exception as e:
        return f"Error fetching/rendering page: {str(e)}"