Spaces:
Paused
Paused
Soham Waghmare
committed on
Commit
·
1986dac
1
Parent(s):
fd3de6a
feat: add optimized scrape tool
Browse files
langgraph_backend/agent_tools.py
CHANGED
|
@@ -9,14 +9,14 @@ from langgraph.checkpoint.memory import MemorySaver
|
|
| 9 |
from langgraph.prebuilt import create_react_agent
|
| 10 |
from langgraph.types import Command, interrupt
|
| 11 |
|
| 12 |
-
from tools_tools import calc
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
logging.basicConfig(level=logging.INFO)
|
| 16 |
load_dotenv()
|
| 17 |
|
| 18 |
checkpointer = MemorySaver()
|
| 19 |
-
tools = [calc]
|
| 20 |
|
| 21 |
# --- LangChain LLM setup (Gemini, correct usage) ---
|
| 22 |
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=os.getenv("GOOGLE_API_KEY"))
|
|
|
|
| 9 |
from langgraph.prebuilt import create_react_agent
|
| 10 |
from langgraph.types import Command, interrupt
|
| 11 |
|
| 12 |
+
from tools_tools import calc, scrape
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
logging.basicConfig(level=logging.INFO)
|
| 16 |
load_dotenv()
|
| 17 |
|
| 18 |
checkpointer = MemorySaver()
|
| 19 |
+
tools = [calc, scrape]
|
| 20 |
|
| 21 |
# --- LangChain LLM setup (Gemini, correct usage) ---
|
| 22 |
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=os.getenv("GOOGLE_API_KEY"))
|
langgraph_backend/scraper.py
CHANGED
|
@@ -16,7 +16,7 @@ class CrawlForAIScraper:
|
|
| 16 |
self.session = requests.Session()
|
| 17 |
self.base_browser = BrowserConfig(
|
| 18 |
browser_type="chromium",
|
| 19 |
-
headless=
|
| 20 |
viewport_width=1920,
|
| 21 |
viewport_height=1080,
|
| 22 |
accept_downloads=False,
|
|
|
|
| 16 |
self.session = requests.Session()
|
| 17 |
self.base_browser = BrowserConfig(
|
| 18 |
browser_type="chromium",
|
| 19 |
+
headless=False,
|
| 20 |
viewport_width=1920,
|
| 21 |
viewport_height=1080,
|
| 22 |
accept_downloads=False,
|
langgraph_backend/tools_tools.py
CHANGED
|
@@ -1,4 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from langchain_core.tools import tool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
@tool
|
| 4 |
def calc(a: int, b: int) -> int:
|
|
@@ -6,3 +20,28 @@ def calc(a: int, b: int) -> int:
|
|
| 6 |
Takes in two integers and returns their integer sum.
|
| 7 |
"""
|
| 8 |
return str(a + b)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
from typing import Annotated, Any, Dict, List, Literal, Optional, TypedDict
|
| 4 |
+
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
from langchain_core.tools import tool
|
| 7 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 8 |
+
|
| 9 |
+
from prompts import SITE_SUMMARY_PROMPT
|
| 10 |
+
from scraper import CrawlForAIScraper
|
| 11 |
+
|
| 12 |
+
load_dotenv()
|
| 13 |
+
scraper_inst = CrawlForAIScraper()
|
| 14 |
+
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=os.getenv("GOOGLE_API_KEY"))
|
| 15 |
+
|
| 16 |
|
| 17 |
@tool
|
| 18 |
def calc(a: int, b: int) -> int:
|
|
|
|
| 20 |
Takes in two integers and returns their integer sum.
|
| 21 |
"""
|
| 22 |
return str(a + b)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@tool
async def scrape(query: str, num_sites_per_query: int) -> str:
    """
    Search in a search engine and summarize what was found.

    Args:
        query: string query for the search engine.
        num_sites_per_query: number of sites to read after searching.

    Returns:
        LLM-written summaries of the scraped sites, joined by "---" separators.
    """
    sites = await scraper_inst.search_and_scrape(query, num_sites_per_query)
    # Format each scraped page as a numbered, citable context entry:
    #   src [1] : https://...
    #   content...
    site_ctxs = [f"src [{i + 1}] : {d['url']}\n{d['text']}" for i, d in enumerate(sites)]
    summ_sites_ctx = []
    # Summarize in chunks of 3 sites per LLM call so each prompt stays small.
    # (The original loop ignored its index and re-summarized the full
    # aggregate on every pass, yielding duplicate whole-corpus summaries.)
    for start in range(0, len(site_ctxs), 3):
        chunk = "\n\n---\n\n".join(site_ctxs[start:start + 3])
        summary = model.invoke(
            SITE_SUMMARY_PROMPT.format(query=query, findings=chunk),
            config={"temperature": 0.2},
        ).text()
        summ_sites_ctx.append(summary)
    return "\n\n---\n\n".join(summ_sites_ctx)
|