Soham Waghmare committed on
Commit
1986dac
·
1 Parent(s): fd3de6a

feat: add optimized scrape tool

Browse files
langgraph_backend/agent_tools.py CHANGED
@@ -9,14 +9,14 @@ from langgraph.checkpoint.memory import MemorySaver
9
  from langgraph.prebuilt import create_react_agent
10
  from langgraph.types import Command, interrupt
11
 
12
- from tools_tools import calc
13
 
14
  logger = logging.getLogger(__name__)
15
  logging.basicConfig(level=logging.INFO)
16
  load_dotenv()
17
 
18
  checkpointer = MemorySaver()
19
- tools = [calc]
20
 
21
  # --- LangChain LLM setup (Gemini, correct usage) ---
22
  model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=os.getenv("GOOGLE_API_KEY"))
 
9
  from langgraph.prebuilt import create_react_agent
10
  from langgraph.types import Command, interrupt
11
 
12
+ from tools_tools import calc, scrape
13
 
14
  logger = logging.getLogger(__name__)
15
  logging.basicConfig(level=logging.INFO)
16
  load_dotenv()
17
 
18
  checkpointer = MemorySaver()
19
+ tools = [calc, scrape]
20
 
21
  # --- LangChain LLM setup (Gemini, correct usage) ---
22
  model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=os.getenv("GOOGLE_API_KEY"))
langgraph_backend/scraper.py CHANGED
@@ -16,7 +16,7 @@ class CrawlForAIScraper:
16
  self.session = requests.Session()
17
  self.base_browser = BrowserConfig(
18
  browser_type="chromium",
19
- headless=True,
20
  viewport_width=1920,
21
  viewport_height=1080,
22
  accept_downloads=False,
 
16
  self.session = requests.Session()
17
  self.base_browser = BrowserConfig(
18
  browser_type="chromium",
19
+ headless=False,
20
  viewport_width=1920,
21
  viewport_height=1080,
22
  accept_downloads=False,
langgraph_backend/tools_tools.py CHANGED
@@ -1,4 +1,18 @@
 
 
 
 
 
1
  from langchain_core.tools import tool
 
 
 
 
 
 
 
 
 
2
 
3
  @tool
4
  def calc(a: int, b: int) -> int:
@@ -6,3 +20,28 @@ def calc(a: int, b: int) -> int:
6
  Takes in two integers and returns their integer sum.
7
  """
8
  return str(a + b)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from typing import Annotated, Any, Dict, List, Literal, Optional, TypedDict
4
+
5
+ from dotenv import load_dotenv
6
  from langchain_core.tools import tool
7
+ from langchain_google_genai import ChatGoogleGenerativeAI
8
+
9
+ from prompts import SITE_SUMMARY_PROMPT
10
+ from scraper import CrawlForAIScraper
11
+
12
+ load_dotenv()
13
+ scraper_inst = CrawlForAIScraper()
14
+ model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=os.getenv("GOOGLE_API_KEY"))
15
+
16
 
17
  @tool
18
  def calc(a: int, b: int) -> int:
 
20
  Takes in two integers and returns their integer sum.
21
  """
22
  return str(a + b)
23
+
24
+
25
+ @tool
26
+ async def scrape(query: str, num_sites_per_query: int) -> List[Dict[str, Any]]:
27
+ """
28
+ Search in a search engine.
29
+
30
+ Args:
31
+ query: string query for the search engine.
32
+ num_sites_per_query: number of sites to read after searching.
33
+
34
+ Returns:
35
+ Results related to the search.
36
+ """
37
+ sites = await scraper_inst.search_and_scrape(query, num_sites_per_query)
38
+ # Add data to context
39
+ # src [1] : https://...
40
+ # content...
41
+ agg_sites_ctx = ["\n\n---\n\n".join([f"src [{i + 1}] : {d['url']}\n{d['text']}" for i, d in enumerate(sites)])]
42
+ summ_sites_ctx = []
43
+ for idx in range(0, len(sites), 3):
44
+ summary = model.invoke(SITE_SUMMARY_PROMPT.format(query=query, findings=agg_sites_ctx), config={"temperature": 0.2}).text()
45
+ summ_sites_ctx.append(summary)
46
+
47
+ return "\n\n---\n\n".join(summ_sites_ctx)