Spaces:
Sleeping
Sleeping
Aakash Jammula committed on
Commit ·
5e6bfdb
1
Parent(s): ed250d7
:deep research
Browse files- deep_research.py +22 -52
deep_research.py
CHANGED
|
@@ -10,7 +10,11 @@ from langchain_google_genai import ChatGoogleGenerativeAI
|
|
| 10 |
from langchain_core.messages import HumanMessage, SystemMessage
|
| 11 |
from typing import Any, List ,Dict, Any, List, Optional
|
| 12 |
import os
|
|
|
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
max_web_research_loops=3
|
| 15 |
fetch_full_page: bool =False
|
| 16 |
|
|
@@ -169,8 +173,9 @@ def format_sources(search_results):
|
|
| 169 |
for source in search_results['results']
|
| 170 |
)
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
|
|
|
| 174 |
|
| 175 |
Args:
|
| 176 |
query (str): The search query to execute
|
|
@@ -182,57 +187,24 @@ def duckduckgo_search(query: str, max_results: int = 3, fetch_full_page: bool =
|
|
| 182 |
- title (str): Title of the search result
|
| 183 |
- url (str): URL of the search result
|
| 184 |
- content (str): Snippet/summary of the content
|
| 185 |
-
- raw_content (str):
|
| 186 |
"""
|
| 187 |
try:
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
url
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
continue
|
| 200 |
-
|
| 201 |
-
raw_content = content
|
| 202 |
-
if fetch_full_page:
|
| 203 |
-
try:
|
| 204 |
-
# Add headers to mimic a browser request
|
| 205 |
-
headers = {
|
| 206 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
| 207 |
-
}
|
| 208 |
-
|
| 209 |
-
# Use requests instead of urllib for better header support
|
| 210 |
-
import requests
|
| 211 |
-
from bs4 import BeautifulSoup
|
| 212 |
-
|
| 213 |
-
response = requests.get(url, headers=headers)
|
| 214 |
-
response.raise_for_status() # Raise an error for bad status codes
|
| 215 |
-
soup = BeautifulSoup(response.text, 'html.parser')
|
| 216 |
-
raw_content = soup.get_text()
|
| 217 |
-
|
| 218 |
-
except Exception as e:
|
| 219 |
-
print(f"Warning: Failed to fetch full page content for {url}: {str(e)}")
|
| 220 |
-
raw_content = content # Fallback to the snippet if full page fetch fails
|
| 221 |
-
|
| 222 |
-
# Add result to list
|
| 223 |
-
result = {
|
| 224 |
-
"title": title,
|
| 225 |
-
"url": url,
|
| 226 |
-
"content": content,
|
| 227 |
-
"raw_content": raw_content
|
| 228 |
-
}
|
| 229 |
-
results.append(result)
|
| 230 |
-
|
| 231 |
-
return {"results": results}
|
| 232 |
except Exception as e:
|
| 233 |
-
print(f"Error in
|
| 234 |
-
print(f"Full error details: {type(e).__name__}")
|
| 235 |
return {"results": []}
|
|
|
|
| 236 |
|
| 237 |
|
| 238 |
@dataclass(kw_only=True)
|
|
@@ -309,10 +281,8 @@ def reflect_on_summary(state: SummaryState):
|
|
| 309 |
return {"search_query": f"Tell me more about {state.research_topic}"} # Fallback query
|
| 310 |
|
| 311 |
def web_research(state: SummaryState):
|
| 312 |
-
search_results =
|
| 313 |
search_str = deduplicate_and_format_sources(search_results, max_tokens_per_source=1000, include_raw_content=True)
|
| 314 |
-
|
| 315 |
-
|
| 316 |
return {"sources_gathered": [format_sources(search_results)], "research_loop_count": state.research_loop_count + 1, "web_research_results": [search_str]}
|
| 317 |
|
| 318 |
def summarize_sources(state: SummaryState):
|
|
|
|
| 10 |
from langchain_core.messages import HumanMessage, SystemMessage
|
| 11 |
from typing import Any, List ,Dict, Any, List, Optional
|
| 12 |
import os
|
| 13 |
+
from tavily import TavilyClient
|
| 14 |
|
| 15 |
+
# Initialize Tavily client
|
| 16 |
+
tavily_api_key = os.getenv("TAVILY_API_KEY")
|
| 17 |
+
tavily_client = TavilyClient(api_key=tavily_api_key)
|
| 18 |
max_web_research_loops=3
|
| 19 |
fetch_full_page: bool =False
|
| 20 |
|
|
|
|
| 173 |
for source in search_results['results']
|
| 174 |
)
|
| 175 |
|
| 176 |
+
|
| 177 |
+
def tavily_search(query: str, max_results: int = 3, fetch_full_page: bool = False) -> Dict[str, List[Dict[str, str]]]:
|
| 178 |
+
"""Search the web using Tavily.
|
| 179 |
|
| 180 |
Args:
|
| 181 |
query (str): The search query to execute
|
|
|
|
| 187 |
- title (str): Title of the search result
|
| 188 |
- url (str): URL of the search result
|
| 189 |
- content (str): Snippet/summary of the content
|
| 190 |
+
- raw_content (str): Full content if available, else same as content
|
| 191 |
"""
|
| 192 |
try:
|
| 193 |
+
response = tavily_client.search(query=query, max_results=max_results, include_raw_content=fetch_full_page)
|
| 194 |
+
results = []
|
| 195 |
+
for r in response["results"]:
|
| 196 |
+
result = {
|
| 197 |
+
"title": r.get("title"),
|
| 198 |
+
"url": r.get("url"),
|
| 199 |
+
"content": r.get("content"),
|
| 200 |
+
"raw_content": r.get("raw_content", r.get("content"))
|
| 201 |
+
}
|
| 202 |
+
results.append(result)
|
| 203 |
+
return {"results": results}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
except Exception as e:
|
| 205 |
+
print(f"Error in Tavily search: {str(e)}")
|
|
|
|
| 206 |
return {"results": []}
|
| 207 |
+
|
| 208 |
|
| 209 |
|
| 210 |
@dataclass(kw_only=True)
|
|
|
|
| 281 |
return {"search_query": f"Tell me more about {state.research_topic}"} # Fallback query
|
| 282 |
|
| 283 |
def web_research(state: SummaryState):
|
| 284 |
+
search_results = tavily_search(state.search_query, max_results=3, fetch_full_page=fetch_full_page)
|
| 285 |
search_str = deduplicate_and_format_sources(search_results, max_tokens_per_source=1000, include_raw_content=True)
|
|
|
|
|
|
|
| 286 |
return {"sources_gathered": [format_sources(search_results)], "research_loop_count": state.research_loop_count + 1, "web_research_results": [search_str]}
|
| 287 |
|
| 288 |
def summarize_sources(state: SummaryState):
|