Delete deep_research.py
Browse files- deep_research.py +0 -185
deep_research.py
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
import asyncio
import os
import time
from dataclasses import dataclass
from typing import List, Dict, Optional, Any

import requests
from dotenv import load_dotenv
from IPython.display import Image, display
from pydantic import Field, BaseModel
from pydantic_ai import Agent
from pydantic_ai.common_tools.tavily import tavily_search_tool
from pydantic_ai.models.gemini import GeminiModel
from pydantic_ai.providers.google_gla import GoogleGLAProvider
from pydantic_graph import BaseNode, End, GraphRunContext, Graph
from tavily import TavilyClient
|
| 15 |
-
|
| 16 |
-
# Pull credentials from a local .env file into the process environment.
load_dotenv()

# API keys are read from the environment; os.getenv yields None when a key is unset.
google_api_key = os.getenv('google_api_key')
tavily_key = os.getenv('tavily_key')
# Sent as the "cx" parameter of the Google Custom Search request below.
pse = os.getenv('pse')

# Shared clients: Tavily for web search, Gemini as the LLM behind every agent.
tavily_client = TavilyClient(api_key=tavily_key)
llm = GeminiModel('gemini-2.0-flash', provider=GoogleGLAProvider(api_key=google_api_key))
|
| 22 |
-
|
| 23 |
-
@dataclass
class State:
    """Mutable state passed between the nodes of the research graph."""

    # The user's research question.
    query: str
    # Text produced by the preliminary web-search agent.
    preliminary_research: str
    # NOTE(review): annotated Dict, but Research_plan_node stores the planning
    # agent's structured output here and later nodes read it by attribute.
    research_plan: Dict
    # NOTE(review): annotated Dict, but Research_node stores a Research_results
    # model here and PaperGen_node reads it by attribute.
    research_results: Dict
    # Not written by any node visible in this file.
    validation: str
    # The assembled paper dictionary, set by PaperGen_node.
    final: Dict
|
| 31 |
-
class paragraph_content(BaseModel):
    """Structured output of the paragraph-writing agent: one finished paragraph."""

    title: str = Field(description='the title of the paragraph')
    content: str = Field(description='the content of the paragraph')
|
| 34 |
-
|
| 35 |
-
class paragraph(BaseModel):
    """One planned paragraph of the paper layout: a title plus a brief of what to cover."""

    title: str = Field(description='the title of the paragraph')
    should_include: str = Field(description='a description of what the paragraph should include')
|
| 38 |
-
class Paper_layout(BaseModel):
    """Structured output of the layout agent: the paper title and its planned paragraphs."""

    title: str = Field(description='the title of the paper')
    paragraphs: List[paragraph] = Field(description='the list of paragraphs of the paper')
|
| 41 |
-
|
| 42 |
-
# Agent that plans the paper: returns a Paper_layout (title + paragraph briefs).
paper_layout_agent = Agent(
    llm,
    result_type=Paper_layout,
    system_prompt="generate a paper layout based on the query, preliminary_search, search_results,include a Title for the paper, for the paragraphs only include the title, no content, no image, no table, start with introduction and end with conclusion",
)

# Agent that writes a single paragraph: returns a paragraph_content.
paragraph_gen_agent = Agent(
    llm,
    result_type=paragraph_content,
    system_prompt="generate a paragraph synthesizing the research_results based on the title,what the paragraph should include, and what has already been written to avoid repetition",
)
|
| 44 |
-
class PaperGen_node(BaseNode[State]):
    """Final graph node: plans the paper, writes it paragraph by paragraph, and ends the run."""

    async def run(self, ctx: GraphRunContext[State]) -> End:
        """Generate the paper from the collected research.

        Asks ``paper_layout_agent`` for a layout, then calls
        ``paragraph_gen_agent`` once per planned paragraph, passing the
        paragraphs written so far so the agent can avoid repetition.

        Args:
            ctx: Graph run context; reads ``query``, ``preliminary_research``
                and ``research_results`` from the state and writes ``final``.

        Returns:
            ``End`` wrapping the paper dict with keys ``title``, ``image_url``,
            ``paragraphs``, ``table`` and ``references``.
        """
        research = ctx.state.research_results
        prompt = (f'query:{ctx.state.query}, preliminary_search:{ctx.state.preliminary_research},search_results:{research.research_results}')
        layout = await paper_layout_agent.run(prompt)

        paragraphs = []
        for section in layout.data.paragraphs:
            # Pause between model calls (presumably to stay under the API rate
            # limit). asyncio.sleep yields to the event loop; the previous
            # time.sleep froze every other coroutine for the duration.
            await asyncio.sleep(2)
            written = await paragraph_gen_agent.run(f'title:{section.title}, should_include:{section.should_include}, research_results:{research.research_results}, already_written:{paragraphs}')
            paragraphs.append(written.data.model_dump())

        # Empty/falsy optional parts are normalised to None in the final paper.
        paper = {
            'title': layout.data.title,
            'image_url': research.image_url or None,
            'paragraphs': paragraphs,
            'table': research.table or None,
            'references': research.references or None,
        }

        ctx.state.final = paper
        return End(ctx.state.final)
|
| 63 |
-
|
| 64 |
-
def google_image_search(query: str):
    """Search for an image with the Google Custom Search API.

    Args:
        query: The image search query.

    Returns:
        The link of the first image result, or ``None`` when the response
        contains no results.

    Raises:
        requests.RequestException: On network failure or when the request
            exceeds the timeout.
    """
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "q": query,
        "cx": pse,
        "key": google_api_key,
        "searchType": "image",  # Search for images
        "num": 1,  # Number of results to fetch
    }

    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()

    # 'items' is absent when nothing matched (or on an API error payload);
    # also guard against an empty list before indexing.
    items = data['items'] if 'items' in data else None
    if not items:
        return None
    return items[0]['link']
|
| 89 |
-
|
| 90 |
-
class Table_row(BaseModel):
    """A single table row, held as a list of cell strings."""

    data: List[str] = Field(description='the data of the row')
|
| 92 |
-
class Table(BaseModel):
    """Structured output of the table agent: rows plus column names."""

    rows: List[Table_row] = Field(description='the rows of the table')
    columns: List[str] = Field(description='the columns of the table')
|
| 95 |
-
|
| 96 |
-
class Research_results(BaseModel):
    """Everything gathered by the research step: text snippets, image, table, references.

    Every field has a real default, so the model can be created empty and
    filled in step by step. The previous ``default_factory=None`` supplied no
    default at all, which made all four fields required.
    """

    research_results: List[str] = Field(default_factory=list, description='the research results')
    # Optional: the image search may return no result.
    image_url: Optional[str] = Field(default=None, description='the image url if needed else return None')
    table: Optional[dict] = Field(default=None, description='the table dataframe in a dictionary format')
    references: Optional[str] = Field(default=None, description='the references (urls) of the research_results')
|
| 101 |
-
|
| 102 |
-
# Agent that condenses the research into a structured Table (rows + columns).
table_agent = Agent(
    llm,
    result_type=Table,
    system_prompt="generate a detailed table in dictionary format based on the research and the query",
)
|
| 103 |
-
|
| 104 |
-
class Research_node(BaseNode[State]):
    """Graph node that executes the research plan: web searches, optional image and table."""

    async def run(self, ctx: GraphRunContext[State]) -> PaperGen_node:
        """Run every planned search and store the results in the state.

        Keeps only Tavily hits scoring above 0.50, de-duplicates contents and
        URLs while preserving first-seen order, then optionally adds an image
        URL and a generated table when the research plan asks for them.

        Args:
            ctx: Graph run context; reads ``research_plan`` and ``query`` from
                the state and writes ``research_results``.

        Returns:
            The next node, ``PaperGen_node``.
        """
        plan = ctx.state.research_plan
        research_results = Research_results(research_results=[], image_url='', table={}, references='')

        # Accumulate across ALL queries. The URL list used to be re-created
        # after each search call, so references only covered the last query.
        contents = []
        urls = []
        for planned in plan.search_queries:
            # NOTE(review): tavily_client.search is a synchronous call inside
            # an async method; it blocks the event loop while it runs.
            response = tavily_client.search(planned.search_query)
            for hit in response.get('results') or []:
                score = hit.get('score')
                # A hit without a score is skipped instead of raising TypeError.
                if score is None or score <= 0.50:
                    continue
                hit_url = hit.get('url')
                if hit_url:
                    urls.append(hit_url)
                hit_content = hit.get('content')
                if hit_content:
                    contents.append(hit_content)

        # dict.fromkeys de-duplicates like set() but keeps a stable order, so
        # downstream prompts are deterministic for identical search results.
        research_results.research_results = list(dict.fromkeys(contents))
        research_results.references = ', '.join(dict.fromkeys(urls))
        ctx.state.research_results = research_results

        if plan.image_search_query:
            image_url = google_image_search(plan.image_search_query)
            ctx.state.research_results.image_url = image_url

        if plan.table:
            result = await table_agent.run(f'research_results:{ctx.state.research_results.research_results},query:{ctx.state.query}')
            ctx.state.research_results.table = {'data': [row.data for row in result.data.rows], 'columns': result.data.columns}

        return PaperGen_node()
|
| 131 |
-
|
| 132 |
-
class search_query(BaseModel):
    """A single web search query within the research plan."""

    search_query: str = Field(description='the detailed web search query for the research')
|
| 134 |
-
|
| 135 |
-
class Research_plan(BaseModel):
    """Structured output of the planning agent: searches to run plus optional extras.

    ``table`` and ``image_search_query`` default to ``None``. The previous
    ``default_factory=None`` supplied no default, so both "optional" fields
    were in fact required.
    """

    search_queries: List[search_query] = Field(description='the detailed web search queries for the research')
    table: Optional[str] = Field(default=None, description='if a table is needed, return yes else return None')
    image_search_query: Optional[str] = Field(default=None, description='if image is needed, generate a image search query, optional')
|
| 139 |
-
|
| 140 |
-
# Agent that turns the query and preliminary search into a Research_plan.
research_plan_agent = Agent(
    llm,
    result_type=Research_plan,
    system_prompt='generate a detailed research plan breaking down the research into smaller parts based on the query and the preliminary search, include a table and image search query if the user wants it',
)
|
| 141 |
-
|
| 142 |
-
class Research_plan_node(BaseNode[State]):
    """Graph node that asks the planning agent for a research plan."""

    async def run(self, ctx: GraphRunContext[State]) -> Research_node:
        """Build the research plan and store it in the state.

        Args:
            ctx: Graph run context; reads ``query`` and
                ``preliminary_research`` and writes ``research_plan``.

        Returns:
            The next node, ``Research_node``.
        """
        prompt = (f'query:{ctx.state.query}, preliminary_search:{ctx.state.preliminary_research}')
        result = await research_plan_agent.run(prompt)
        ctx.state.research_plan = result.data
        return Research_node()
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
# Free-text agent equipped with the Tavily search tool; used for the preliminary search.
search_agent = Agent(
    llm,
    tools=[tavily_search_tool(tavily_key)],
    system_prompt="do a websearch based on the query",
)
|
| 152 |
-
|
| 153 |
-
class preliminary_search_node(BaseNode[State]):
    """Entry node of the graph: runs a broad preliminary web search on the query."""

    async def run(self, ctx: GraphRunContext[State]) -> Research_plan_node:
        """Run the preliminary search and store its output in the state.

        Args:
            ctx: Graph run context; reads ``query`` and writes
                ``preliminary_research``.

        Returns:
            The next node, ``Research_plan_node``.
        """
        prompt = (' Do a preliminary search to get a global idea of the subject that the user wants to do reseach on as well as the necessary informations to do a search on.\n'
                  f'The subject is based on the query: {ctx.state.query}, return the results of the search.')
        result = await search_agent.run(prompt)
        ctx.state.preliminary_research = result.data
        return Research_plan_node()
|
| 160 |
-
|
| 161 |
-
class Deep_research_engine:
    """Facade over the research graph: preliminary search -> plan -> research -> paper."""

    def __init__(self):
        """Build the graph and an empty state that is reused across ``chat`` calls."""
        self.graph = Graph(nodes=[preliminary_search_node, Research_plan_node, Research_node, PaperGen_node])
        # Placeholders match the Dict annotations on State (they were seeded
        # with lists and a string); each is overwritten by a node during a run.
        self.state = State(query='', preliminary_research='', research_plan={}, research_results={}, validation='', final={})

    async def chat(self, query: str):
        """Run the full research pipeline for a query.

        Args:
            query (str): The query to research.

        Returns:
            The output of the graph run, i.e. the value PaperGen_node wraps in
            ``End``: the paper dict with ``title``, ``image_url``,
            ``paragraphs``, ``table`` and ``references``.
        """
        self.state.query = query
        response = await self.graph.run(preliminary_search_node(), state=self.state)
        return response.output

    def display_graph(self):
        """Render the graph as a Mermaid image and show it with IPython.

        Returns:
            Whatever ``IPython.display.display`` returns (normally ``None``);
            the image is shown as a side effect.
        """
        image = self.graph.mermaid_image()
        return display(Image(image))
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|