wolf1997 committed on
Commit
554ebd9
·
verified ·
1 Parent(s): 66e6450

Delete deep_research.py

Browse files
Files changed (1) hide show
  1. deep_research.py +0 -185
deep_research.py DELETED
@@ -1,185 +0,0 @@
1
- from pydantic_graph import BaseNode, End, GraphRunContext, Graph
2
- from pydantic_ai import Agent
3
- from pydantic_ai.common_tools.tavily import tavily_search_tool
4
- from dataclasses import dataclass
5
- from pydantic import Field, BaseModel
6
- from typing import List, Dict, Optional, Any
7
- from pydantic_ai.models.gemini import GeminiModel
8
- from pydantic_ai.providers.google_gla import GoogleGLAProvider
9
- from dotenv import load_dotenv
10
- import os
11
- from tavily import TavilyClient
12
- from IPython.display import Image, display
13
- import requests
14
- import time
15
-
16
# Load variables from a local .env file into the process environment.
load_dotenv()

# Credentials read from the environment; each is None when its variable is unset.
google_api_key = os.environ.get('google_api_key')
tavily_key = os.environ.get('tavily_key')
pse = os.environ.get('pse')  # sent as the `cx` parameter by google_image_search

# Shared clients used by the agents and graph nodes defined in this module.
tavily_client = TavilyClient(api_key=tavily_key)
llm = GeminiModel(
    'gemini-2.0-flash',
    provider=GoogleGLAProvider(api_key=google_api_key),
)
22
-
23
@dataclass
class State:
    """Mutable state passed through every node of the research graph."""

    # The user's research question; set by Deep_research_engine.chat.
    query: str
    # Output of the preliminary web search agent.
    preliminary_research: str
    # NOTE(review): annotated Dict, but Research_plan_node stores the
    # Research_plan model returned by the planning agent here.
    research_plan: Dict
    # NOTE(review): annotated Dict, but Research_node stores a
    # Research_results model here.
    research_results: Dict
    # Not written by any node visible in this module.
    validation: str
    # The finished paper dict assembled by PaperGen_node.
    final: Dict
31
# Structured output of `paragraph_gen_agent`: one written paragraph.
# Documented with comments (not a docstring) so the model's generated
# schema is left exactly as it was.
class paragraph_content(BaseModel):
    title: str = Field(
        description='the title of the paragraph',
    )
    content: str = Field(
        description='the content of the paragraph',
    )
34
-
35
# One planned paragraph of the paper layout: a heading plus a brief of
# what the paragraph is expected to cover. Used as the item type of
# Paper_layout.paragraphs.
class paragraph(BaseModel):
    title: str = Field(
        description='the title of the paragraph',
    )
    should_include: str = Field(
        description='a description of what the paragraph should include',
    )
38
# Structured output of `paper_layout_agent`: the paper title and the
# ordered list of planned paragraphs.
class Paper_layout(BaseModel):
    title: str = Field(
        description='the title of the paper',
    )
    paragraphs: List[paragraph] = Field(
        description='the list of paragraphs of the paper',
    )
41
-
42
# Agent that plans the paper: returns a Paper_layout (title + paragraph outline).
paper_layout_agent = Agent(
    llm,
    result_type=Paper_layout,
    system_prompt=(
        "generate a paper layout based on the query, preliminary_search, search_results,"
        "include a Title for the paper, for the paragraphs only include the title, "
        "no content, no image, no table, start with introduction and end with conclusion"
    ),
)

# Agent that writes a single paragraph: returns a paragraph_content.
paragraph_gen_agent = Agent(
    llm,
    result_type=paragraph_content,
    system_prompt=(
        "generate a paragraph synthesizing the research_results based on the title,"
        "what the paragraph should include, "
        "and what has already been written to avoid repetition"
    ),
)
44
# Final graph node: asks the layout agent for an outline, writes each
# paragraph in turn, and ends the graph with the assembled paper dict.
class PaperGen_node(BaseNode[State]):
    async def run(self, ctx: GraphRunContext[State]) -> End:
        """Generate the paper and finish the graph run.

        Reads ``query``, ``preliminary_research`` and ``research_results``
        from the state, stores the finished paper in ``ctx.state.final`` and
        returns it wrapped in ``End``.

        The paper is a dict with the keys ``title``, ``image_url``,
        ``paragraphs``, ``table`` and ``references``; empty/falsy optional
        parts are reported as ``None``.
        """
        # Local import keeps this block self-contained.
        import asyncio

        research = ctx.state.research_results

        layout_prompt = (
            f'query:{ctx.state.query}, preliminary_search:{ctx.state.preliminary_research},'
            f'search_results:{research.research_results}'
        )
        layout = await paper_layout_agent.run(layout_prompt)

        written = []
        for section in layout.data.paragraphs:
            # Pause between model calls. asyncio.sleep yields to the event
            # loop; the previous time.sleep(2) froze it for the whole pause.
            await asyncio.sleep(2)
            generated = await paragraph_gen_agent.run(
                f'title:{section.title}, should_include:{section.should_include}, '
                f'research_results:{research.research_results}, already_written:{written}'
            )
            written.append(generated.data.model_dump())

        paper = {
            'title': layout.data.title,
            'image_url': research.image_url or None,
            'paragraphs': written,
            'table': research.table or None,
            'references': research.references or None,
        }

        ctx.state.final = paper
        return End(ctx.state.final)
63
-
64
def google_image_search(query: str, timeout: float = 10.0) -> Optional[str]:
    """Search for an image with the Google Custom Search API.

    Args:
        query: The image search query.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would hang the caller
            indefinitely.

    Returns:
        The ``link`` of the first image result, or ``None`` when the
        response contains no image results.
    """
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "q": query,
        "cx": pse,
        "key": google_api_key,
        "searchType": "image",  # restrict results to images
        "num": 1,  # only the first result is used
    }

    response = requests.get(url, params=params, timeout=timeout)
    data = response.json()

    # 'items' may be absent (error payload, no matches) or an empty list;
    # both mean "no image" rather than an error.
    items = data.get('items')
    if not items:
        return None
    return items[0].get('link')
89
-
90
# A single row of the generated table: its cell values as strings.
class Table_row(BaseModel):
    data: List[str] = Field(
        description='the data of the row',
    )
92
# Structured output of `table_agent`: the table rows plus column names.
class Table(BaseModel):
    rows: List[Table_row] = Field(
        description='the rows of the table',
    )
    columns: List[str] = Field(
        description='the columns of the table',
    )
95
-
96
# Container for everything Research_node gathers: the collected text
# snippets, an optional image URL, an optional table and the source URLs.
#
# The fields previously used Field(default_factory=None), which is the same
# as giving no default at all and left every field required. They now carry
# real defaults, so the model can also be built with no arguments; callers
# that pass every field explicitly behave as before.
class Research_results(BaseModel):
    research_results: List[str] = Field(
        default_factory=list,
        description='the research results',
    )
    image_url: Optional[str] = Field(
        default=None,
        description='the image url if needed else return None',
    )
    table: Optional[dict] = Field(
        default=None,
        description='the table dataframe in a dictionary format',
    )
    references: Optional[str] = Field(
        default=None,
        description='the references (urls) of the research_results',
    )
101
-
102
# Agent that turns the research results into a Table (rows + columns).
table_agent = Agent(
    llm,
    result_type=Table,
    system_prompt=(
        "generate a detailed table in dictionary format "
        "based on the research and the query"
    ),
)
103
-
104
# Graph node that executes the research plan: runs every planned web search,
# keeps the sufficiently relevant hits, and optionally adds an image URL and
# a generated table before handing over to PaperGen_node.
class Research_node(BaseNode[State]):
    async def run(self, ctx: GraphRunContext[State]) -> PaperGen_node:
        """Collect research material for the plan stored on the state.

        For each query in ``ctx.state.research_plan.search_queries`` the
        Tavily client is searched and results with a score above 0.50 are
        kept. The de-duplicated contents and a comma-separated list of the
        source URLs are stored in ``ctx.state.research_results``. If the plan
        asks for an image or a table, those are fetched/generated as well.

        Returns:
            The next node, ``PaperGen_node``.
        """
        plan = ctx.state.research_plan
        findings = Research_results(research_results=[], image_url='', table={}, references='')

        # Accumulate across ALL queries. The URL list used to be reset for
        # every query, so the references only covered the last search.
        contents = []
        urls = []
        for planned in plan.search_queries:
            response = tavily_client.search(planned.search_query)
            for hit in response.get('results') or []:
                score = hit.get('score')
                # Skip hits without a score instead of failing the comparison.
                if score is None or score <= 0.50:
                    continue
                hit_url = hit.get('url')
                if hit_url:
                    urls.append(hit_url)
                hit_content = hit.get('content')
                if hit_content:
                    contents.append(hit_content)

        # dict.fromkeys removes duplicates while keeping first-seen order,
        # so the output is deterministic (set() ordering is not).
        findings.research_results = list(dict.fromkeys(contents))
        findings.references = ', '.join(dict.fromkeys(urls))
        ctx.state.research_results = findings

        if plan.image_search_query:
            findings.image_url = google_image_search(plan.image_search_query)

        if plan.table:
            table_run = await table_agent.run(
                f'research_results:{findings.research_results},query:{ctx.state.query}'
            )
            findings.table = {
                'data': [row.data for row in table_run.data.rows],
                'columns': table_run.data.columns,
            }

        return PaperGen_node()
131
-
132
# One web search to run as part of the research plan.
class search_query(BaseModel):
    search_query: str = Field(
        description='the detailed web search query for the research',
    )
134
-
135
# Structured output of `research_plan_agent`: the searches to run, plus
# optional requests for a table and for an image.
#
# The two optional fields previously used Field(default_factory=None), which
# sets no default and left them required. They now default to None, matching
# their Optional annotations and descriptions.
class Research_plan(BaseModel):
    search_queries: List[search_query] = Field(
        description='the detailed web search queries for the research',
    )
    table: Optional[str] = Field(
        default=None,
        description='if a table is needed, return yes else return None',
    )
    image_search_query: Optional[str] = Field(
        default=None,
        description='if image is needed, generate a image search query, optional',
    )
139
-
140
# Agent that produces the Research_plan from the query and preliminary search.
research_plan_agent = Agent(
    llm,
    result_type=Research_plan,
    system_prompt=(
        'generate a detailed research plan breaking down the research into smaller parts '
        'based on the query and the preliminary search, '
        'include a table and image search query if the user wants it'
    ),
)
141
-
142
# Graph node that turns the query and the preliminary search output into a
# research plan, then hands over to Research_node.
class Research_plan_node(BaseNode[State]):
    async def run(self, ctx: GraphRunContext[State]) -> Research_node:
        """Ask `research_plan_agent` for a plan and store it on the state.

        Returns:
            The next node, ``Research_node``.
        """
        state = ctx.state
        planning_prompt = f'query:{state.query}, preliminary_search:{state.preliminary_research}'
        plan_run = await research_plan_agent.run(planning_prompt)
        state.research_plan = plan_run.data
        return Research_node()
149
-
150
-
151
# Agent used for the preliminary search; it is given the Tavily search tool.
search_agent = Agent(
    llm,
    tools=[tavily_search_tool(tavily_key)],
    system_prompt="do a websearch based on the query",
)
152
-
153
# Entry node of the graph: runs a first, broad web search on the query and
# stores the agent's answer for the planning step.
class preliminary_search_node(BaseNode[State]):
    async def run(self, ctx: GraphRunContext[State]) -> Research_plan_node:
        """Run `search_agent` on the query and save its output on the state.

        Returns:
            The next node, ``Research_plan_node``.
        """
        state = ctx.state
        instructions = ' Do a preliminary search to get a global idea of the subject that the user wants to do reseach on as well as the necessary informations to do a search on.\n'
        subject = f'The subject is based on the query: {state.query}, return the results of the search.'
        search_run = await search_agent.run(instructions + subject)
        state.preliminary_research = search_run.data
        return Research_plan_node()
160
-
161
class Deep_research_engine:
    """Front end for the research graph.

    Wires the four nodes (preliminary search, planning, research, paper
    generation) into a graph and keeps the State object the runs operate on.
    """

    def __init__(self):
        """Build the graph and an empty starting state."""
        self.graph = Graph(
            nodes=[preliminary_search_node, Research_plan_node, Research_node, PaperGen_node]
        )
        # Placeholders only: each node overwrites its field before a later
        # node reads it. Empty dicts are used for the fields State declares
        # as Dict (they were previously [] and '').
        self.state = State(
            query='',
            preliminary_research='',
            research_plan={},
            research_results={},
            validation='',
            final={},
        )

    async def chat(self, query: str):
        """Run the full research graph for a query.

        Args:
            query (str): The query to research.

        Returns:
            The output of the graph run, i.e. the value PaperGen_node passes
            to ``End``: a dict with the keys ``title``, ``image_url``,
            ``paragraphs``, ``table`` and ``references``.
        """
        self.state.query = query
        response = await self.graph.run(preliminary_search_node(), state=self.state)
        return response.output

    def display_graph(self):
        """Render the graph as a Mermaid image and show it via IPython.

        Returns:
            Whatever IPython's ``display`` call returns for the rendered
            image (the Image object itself is not returned).
        """
        image = self.graph.mermaid_image()
        return display(Image(image))
185
-