hhhhmmmm committed on
Commit
18d59ce
·
verified ·
1 Parent(s): 51fbf7d

Create agent.py

Browse files
Files changed (1) hide show
  1. agent.py +361 -0
agent.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import time
4
+ import re
5
+ import json
6
+ from typing import List, Optional, Dict, Any
7
+ from urllib.parse import urlparse
8
+ import requests
9
+ import yt_dlp
10
+ from bs4 import BeautifulSoup
11
+ from difflib import SequenceMatcher
12
+
13
+ from langchain_core.messages import HumanMessage, SystemMessage
14
+ from langchain_google_genai import ChatGoogleGenerativeAI
15
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper
16
+ from langchain.agents import Tool, AgentExecutor, ConversationalAgent, initialize_agent, AgentType
17
+ from langchain.memory import ConversationBufferMemory
18
+ from langchain.prompts import MessagesPlaceholder
19
+ from langchain.tools import BaseTool, Tool, tool
20
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold
21
+ from PIL import Image
22
+ import google.generativeai as genai
23
+ from pydantic import Field
24
+
25
+ from smolagents import WikipediaSearchTool
26
+
27
class SmolagentToolWrapper(BaseTool):
    """Adapter that exposes a smolagents tool through LangChain's ``BaseTool``.

    The wrapped tool's ``name`` and ``description`` are copied onto the
    LangChain tool, and every call is forwarded to the wrapped object.
    """

    # The underlying smolagents tool instance that calls are delegated to.
    wrapped_tool: object = Field(description="The wrapped smolagents tool")

    def __init__(self, tool):
        """Build the LangChain tool from *tool*'s ``name`` and ``description``.

        Args:
            tool: A smolagents tool object exposing ``name`` and
                ``description`` attributes.
        """
        super().__init__(
            name=tool.name,
            description=tool.description,
            return_direct=False,
            wrapped_tool=tool,
        )

    def _run(self, query: str) -> str:
        """Run the wrapped tool on *query*.

        Prefers the tool's ``search`` method when it has one, otherwise calls
        the tool object directly.

        Returns:
            The tool's result, or an ``"Error using tool: ..."`` string when
            the tool raises (errors are reported to the agent, not raised).
        """
        try:
            if hasattr(self.wrapped_tool, 'search'):
                return self.wrapped_tool.search(query)
            return self.wrapped_tool(query)
        except Exception as e:
            return f"Error using tool: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Async entry point; delegates to the synchronous ``_run``.

        This must be a coroutine function: LangChain awaits ``_arun``, and
        awaiting the plain string a regular ``def`` returns raises
        ``TypeError``.
        """
        return self._run(query)
55
+
56
+
57
+
58
class BasicAgent:
    """Conversational LangChain agent backed by a Gemini chat model.

    The agent is given a Wikipedia tool, a DuckDuckGo web-search tool and a
    handful of LLM-backed "analyze_*" helpers, plus a conversation buffer
    memory. Every tool reports failures as a plain string instead of raising,
    so that the agent loop can read the error as an observation.
    """

    # Hosts accepted by ``_analyze_video`` (sub-domains are accepted as well).
    _YOUTUBE_HOSTS = ("youtube.com", "youtu.be")

    def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
        """Configure Gemini, then build the LLM, the tools, memory and agent.

        Args:
            api_key: Google Generative AI API key.
            model_name: Name of the Gemini model to use for the chat LLM.
        """
        # Suppress warnings (process-wide filters).
        import warnings
        warnings.filterwarnings("ignore", category=UserWarning)
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        warnings.filterwarnings("ignore", message=".*will be deprecated.*")
        warnings.filterwarnings("ignore", "LangChain.*")

        self.api_key = api_key
        self.model_name = model_name

        # Configure Gemini
        genai.configure(api_key=api_key)

        # Initialize the LLM (reads self.api_key / self.model_name).
        self.llm = self._setup_llm()

        # Setup tools
        self.tools = [
            SmolagentToolWrapper(WikipediaSearchTool()),
            Tool(
                name="analyze_video",
                func=self._analyze_video,
                description="Analyze YouTube video content directly",
            ),
            Tool(
                name="analyze_image",
                func=self._analyze_image,
                description="Analyze image content",
            ),
            Tool(
                name="analyze_table",
                func=self._analyze_table,
                description="Analyze table or matrix data",
            ),
            Tool(
                name="analyze_list",
                func=self._analyze_list,
                description="Analyze and categorize list items",
            ),
            Tool(
                name="web_search",
                func=self._web_search,
                description="Search the web for information",
            ),
        ]

        # Setup memory
        # NOTE(review): return_messages=True yields message objects, while the
        # agent prompt below is a plain string template - confirm this is the
        # intended history format.
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )

        # Initialize agent (reads self.llm, self.tools and self.memory).
        self.agent = self._setup_agent()

    def run(self, query: str, max_retries: int = 3, base_sleep: float = 1) -> str:
        """Run the agent on *query*, retrying with a linearly growing pause.

        Args:
            query: The user question passed to the agent.
            max_retries: Total number of attempts (values below 1 are treated
                as 1).
            base_sleep: Seconds slept after the first failure; the pause after
                the n-th failure is ``base_sleep * n``.

        Returns:
            The agent's answer, or an ``"Error processing query after ..."``
            string describing the last failure once all attempts are used up.
        """
        attempts = max(1, max_retries)
        last_error = None

        for attempt in range(attempts):
            try:
                return self.agent.run(query)
            except Exception as e:
                last_error = e
                # No point in sleeping after the final attempt.
                if attempt < attempts - 1:
                    sleep_time = base_sleep * (attempt + 1)
                    print(f"Attempt {attempt + 1} failed. Retrying in {sleep_time} seconds...")
                    time.sleep(sleep_time)

        return f"Error processing query after {attempts} attempts: {str(last_error)}"

    def _clean_response(self, response: str) -> str:
        """Strip AgentExecutor log markers and tool-call traces from *response*.

        Returns:
            The remaining text with surrounding whitespace removed.
        """
        # Dots are escaped so only the literal log markers are removed.
        cleaned = re.sub(
            r'> Entering new AgentExecutor chain\.\.\.|> Finished chain\.',
            '',
            response,
        )
        # Drop each complete Thought/Action/Action Input/Observation step.
        cleaned = re.sub(
            r'Thought:.*?Action:.*?Action Input:.*?Observation:.*?\n',
            '',
            cleaned,
            flags=re.DOTALL,
        )
        return cleaned.strip()

    def run_interactive(self):
        """Read queries from stdin and print answers until the user quits.

        The loop ends on the word ``exit`` (case-insensitive), on end-of-input
        or on Ctrl-C. Blank lines are skipped instead of being sent to the
        agent.
        """
        print("AI Assistant Ready! (Type 'exit' to quit)")

        while True:
            try:
                query = input("You: ").strip()
            except (EOFError, KeyboardInterrupt):
                print("\nGoodbye!")
                break

            if query.lower() == 'exit':
                print("Goodbye!")
                break
            if not query:
                continue

            print("Assistant:", self.run(query))

    def _ask_llm(self, prompt: str) -> str:
        """Send *prompt* as a single human message and return the reply text."""
        response = self.llm.invoke([HumanMessage(content=prompt)])
        return response.content if hasattr(response, 'content') else str(response)

    def _web_search(self, query: str, domain: Optional[str] = None) -> str:
        """Search DuckDuckGo for *query*, optionally restricted to *domain*.

        A single request is made; there is no retry or rate limiting here.

        Returns:
            The search results text, ``"No search results found."`` when the
            result is empty, or a ``"Search error: ..."`` string on failure.
        """
        try:
            search = DuckDuckGoSearchAPIWrapper(max_results=5)
            search_text = f"{query} site:{domain}" if domain else query
            results = search.run(search_text)

            if not results or results.strip() == "":
                return "No search results found."

            return results

        except Exception as e:
            return f"Search error: {str(e)}"

    def _analyze_video(self, url: str) -> str:
        """Describe a YouTube video from its title and description.

        Only metadata fetched with yt-dlp is given to the LLM; the video
        stream itself is neither downloaded nor inspected.

        Args:
            url: Absolute http(s) URL on ``youtube.com`` / ``youtu.be`` (or a
                sub-domain of either).

        Returns:
            The LLM's analysis, or a human-readable error string.
        """
        # Validate the URL and make sure the *host* is YouTube. A substring
        # test on the whole URL would also accept e.g. "?ref=youtube.com".
        try:
            parsed_url = urlparse(url)
            if not all([parsed_url.scheme, parsed_url.netloc]):
                return "Please provide a valid video URL with http:// or https:// prefix."

            host = (parsed_url.hostname or "").lower()
            is_youtube = any(
                host == known or host.endswith("." + known)
                for known in self._YOUTUBE_HOSTS
            )
            if not is_youtube:
                return "Only YouTube videos are supported at this time."
        except Exception as e:
            return f"Error analyzing video: {str(e)}"

        # Configure yt-dlp with minimal extraction.
        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': True,
            'noplaylist': True,  # yt-dlp's option key has no underscore
            'youtube_include_dash_manifest': False,
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                try:
                    info = ydl.extract_info(url, download=False, process=False)
                except Exception as e:
                    if 'Sign in to confirm' in str(e):
                        return "This video requires age verification or sign-in. Please provide a different video URL."
                    return f"Error accessing video: {str(e)}"
        except Exception as e:
            return f"Error extracting video info: {str(e)}"

        if not info:
            return "Could not extract video information."

        # Missing or None metadata must not show up as the text "None".
        title = info.get('title') or 'Unknown'
        description = info.get('description') or ''

        prompt = "\n".join([
            "Please analyze this YouTube video:",
            f"Title: {title}",
            f"URL: {url}",
            f"Description: {description}",
            "Please provide a detailed analysis focusing on:",
            "1. Main topic and key points from the title and description",
            "2. Expected visual elements and scenes",
            "3. Overall message or purpose",
            "4. Target audience",
        ])

        try:
            return self._ask_llm(prompt)
        except Exception as e:
            return f"Error analyzing video: {str(e)}"

    def _analyze_table(self, table_data: str) -> str:
        """Ask the LLM to analyze textual table or matrix data.

        Returns:
            The LLM's analysis, a prompt for valid input when *table_data* is
            empty or not a string, or an ``"Error analyzing table: ..."``
            string on failure.
        """
        if not table_data or not isinstance(table_data, str):
            return "Please provide valid table data for analysis."

        prompt = "\n".join([
            "Please analyze this table:",
            table_data,
            "Provide a detailed analysis including:",
            "1. Structure and format",
            "2. Key patterns or relationships",
            "3. Notable findings",
            "4. Any mathematical properties (if applicable)",
        ])

        try:
            return self._ask_llm(prompt)
        except Exception as e:
            return f"Error analyzing table: {str(e)}"

    def _analyze_image(self, image_data: str) -> str:
        """Ask the LLM about an image given as a string.

        *image_data* is embedded verbatim in a text prompt; no image bytes are
        loaded or attached here.

        Returns:
            The LLM's reply, a prompt for valid input when *image_data* is
            empty or not a string, or an ``"Error analyzing image: ..."``
            string on failure.
        """
        if not image_data or not isinstance(image_data, str):
            return "Please provide a valid image for analysis."

        prompt = "\n".join([
            "Please analyze this image:",
            image_data,
            "Focus on:",
            "1. Visual elements and objects",
            "2. Colors and composition",
            "3. Text or numbers (if present)",
            "4. Overall context and meaning",
        ])

        try:
            return self._ask_llm(prompt)
        except Exception as e:
            return f"Error analyzing image: {str(e)}"

    def _analyze_list(self, list_data: str) -> str:
        """Ask the LLM to analyze and categorize a comma-separated list.

        Args:
            list_data: Items separated by commas; surrounding whitespace and
                empty entries are ignored.

        Returns:
            The LLM's analysis, a hint when no usable items were supplied, or
            an ``"Error analyzing list: ..."`` string on failure.
        """
        if not list_data or not isinstance(list_data, str):
            return "No list data provided."

        # str.split never returns an empty list, so blanks are filtered here.
        items = [piece.strip() for piece in list_data.split(',') if piece.strip()]
        if not items:
            return "Please provide a comma-separated list of items."

        prompt = "\n".join([
            "Please analyze and categorize the items in this list:",
            ", ".join(items),
            "Provide:",
            "1. Sensible categories and the items belonging to each",
            "2. Notable patterns, duplicates or outliers",
            "3. A short summary",
        ])

        try:
            return self._ask_llm(prompt)
        except Exception as e:
            return f"Error analyzing list: {str(e)}"

    def _setup_llm(self):
        """Create the Gemini chat model from ``self.model_name`` and ``self.api_key``."""
        generation_config = {
            "temperature": 0.0,
            "max_output_tokens": 2000,
            "candidate_count": 1,
        }

        safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        }

        # NOTE(review): confirm that this ChatGoogleGenerativeAI version
        # honours the `generation_config` and `system_message` keywords.
        return ChatGoogleGenerativeAI(
            model=self.model_name,  # honour the constructor argument
            google_api_key=self.api_key,
            temperature=0,
            max_output_tokens=2000,
            generation_config=generation_config,
            safety_settings=safety_settings,
            system_message=SystemMessage(content=(
                "You are a precise AI assistant that helps users find information and analyze content. "
                "You can directly understand and analyze YouTube videos, images, and other content. "
                "When analyzing videos, focus on relevant details like dialogue, text, and key visual elements. "
                "For lists, tables, and structured data, ensure proper formatting and organization. "
                "If you need additional context, clearly explain what is needed."
            )),
        )

    def _setup_agent(self) -> "AgentExecutor":
        """Build the conversational agent and wrap it in an ``AgentExecutor``.

        Returns:
            An executor over ``self.tools`` using ``self.memory``, limited to
            five iterations, that feeds parsing errors back to the model.
        """
        PREFIX = "\n".join([
            "You are a helpful AI assistant that can use various tools to answer questions and analyze content. "
            "You have access to tools for web search, Wikipedia lookup, and multimedia analysis.",
            "TOOLS:",
            "------",
            "You have access to the following tools:",
        ])

        FORMAT_INSTRUCTIONS = "\n".join([
            "To use a tool, use the following format:",
            "Thought: Do I need to use a tool? Yes",
            "Action: the action to take, should be one of [{tool_names}]",
            "Action Input: the input to the action",
            "Observation: the result of the action",
            "When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:",
            "Thought: Do I need to use a tool? No",
            "Final Answer: [your response here]",
            "Begin! Remember to ALWAYS include 'Thought:', 'Action:', 'Action Input:', and 'Final Answer:' in your responses.",
        ])

        SUFFIX = "\n".join([
            "Previous conversation history:",
            "{chat_history}",
            "New question: {input}",
            "{agent_scratchpad}",
        ])

        agent = ConversationalAgent.from_llm_and_tools(
            llm=self.llm,
            tools=self.tools,
            prefix=PREFIX,
            format_instructions=FORMAT_INSTRUCTIONS,
            suffix=SUFFIX,
            # The output parser finishes only on "<ai_prefix>:", so it has to
            # match the "Final Answer:" marker requested in the instructions.
            ai_prefix="Final Answer",
            # {tool_names} is filled in while the prompt is built, so it is
            # not a runtime input variable.
            input_variables=["input", "chat_history", "agent_scratchpad"],
        )

        return AgentExecutor.from_agent_and_tools(
            agent=agent,
            tools=self.tools,
            memory=self.memory,
            max_iterations=5,
            verbose=True,
            handle_parsing_errors=True,
        )