blazingbunny commited on
Commit
413f406
·
verified ·
1 Parent(s): fcf1e29

Upload 3 files

Browse files
Files changed (3) hide show
  1. agent.py +166 -20
  2. app.py +25 -2
  3. requirements.txt +15 -1
agent.py CHANGED
@@ -7,9 +7,25 @@ from langchain_google_genai import ChatGoogleGenerativeAI
7
  from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
8
  from langgraph.graph import StateGraph, END, START
9
  from langgraph.prebuilt import ToolNode
10
- from langchain_tavily import TavilySearch
11
  from langchain_core.tools import tool
12
- from langchain_community.document_loaders import YoutubeLoader
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  from dotenv import load_dotenv
14
 
15
  load_dotenv()
@@ -40,23 +56,124 @@ except Exception as e:
40
  class AgentState(TypedDict):
41
  messages: Annotated[List[BaseMessage], operator.add]
42
 
 
 
 
 
 
 
 
 
 
 
 
43
  # 2. Define the tools
44
  @tool
45
  def get_youtube_transcript(url: str) -> str:
46
  """Retrieves the transcript of a YouTube video given its URL."""
47
  try:
48
- loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  docs = loader.load()
50
- return "\n".join([doc.page_content for doc in docs])
 
 
51
  except Exception as e:
52
- return f"Error getting transcript: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- tools = [TavilySearch(max_results=3), get_youtube_transcript]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  tool_node = ToolNode(tools)
56
 
57
  # 3. Define the model
58
- LLM = "gemini-3-flash-preview"
59
- model = ChatGoogleGenerativeAI(model=LLM, temperature=0, max_retries=5)
 
 
 
 
 
60
  model = model.bind_tools(tools)
61
 
62
  # 4. Define the agent node
@@ -88,7 +205,23 @@ class LangGraphAgent:
88
 
89
  def __call__(self, question: str, task_id: str = None) -> str:
90
  messages = [
91
- SystemMessage(content="You are a helpful assistant. Answer the user's question directly and concisely. Do not include any introductory text or 'Final Answer:'. Just output the answer. If you have gathered sufficient information to answer the question, STOP SEARCHING and provide the answer immediately. Do not perform redundant searches. If the question involves an image or video provided in the context, analyze it to answer."),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  ]
93
 
94
  content = []
@@ -99,25 +232,38 @@ class LangGraphAgent:
99
  try:
100
  # Check headers first
101
  response = requests.head(image_url, timeout=5)
102
- if response.status_code == 200 and "image" in response.headers.get("Content-Type", ""):
103
- # Fetch the image
 
 
 
104
  img_response = requests.get(image_url, timeout=10)
105
  if img_response.status_code == 200:
106
- image_data = base64.b64encode(img_response.content).decode("utf-8")
107
- # Determine MIME type from header or default to jpeg
108
- mime_type = response.headers.get("Content-Type", "image/jpeg")
109
  content.append({
110
- "type": "image_url",
111
- "image_url": {"url": f"data:{mime_type};base64,{image_data}"}
112
  })
113
  except Exception as e:
114
- print(f"Error checking/fetching image: {e}")
115
 
116
  messages.append(HumanMessage(content=content))
117
 
118
  inputs = {"messages": messages}
119
  final_state = self.app.invoke(inputs)
120
  result = final_state['messages'][-1].content
121
- if isinstance(result, list):
122
- return " ".join([str(c) for c in result])
123
- return str(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
8
  from langgraph.graph import StateGraph, END, START
9
  from langgraph.prebuilt import ToolNode
 
10
  from langchain_core.tools import tool
11
+ from langchain_community.document_loaders import YoutubeLoader, WikipediaLoader
12
+ from langchain_community.tools import WikipediaQueryRun
13
+ from langchain_community.utilities import WikipediaAPIWrapper
14
+ from langchain_experimental.utilities import PythonREPL
15
+ from langchain_chroma import Chroma
16
+ from langchain_huggingface import HuggingFaceEmbeddings
17
+ from langchain.tools import tool
18
+ from langchain_community.tools import YouTubeSearchTool
19
+ # Playwright Imports (Optional)
20
+ try:
21
+ from langchain_community.agent_toolkits import PlaywrightBrowserToolkit
22
+ from langchain_community.tools.playwright.utils import create_sync_playwright_browser
23
+ except ImportError:
24
+ PlaywrightBrowserToolkit = None
25
+ create_sync_playwright_browser = None
26
+
27
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
28
+ from langchain_core.documents import Document
29
  from dotenv import load_dotenv
30
 
31
  load_dotenv()
 
56
  class AgentState(TypedDict):
57
  messages: Annotated[List[BaseMessage], operator.add]
58
 
59
+ # Helper to split and save documents to Chroma
60
+ def save_to_chroma(docs):
61
+ if 'vector_store' in globals() and vector_store and docs:
62
+ try:
63
+ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
64
+ splits = splitter.split_documents(docs)
65
+ if splits:
66
+ vector_store.add_documents(splits)
67
+ except Exception as e:
68
+ print(f"Error saving to Chroma: {e}")
69
+
70
  # 2. Define the tools
71
@tool
def get_youtube_transcript(url: str) -> str:
    """Retrieves the transcript of a YouTube video given its URL."""
    try:
        docs = YoutubeLoader.from_youtube_url(url, add_video_info=True).load()
        if not docs:
            return "No transcript found. Please search Google for the video title or ID."

        # Remember the transcript in the knowledge base for later RAG lookups.
        save_to_chroma(docs)

        sections = [f"Metadata: {d.metadata}\nContent: {d.page_content}" for d in docs]
        return "\n\n".join(sections)
    except Exception as e:
        return f"Error getting transcript: {e}. Please try searching Google for the video URL or ID."
86
+
87
@tool
def calculator(expression: str) -> str:
    """Calculates a mathematical expression using Python. Example: '2 + 2', '34 * 5', 'import math; math.sqrt(2)'"""
    # NOTE: PythonREPL executes arbitrary Python. Input comes from the LLM's
    # tool calls, not directly from end users, but treat it as untrusted.
    try:
        repl = PythonREPL()
        code = expression
        if "print" not in expression:
            # Wrap bare expressions so the REPL echoes the value. Multi-statement
            # inputs (e.g. 'import math; math.sqrt(2)') cannot be wrapped —
            # print(import math; ...) is a SyntaxError — so fall back to running
            # the input unmodified in that case. (Previously the wrap was applied
            # unconditionally and broke the docstring's own example.)
            wrapped = f"print({expression})"
            try:
                compile(wrapped, "<calculator>", "exec")
                code = wrapped
            except SyntaxError:
                pass
        return repl.run(code)
    except Exception as e:
        return f"Error calculating: {e}"
97
+
98
@tool
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for a query. Useful for factual lists and biographies."""
    try:
        pages = WikipediaLoader(query=query, load_max_docs=3).load()
        # Persist the fetched pages into the knowledge base for later RAG use.
        save_to_chroma(pages)
        # Cap each page at 10k chars to keep the tool output bounded.
        return "\n\n".join(p.page_content[:10000] for p in pages)
    except Exception as e:
        return f"Error searching Wikipedia: {e}"
109
+
110
# ChromaDB RAG tool: persistent vector store used as the agent's long-term
# memory. Left as None when initialization fails so dependent tools can no-op.
vector_store = None
try:
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vector_store = Chroma(
        embedding_function=embeddings,
        collection_name="agent_memory",
        persist_directory="./chroma_db",
    )
except Exception as e:
    print(f"Warning: ChromaDB initialization failed. RAG features disabled. Error: {e}")
121
 
122
@tool
def search_knowledge_base(query: str) -> str:
    """Searches for relevant documents in the persistent knowledge base (memory of previous searches)."""
    # Explicit guard: when Chroma failed to initialize, vector_store is None and
    # the old code surfaced an opaque "'NoneType' has no attribute" error.
    if vector_store is None:
        return "Knowledge base unavailable (vector store not initialized)."
    try:
        retriever = vector_store.as_retriever()
        docs = retriever.invoke(query)
        if not docs:
            return "No relevant information found."
        return "\n".join([d.page_content for d in docs])
    except Exception as e:
        return f"Error searching knowledge base: {e}"
133
+
134
@tool
def browse_page(url: str) -> str:
    """Browses a web page and extracts text using Playwright. Use this to read content from specific URLs."""
    if not create_sync_playwright_browser:
        return "Browsing unavailable (Playwright not installed)."
    browser = None
    try:
        browser = create_sync_playwright_browser(headless=True)
        page = browser.new_page()
        page.goto(url)
        text = page.inner_text("body")
        # Persist the page into the knowledge base; save_to_chroma already
        # guards against a missing vector store and swallows indexing errors,
        # so a failed save no longer discards the fetched text.
        save_to_chroma([Document(page_content=text, metadata={"source": url})])
        return text[:10000]
    except Exception as e:
        return f"Error browsing: {e}"
    finally:
        # Always release the browser — previously it leaked whenever
        # goto()/inner_text() raised before the close() call.
        if browser is not None:
            browser.close()
154
+
155
@tool
def search_youtube_videos(query: str) -> str:
    """Search for YouTube videos. Provide only the search keywords."""
    try:
        # Renamed local: the old 'tool' variable shadowed the @tool decorator
        # imported at module level.
        yt_search = YouTubeSearchTool()
        return yt_search.run(f"{query}, 3")
    except Exception as e:
        return f"Error searching YouTube: {e}"
163
+
164
# Tool registry wired into the LangGraph ToolNode.
# (Native Google Search is enabled separately via the model parameter.)
tools = [
    get_youtube_transcript,
    calculator,
    search_wikipedia,
    search_knowledge_base,
    search_youtube_videos,
    browse_page,
]
tool_node = ToolNode(tools)
168
 
169
  # 3. Define the model
170
+ LLM = "gemini-2.0-flash"
171
+ model = ChatGoogleGenerativeAI(
172
+ model=LLM,
173
+ temperature=0,
174
+ max_retries=5,
175
+ google_search_retrieval=True
176
+ )
177
  model = model.bind_tools(tools)
178
 
179
  # 4. Define the agent node
 
205
 
206
  def __call__(self, question: str, task_id: str = None) -> str:
207
  messages = [
208
+ SystemMessage(content="""You are a helpful assistant with multimodal capabilities (Vision, Audio, PDF analysis).
209
+ Step 1: ALWAYS START by performing a Google Search (or using Wikipedia/YouTube) to gather up-to-date information. Do not answer from memory.
210
+ Step 2: If a URL is provided, search for the **EXACT URL** string on Google first to identify the video/page title. Do not add keywords yet. **DO NOT use the 'youtube_search' tool for this step; use Google Search.**
211
+ Step 3: Once you have the title, search for that title to find descriptions or summaries.
212
+ Step 4: Analyze the information found. If you cannot access a specific page or video directly (e.g. empty transcript), DO NOT GIVE UP. Use Google Search to find descriptions, summaries, or discussions from reliable sources.
213
+ Step 5: If you identify relevant Wikipedia pages or YouTube videos, use the specific tools ('search_wikipedia', 'get_youtube_transcript') to ingest them into your Knowledge Base.
214
+ Step 6: Reason to find the exact answer. Verify your findings by cross-referencing multiple sources if possible. You can use 'search_knowledge_base' to connect facts you have saved.
215
+ Step 7: Output the final answer strictly in this format:
216
+ FINAL ANSWER: [ANSWER]
217
+
218
+ Do not include "FINAL ANSWER:" in the [ANSWER] part itself.
219
+ Example:
220
+ Thinking: ...
221
+ FINAL ANSWER: 3
222
+
223
+ If the question involves an image, video, or audio file provided in the context, analyze it to answer.
224
+ """),
225
  ]
226
 
227
  content = []
 
232
  try:
233
  # Check headers first
234
  response = requests.head(image_url, timeout=5)
235
+ mime_type = response.headers.get("Content-Type", "")
236
+
237
+ # Allow images, audio, video, pdf
238
+ if response.status_code == 200 and any(t in mime_type for t in ["image/", "audio/", "video/", "application/pdf"]):
239
+ # Fetch the file
240
  img_response = requests.get(image_url, timeout=10)
241
  if img_response.status_code == 200:
242
+ file_data = base64.b64encode(img_response.content).decode("utf-8")
 
 
243
  content.append({
244
+ "type": "image_url", # LangChain uses this key for multimodal data URI
245
+ "image_url": {"url": f"data:{mime_type};base64,{file_data}"}
246
  })
247
  except Exception as e:
248
+ print(f"Error checking/fetching file: {e}")
249
 
250
  messages.append(HumanMessage(content=content))
251
 
252
  inputs = {"messages": messages}
253
  final_state = self.app.invoke(inputs)
254
  result = final_state['messages'][-1].content
255
+
256
+ def extract_text(content):
257
+ if isinstance(content, str):
258
+ return content
259
+ if isinstance(content, list):
260
+ return " ".join([extract_text(c) for c in content])
261
+ if isinstance(content, dict):
262
+ return content.get('text', str(content))
263
+ return str(content)
264
+
265
+ text_result = extract_text(result)
266
+ if "FINAL ANSWER:" in text_result:
267
+ return text_result.split("FINAL ANSWER:")[-1].strip()
268
+ return text_result
269
+
app.py CHANGED
@@ -84,8 +84,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, *args):
84
  continue
85
  try:
86
  submitted_answer = agent(question_text, task_id=task_id)
87
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer.strip()})
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
89
  except Exception as e:
90
  print(f"Error running agent on task {task_id}: {e}")
91
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -225,6 +227,27 @@ with gr.Blocks() as demo:
225
  outputs=[file_output]
226
  )
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  if __name__ == "__main__":
229
  print("\n" + "-"*30 + " App Starting " + "-"*30)
230
  # Check for SPACE_HOST and SPACE_ID at startup for information
 
84
  continue
85
  try:
86
  submitted_answer = agent(question_text, task_id=task_id)
87
+ # Clean answer if agent included "FINAL ANSWER:"
88
+ clean_answer = submitted_answer.replace("FINAL ANSWER:", "").strip()
89
+ answers_payload.append({"task_id": task_id, "submitted_answer": clean_answer})
90
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}) # Log original
91
  except Exception as e:
92
  print(f"Error running agent on task {task_id}: {e}")
93
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
227
  outputs=[file_output]
228
  )
229
 
230
+ with gr.Tab("Diagnostics"):
231
+ gr.Markdown("### Check Playwright")
232
+ pw_btn = gr.Button("Test Playwright")
233
+ pw_out = gr.Textbox(label="Result")
234
+
235
def test_playwright_btn():
    """Diagnostics handler: smoke-test Playwright by loading example.com.

    Returns a human-readable result string for the Gradio textbox; never raises.
    """
    browser = None
    try:
        from langchain_community.tools.playwright.utils import create_sync_playwright_browser
        browser = create_sync_playwright_browser(headless=True)
        page = browser.new_page()
        page.goto("https://example.com")
        t = page.title()
        return f"Success! Title: {t}"
    except ImportError:
        return "Playwright not installed/importable."
    except Exception as e:
        return f"Playwright Failed: {e}"
    finally:
        # Release the browser even when navigation/title lookup fails —
        # previously it leaked on any exception after creation.
        if browser is not None:
            browser.close()
248
+
249
+ pw_btn.click(test_playwright_btn, outputs=pw_out)
250
+
251
  if __name__ == "__main__":
252
  print("\n" + "-"*30 + " App Starting " + "-"*30)
253
  # Check for SPACE_HOST and SPACE_ID at startup for information
requirements.txt CHANGED
@@ -12,4 +12,18 @@ google-cloud-aiplatform
12
  youtube-transcript-api
13
  arize-otel
14
  openinference-instrumentation-google-genai
15
- openinference-instrumentation-langchain
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  youtube-transcript-api
13
  arize-otel
14
  openinference-instrumentation-google-genai
15
+ openinference-instrumentation-langchain
16
+ wikipedia
17
+ langchain-experimental
18
+ arxiv
19
+ xmltodict
20
+ chromadb
21
+ langchain-chroma
22
+ langchain-huggingface
23
+ youtube-search
24
+ sentence-transformers
25
+ playwright
26
+ lxml
27
+ pytubefix
28
+ pandas
29
+ openpyxl