Files changed (3) hide show
  1. agent.py +47 -220
  2. app.py +4 -66
  3. requirements.txt +1 -19
agent.py CHANGED
@@ -1,269 +1,96 @@
1
  from typing import TypedDict, Annotated, List
2
  import operator
3
  import os
4
- import base64
5
- import requests
6
  from langchain_google_genai import ChatGoogleGenerativeAI
7
- from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
8
  from langgraph.graph import StateGraph, END, START
9
  from langgraph.prebuilt import ToolNode
10
- from langchain_core.tools import tool
11
- from langchain_community.document_loaders import YoutubeLoader, WikipediaLoader
12
- from langchain_community.tools import WikipediaQueryRun
13
- from langchain_community.utilities import WikipediaAPIWrapper
14
- from langchain_experimental.utilities import PythonREPL
15
- from langchain_chroma import Chroma
16
- from langchain_huggingface import HuggingFaceEmbeddings
17
- from langchain.tools import tool
18
- from langchain_community.tools import YouTubeSearchTool
19
- # Playwright Imports (Optional)
20
- try:
21
- from langchain_community.agent_toolkits import PlaywrightBrowserToolkit
22
- from langchain_community.tools.playwright.utils import create_sync_playwright_browser
23
- except ImportError:
24
- PlaywrightBrowserToolkit = None
25
- create_sync_playwright_browser = None
26
-
27
- from langchain_text_splitters import RecursiveCharacterTextSplitter
28
- from langchain_core.documents import Document
29
  from dotenv import load_dotenv
30
 
31
  load_dotenv()
32
 
33
- # Configure tracing
 
34
  try:
35
- if os.getenv("ARIZE_SPACE_ID") and os.getenv("ARIZE_API_KEY"):
36
- from arize.otel import register
37
- from openinference.instrumentation.google_genai import GoogleGenAIInstrumentor
38
- from openinference.instrumentation.langchain import LangChainInstrumentor
39
-
40
- tracer_provider = register(
41
- space_id=os.getenv("ARIZE_SPACE_ID"),
42
- api_key=os.getenv("ARIZE_API_KEY"),
43
- project_name=os.getenv("ARIZE_PROJECT_NAME", "langgraph-agent-test")
44
- )
45
- GoogleGenAIInstrumentor().instrument(tracer_provider=tracer_provider)
46
- LangChainInstrumentor().instrument(tracer_provider=tracer_provider)
47
- print("Tracing configured with Arize.")
48
- else:
49
- print("Arize tracing skipped: ARIZE_SPACE_ID or ARIZE_API_KEY not set.")
50
- except ImportError:
51
- print("Tracing libraries not installed. Skipping tracing.")
52
- except Exception as e:
53
- print(f"Error configuring tracing: {e}")
54
 
55
  # 1. Define the state
56
  class AgentState(TypedDict):
57
  messages: Annotated[List[BaseMessage], operator.add]
58
 
59
- # Helper to split and save documents to Chroma
60
- def save_to_chroma(docs):
61
- if 'vector_store' in globals() and vector_store and docs:
62
- try:
63
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
64
- splits = splitter.split_documents(docs)
65
- if splits:
66
- vector_store.add_documents(splits)
67
- except Exception as e:
68
- print(f"Error saving to Chroma: {e}")
69
-
70
  # 2. Define the tools
71
- @tool
72
- def get_youtube_transcript(url: str) -> str:
73
- """Retrieves the transcript of a YouTube video given its URL."""
74
- try:
75
- loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
76
- docs = loader.load()
77
- if not docs:
78
- return "No transcript found. Please search Google for the video title or ID."
79
-
80
- # Save to Chroma
81
- save_to_chroma(docs)
82
-
83
- return "\n\n".join([f"Metadata: {d.metadata}\nContent: {d.page_content}" for d in docs])
84
- except Exception as e:
85
- return f"Error getting transcript: {e}. Please try searching Google for the video URL or ID."
86
-
87
- @tool
88
- def calculator(expression: str) -> str:
89
- """Calculates a mathematical expression using Python. Example: '2 + 2', '34 * 5', 'import math; math.sqrt(2)'"""
90
- try:
91
- repl = PythonREPL()
92
- if "print" not in expression:
93
- expression = f"print({expression})"
94
- return repl.run(expression)
95
- except Exception as e:
96
- return f"Error calculating: {e}"
97
-
98
- @tool
99
- def search_wikipedia(query: str) -> str:
100
- """Search Wikipedia for a query. Useful for factual lists and biographies."""
101
- try:
102
- loader = WikipediaLoader(query=query, load_max_docs=3)
103
- docs = loader.load()
104
- # Save to Chroma
105
- save_to_chroma(docs)
106
- return "\n\n".join([d.page_content[:10000] for d in docs])
107
- except Exception as e:
108
- return f"Error searching Wikipedia: {e}"
109
-
110
- # ChromaDB RAG Tool
111
- vector_store = None
112
- try:
113
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
114
- vector_store = Chroma(
115
- collection_name="agent_memory",
116
- embedding_function=embeddings,
117
- persist_directory="./chroma_db"
118
- )
119
- except Exception as e:
120
- print(f"Warning: ChromaDB initialization failed. RAG features disabled. Error: {e}")
121
-
122
- @tool
123
- def search_knowledge_base(query: str) -> str:
124
- """Searches for relevant documents in the persistent knowledge base (memory of previous searches)."""
125
- try:
126
- retriever = vector_store.as_retriever()
127
- docs = retriever.invoke(query)
128
- if not docs:
129
- return "No relevant information found."
130
- return "\n".join([d.page_content for d in docs])
131
- except Exception as e:
132
- return f"Error searching knowledge base: {e}"
133
-
134
- @tool
135
- def browse_page(url: str) -> str:
136
- """Browses a web page and extracts text using Playwright. Use this to read content from specific URLs."""
137
- if not create_sync_playwright_browser:
138
- return "Browsing unavailable (Playwright not installed)."
139
- try:
140
- browser = create_sync_playwright_browser(headless=True)
141
- page = browser.new_page()
142
- page.goto(url)
143
- text = page.inner_text("body")
144
- browser.close()
145
- # Save to Chroma
146
- if 'vector_store' in globals() and vector_store:
147
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
148
- docs = [Document(page_content=text, metadata={"source": url})]
149
- splits = splitter.split_documents(docs)
150
- vector_store.add_documents(splits)
151
- return text[:10000]
152
- except Exception as e:
153
- return f"Error browsing: {e}"
154
-
155
- @tool
156
- def search_youtube_videos(query: str) -> str:
157
- """Search for YouTube videos. Provide only the search keywords."""
158
- try:
159
- tool = YouTubeSearchTool()
160
- return tool.run(f"{query}, 3")
161
- except Exception as e:
162
- return f"Error searching YouTube: {e}"
163
-
164
- # Combine Tools (Native Google Search is enabled via model param)
165
- # Removed rag_tool/knowledge_base as it was empty -> Adding it back now
166
- tools = [get_youtube_transcript, calculator, search_wikipedia, search_knowledge_base, search_youtube_videos, browse_page]
167
  tool_node = ToolNode(tools)
168
 
169
  # 3. Define the model
170
- LLM = "gemini-2.0-flash"
171
- model = ChatGoogleGenerativeAI(
172
- model=LLM,
173
- temperature=0,
174
- max_retries=5,
175
- google_search_retrieval=True
176
- )
177
  model = model.bind_tools(tools)
178
 
179
  # 4. Define the agent node
180
  def should_continue(state):
181
  messages = state['messages']
182
  last_message = messages[-1]
 
183
  if not last_message.tool_calls:
184
  return "end"
 
185
  else:
186
  return "continue"
187
 
188
  def call_model(state):
189
  messages = state['messages']
190
  response = model.invoke(messages)
 
191
  return {"messages": [response]}
192
 
193
  # 5. Create the graph
194
  workflow = StateGraph(AgentState)
 
 
195
  workflow.add_node("agent", call_model)
196
  workflow.add_node("action", tool_node)
 
 
 
197
  workflow.add_edge(START, "agent")
198
- workflow.add_conditional_edges("agent", should_continue, {"continue": "action", "end": END})
 
 
 
 
 
 
 
 
 
 
 
 
199
  workflow.add_edge("action", "agent")
 
 
 
 
200
  app = workflow.compile()
201
 
 
202
  class LangGraphAgent:
203
  def __init__(self):
204
  self.app = app
205
 
206
- def __call__(self, question: str, task_id: str = None) -> str:
207
- messages = [
208
- SystemMessage(content="""You are a helpful assistant with multimodal capabilities (Vision, Audio, PDF analysis).
209
- Step 1: ALWAYS START by performing a Google Search (or using Wikipedia/YouTube) to gather up-to-date information. Do not answer from memory.
210
- Step 2: If a URL is provided, search for the **EXACT URL** string on Google first to identify the video/page title. Do not add keywords yet. **DO NOT use the 'youtube_search' tool for this step; use Google Search.**
211
- Step 3: Once you have the title, search for that title to find descriptions or summaries.
212
- Step 4: Analyze the information found. If you cannot access a specific page or video directly (e.g. empty transcript), DO NOT GIVE UP. Use Google Search to find descriptions, summaries, or discussions from reliable sources.
213
- Step 5: If you identify relevant Wikipedia pages or YouTube videos, use the specific tools ('search_wikipedia', 'get_youtube_transcript') to ingest them into your Knowledge Base.
214
- Step 6: Reason to find the exact answer. Verify your findings by cross-referencing multiple sources if possible. You can use 'search_knowledge_base' to connect facts you have saved.
215
- Step 7: Output the final answer strictly in this format:
216
- FINAL ANSWER: [ANSWER]
217
-
218
- Do not include "FINAL ANSWER:" in the [ANSWER] part itself.
219
- Example:
220
- Thinking: ...
221
- FINAL ANSWER: 3
222
-
223
- If the question involves an image, video, or audio file provided in the context, analyze it to answer.
224
- """),
225
- ]
226
-
227
- content = []
228
- content.append({"type": "text", "text": question})
229
-
230
- if task_id:
231
- image_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
232
- try:
233
- # Check headers first
234
- response = requests.head(image_url, timeout=5)
235
- mime_type = response.headers.get("Content-Type", "")
236
-
237
- # Allow images, audio, video, pdf
238
- if response.status_code == 200 and any(t in mime_type for t in ["image/", "audio/", "video/", "application/pdf"]):
239
- # Fetch the file
240
- img_response = requests.get(image_url, timeout=10)
241
- if img_response.status_code == 200:
242
- file_data = base64.b64encode(img_response.content).decode("utf-8")
243
- content.append({
244
- "type": "image_url", # LangChain uses this key for multimodal data URI
245
- "image_url": {"url": f"data:{mime_type};base64,{file_data}"}
246
- })
247
- except Exception as e:
248
- print(f"Error checking/fetching file: {e}")
249
-
250
- messages.append(HumanMessage(content=content))
251
-
252
- inputs = {"messages": messages}
253
  final_state = self.app.invoke(inputs)
254
- result = final_state['messages'][-1].content
255
-
256
- def extract_text(content):
257
- if isinstance(content, str):
258
- return content
259
- if isinstance(content, list):
260
- return " ".join([extract_text(c) for c in content])
261
- if isinstance(content, dict):
262
- return content.get('text', str(content))
263
- return str(content)
264
-
265
- text_result = extract_text(result)
266
- if "FINAL ANSWER:" in text_result:
267
- return text_result.split("FINAL ANSWER:")[-1].strip()
268
- return text_result
269
-
 
1
  from typing import TypedDict, Annotated, List
2
  import operator
3
  import os
 
 
4
  from langchain_google_genai import ChatGoogleGenerativeAI
5
+ from langchain_core.messages import BaseMessage, HumanMessage
6
  from langgraph.graph import StateGraph, END, START
7
  from langgraph.prebuilt import ToolNode
8
+ from langchain_tavily import TavilySearch
9
+ import google.auth
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  from dotenv import load_dotenv
11
 
12
  load_dotenv()
13
 
14
+
15
+ # Set up Google credentials
16
  try:
17
+ _, project_id = google.auth.default()
18
+ os.environ["GOOGLE_CLOUD_PROJECT"] = project_id
19
+ os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
20
+ os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True"
21
+ except google.auth.exceptions.DefaultCredentialsError:
22
+ print("Google Cloud credentials not found. Please configure your credentials.")
23
+ # You might want to fall back to an API key or raise an exception here
24
+ # For this example, we'll proceed, but it will likely fail if not configured
25
+ pass
26
+
 
 
 
 
 
 
 
 
 
27
 
28
  # 1. Define the state
29
  class AgentState(TypedDict):
30
  messages: Annotated[List[BaseMessage], operator.add]
31
 
 
 
 
 
 
 
 
 
 
 
 
32
  # 2. Define the tools
33
+ tools = [TavilySearch(max_results=1)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  tool_node = ToolNode(tools)
35
 
36
  # 3. Define the model
37
+ LLM = "gemini-1.5-flash"
38
+ model = ChatGoogleGenerativeAI(model=LLM, temperature=0)
 
 
 
 
 
39
  model = model.bind_tools(tools)
40
 
41
  # 4. Define the agent node
42
  def should_continue(state):
43
  messages = state['messages']
44
  last_message = messages[-1]
45
+ # If there are no tool calls, then we finish
46
  if not last_message.tool_calls:
47
  return "end"
48
+ # Otherwise if there are tool calls, we continue
49
  else:
50
  return "continue"
51
 
52
  def call_model(state):
53
  messages = state['messages']
54
  response = model.invoke(messages)
55
+ # We return a list, because this will get added to the existing list
56
  return {"messages": [response]}
57
 
58
  # 5. Create the graph
59
  workflow = StateGraph(AgentState)
60
+
61
+ # Define the two nodes we will cycle between
62
  workflow.add_node("agent", call_model)
63
  workflow.add_node("action", tool_node)
64
+
65
+ # Set the entrypoint as `agent`
66
+ # This means that this node is the first one called
67
  workflow.add_edge(START, "agent")
68
+
69
+ # We now add a conditional edge
70
+ workflow.add_conditional_edges(
71
+ "agent",
72
+ should_continue,
73
+ {
74
+ "continue": "action",
75
+ "end": END,
76
+ },
77
+ )
78
+
79
+ # We now add a normal edge from `action` to `agent`.
80
+ # This means that after the `action` node is called, the `agent` node is called next.
81
  workflow.add_edge("action", "agent")
82
+
83
+ # Finally, we compile it!
84
+ # This compiles it into a LangChain Runnable,
85
+ # meaning you can use it as you would any other runnable
86
  app = workflow.compile()
87
 
88
+
89
  class LangGraphAgent:
90
  def __init__(self):
91
  self.app = app
92
 
93
+ def __call__(self, question: str) -> str:
94
+ inputs = {"messages": [HumanMessage(content=question)]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  final_state = self.app.invoke(inputs)
96
+ return final_state['messages'][-1].content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -2,30 +2,16 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- import time
6
- import subprocess
7
- import sys
8
  from dotenv import load_dotenv
9
  from agent import LangGraphAgent
10
 
11
  load_dotenv()
12
 
13
- def install_playwright():
14
- try:
15
- subprocess.run(["playwright", "--version"], check=True)
16
- except (subprocess.CalledProcessError, FileNotFoundError):
17
- print("Installing Playwright browsers...")
18
- try:
19
- subprocess.run([sys.executable, "-m", "playwright", "install", "chromium"], check=True)
20
- print("Playwright browsers installed.")
21
- except Exception as e:
22
- print(f"Failed to install Playwright browsers: {e}")
23
-
24
  # (Keep Constants as is)
25
  # --- Constants ---
26
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
27
 
28
- def run_and_submit_all(profile: gr.OAuthProfile | None, *args):
29
  """
30
  Fetches all questions, runs the SimpleAgent on them, submits all answers,
31
  and displays the results.
@@ -89,18 +75,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, *args):
89
  answers_payload = []
90
  print(f"Running agent on {len(questions_data)} questions...")
91
  for item in questions_data:
92
- time.sleep(2) # Rate limit to avoid 429 errors
93
  task_id = item.get("task_id")
94
  question_text = item.get("question")
95
  if not task_id or question_text is None:
96
  print(f"Skipping item with missing task_id or question: {item}")
97
  continue
98
  try:
99
- submitted_answer = agent(question_text, task_id=task_id)
100
- # Clean answer if agent included "FINAL ANSWER:"
101
- clean_answer = submitted_answer.replace("FINAL ANSWER:", "").strip()
102
- answers_payload.append({"task_id": task_id, "submitted_answer": clean_answer})
103
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}) # Log original
104
  except Exception as e:
105
  print(f"Error running agent on task {task_id}: {e}")
106
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -217,52 +200,7 @@ with gr.Blocks() as demo:
217
  outputs=[answer_textbox]
218
  )
219
 
220
- def export_results(df):
221
- if df is None or df.empty:
222
- return None
223
- file_path = "results.txt"
224
- with open(file_path, "w", encoding="utf-8") as f:
225
- for _, row in df.iterrows():
226
- f.write(f"Task ID: {row.get('Task ID', 'N/A')}\n")
227
- f.write(f"Question: {row.get('Question', 'N/A')}\n")
228
- f.write(f"Answer: {row.get('Submitted Answer', 'N/A')}\n")
229
- f.write("-" * 40 + "\n")
230
- return file_path
231
-
232
- gr.Markdown("---")
233
- gr.Markdown("## Tools")
234
- export_button = gr.Button("Export Results to Text")
235
- file_output = gr.File(label="Download Results")
236
-
237
- export_button.click(
238
- fn=export_results,
239
- inputs=[results_table],
240
- outputs=[file_output]
241
- )
242
-
243
- with gr.Tab("Diagnostics"):
244
- gr.Markdown("### Check Playwright")
245
- pw_btn = gr.Button("Test Playwright")
246
- pw_out = gr.Textbox(label="Result")
247
-
248
- def test_playwright_btn():
249
- try:
250
- from langchain_community.tools.playwright.utils import create_sync_playwright_browser
251
- browser = create_sync_playwright_browser(headless=True)
252
- page = browser.new_page()
253
- page.goto("https://example.com")
254
- t = page.title()
255
- browser.close()
256
- return f"Success! Title: {t}"
257
- except ImportError:
258
- return "Playwright not installed/importable."
259
- except Exception as e:
260
- return f"Playwright Failed: {e}"
261
-
262
- pw_btn.click(test_playwright_btn, outputs=pw_out)
263
-
264
  if __name__ == "__main__":
265
- install_playwright()
266
  print("\n" + "-"*30 + " App Starting " + "-"*30)
267
  # Check for SPACE_HOST and SPACE_ID at startup for information
268
  space_host_startup = os.getenv("SPACE_HOST")
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
 
 
5
  from dotenv import load_dotenv
6
  from agent import LangGraphAgent
7
 
8
  load_dotenv()
9
 
 
 
 
 
 
 
 
 
 
 
 
10
  # (Keep Constants as is)
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
15
  """
16
  Fetches all questions, runs the SimpleAgent on them, submits all answers,
17
  and displays the results.
 
75
  answers_payload = []
76
  print(f"Running agent on {len(questions_data)} questions...")
77
  for item in questions_data:
 
78
  task_id = item.get("task_id")
79
  question_text = item.get("question")
80
  if not task_id or question_text is None:
81
  print(f"Skipping item with missing task_id or question: {item}")
82
  continue
83
  try:
84
+ submitted_answer = agent(question_text)
85
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer.strip()})
86
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
87
  except Exception as e:
88
  print(f"Error running agent on task {task_id}: {e}")
89
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
200
  outputs=[answer_textbox]
201
  )
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  if __name__ == "__main__":
 
204
  print("\n" + "-"*30 + " App Starting " + "-"*30)
205
  # Check for SPACE_HOST and SPACE_ID at startup for information
206
  space_host_startup = os.getenv("SPACE_HOST")
requirements.txt CHANGED
@@ -8,22 +8,4 @@ tavily-python
8
  langchain-google-genai
9
  google-auth
10
  langchain-tavily
11
- google-cloud-aiplatform
12
- youtube-transcript-api
13
- arize-otel
14
- openinference-instrumentation-google-genai
15
- openinference-instrumentation-langchain
16
- wikipedia
17
- langchain-experimental
18
- arxiv
19
- xmltodict
20
- chromadb
21
- langchain-chroma
22
- langchain-huggingface
23
- youtube-search
24
- sentence-transformers
25
- playwright
26
- lxml
27
- pytubefix
28
- pandas
29
- openpyxl
 
8
  langchain-google-genai
9
  google-auth
10
  langchain-tavily
11
+ google-cloud-aiplatform