Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv | |
| from typing import TypedDict, List, Dict, Any, Optional | |
| from urllib.parse import urlparse | |
| from langgraph.graph import StateGraph, START, END, MessagesState | |
| from langchain.agents import create_tool_calling_agent, ConversationalAgent, AgentExecutor, initialize_agent, create_react_agent | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_groq import ChatGroq | |
| from langchain_core.tools import tool, Tool | |
| from langchain_core.messages import HumanMessage, SystemMessage | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain_core.prompts import ChatPromptTemplate, PromptTemplate | |
| from langgraph.prebuilt import ToolNode | |
| from langgraph.prebuilt import tools_condition | |
| # 1. Web Browsing | |
| from langchain_community.tools import DuckDuckGoSearchResults | |
| from langchain_community.document_loaders import ImageCaptionLoader | |
| import requests, time, yt_dlp | |
| import pandas as pd | |
| from pathlib import Path | |
| from bs4 import BeautifulSoup | |
| from langchain_community.tools import WikipediaQueryRun | |
| from langchain_community.utilities import WikipediaAPIWrapper, DuckDuckGoSearchAPIWrapper | |
| from langchain_community.document_loaders import YoutubeLoader | |
| from langchain_community.document_loaders import UnstructuredExcelLoader | |
| from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain_community.utilities import GoogleSerperAPIWrapper | |
# Load variables from a local .env file into the process environment; the
# tools below read their API keys with os.getenv().
load_dotenv()
# Base URL of the scoring service. Task attachments are downloaded from
# f"{DEFAULT_API_URL}/files/{task_id}" by the file-reading tools.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def duckduck_websearch(query: str) -> str:
    """Search the web through DuckDuckGo and return the results as text.

    The LangChain ``DuckDuckGoSearchAPIWrapper`` is tried first. If it raises,
    the DuckDuckGo HTML endpoint is scraped directly as a fallback.

    Args:
        query: what you want to search

    Returns:
        The wrapper's result text, a Markdown-formatted list of scraped
        results, ``"No search results found."`` when nothing came back, or an
        error message when both approaches fail. The function never raises,
        so the calling agent always receives an observation string.
    """
    try:
        search = DuckDuckGoSearchAPIWrapper(max_results=5)
        results = search.run(query)
        if not results or results.strip() == "":
            return "No search results found."
        return results
    except Exception as e:
        print(str(e))
        print('Try to use request method for duckcudckgo Search')

    # Fallback: scrape the plain-HTML result page.
    try:
        base_url = "https://html.duckduckgo.com/html"
        response = requests.get(base_url, params={"q": query}, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # The list must exist before the loop appends to it; the previous
        # version appended to an unbound name and crashed here.
        hits = []
        for result in soup.find_all('div', {'class': 'result'}):
            title = result.find('a', {'class': 'result__a'})
            snippet = result.find('a', {'class': 'result__snippet'})
            if title and snippet:
                hits.append({
                    'title': title.get_text(),
                    'snippet': snippet.get_text(),
                    'url': title.get('href'),
                })
    except Exception as e:
        return f"Error searching DuckDuckGo: {str(e)}"

    if not hits:
        return "No search results found."

    # Limit to the top 10 scraped results.
    formatted_results = [
        f"[{r['title']}]({r['url']})\n{r['snippet']}\n" for r in hits[:10]
    ]
    return "## Search Results\n\n" + "\n".join(formatted_results)
def serper_websearch(query: str) -> str:
    """Run a web search through the Serper API.

    Args:
        query: what you want to search

    Returns:
        Whatever ``GoogleSerperAPIWrapper.run`` returns for the query.
    """
    # The key is read from the environment on every call.
    api_key = os.getenv("SERPER_API_KEY")
    serper = GoogleSerperAPIWrapper(serper_api_key=api_key)
    return serper.run(query)
def visit_webpage(url: str) -> str:
    """Download a web page and return the beginning of its raw HTML.

    Args:
        url: the webpage url

    Returns:
        The first 5000 characters of the response body, or an
        ``[ERROR fetching ...]`` message if anything goes wrong.
    """
    try:
        page = requests.get(url, timeout=5)
        html = page.text
    except Exception as err:
        return f"[ERROR fetching {url}]: {str(err)}"
    else:
        # Truncate so the page fits comfortably in the model's context.
        return html[:5000]
def wiki_search(query: str) -> str:
    """Look a topic up on Wikipedia.

    Args:
        query: what you want to wiki

    Returns:
        The output of ``WikipediaQueryRun`` for the query, configured for a
        single top result with ``doc_content_chars_max=100``.
    """
    wikipedia_api = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=100)
    wikipedia_tool = WikipediaQueryRun(api_wrapper=wikipedia_api)
    payload = {"query": query}
    return wikipedia_tool.run(payload)
def text_splitter(text: str) -> List[str]:
    """Break a string into chunks with LangChain's ``CharacterTextSplitter``.

    Args:
        text: A string of text to split.

    Returns:
        The chunks produced with ``chunk_size=450`` and ``chunk_overlap=10``.
    """
    chunker = CharacterTextSplitter(chunk_size=450, chunk_overlap=10)
    chunks = chunker.split_text(text)
    return chunks
def youtube_transcript(video_url: str) -> str:
    """Fetch the transcript of a YouTube video as plain text.

    Args:
        video_url: YouTube video url

    Returns:
        The text of every document produced by ``YoutubeLoader`` joined with
        newlines, a short notice when the loader yields nothing, or an
        ``Error fetching transcript: ...`` message on failure.
    """
    try:
        loader = YoutubeLoader.from_youtube_url(video_url)
        docs = loader.load()
    except Exception as e:
        return f"Error fetching transcript: {str(e)}"

    # load() returns Document objects; the tool contract (and the declared
    # return type) is a string, so flatten them the same way mp3_listen does.
    transcript = "\n".join(doc.page_content for doc in docs)
    return transcript if transcript else "No transcript available for this video."
| # 4. File Reading | |
def read_file(task_id: str) -> str:
    """Download the file attached to a task and return its content as text.

    Args:
        task_id: the task_id whose attachment should be fetched

    Returns:
        The file content decoded as UTF-8 (undecodable bytes are replaced
        rather than raising), or an ``Error reading file: ...`` message when
        the download fails.
    """
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Surface 4xx/5xx responses instead of returning an error page as
        # if it were the attachment.
        r.raise_for_status()
    except Exception as e:
        return f"Error reading file: {str(e)}"

    # Decode in memory: round-tripping through a temp file opened in text
    # mode used the platform's default encoding and could fail on arbitrary
    # bytes.
    return r.content.decode("utf-8", errors="replace")
def excel_read(task_id: str) -> str:
    """Download the Excel file attached to a task and summarise it.

    Args:
        task_id: the task_id whose attachment should be fetched

    Returns:
        A text report with the row/column counts, the column names and
        ``DataFrame.describe()`` statistics, or an
        ``Error analyzing Excel file: ...`` message on any failure.
    """
    import io

    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        r.raise_for_status()

        # Parse straight from memory; no fixed-name temp file is left in the
        # working directory.
        df = pd.read_excel(io.BytesIO(r.content))

        # Headers may be numbers or dates, so stringify before joining.
        column_names = ', '.join(map(str, df.columns))
        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {column_names}\n\n"
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
def csv_read(task_id: str) -> str:
    """Download the CSV file attached to a task and summarise it.

    Args:
        task_id: the task_id whose attachment should be fetched

    Returns:
        A text report with the row/column counts, the column names and
        ``DataFrame.describe()`` statistics, or an
        ``Error analyzing CSV file: ...`` message on any failure.
    """
    import io

    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        r.raise_for_status()

        # The previous version passed the bare expression `temp.csv`
        # (an undefined name) to read_csv, so every call failed.
        df = pd.read_csv(io.BytesIO(r.content))

        # Headers may be non-string (e.g. integers), so stringify them.
        column_names = ', '.join(map(str, df.columns))
        result = (
            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {column_names}\n\n"
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"
def mp3_listen(task_id: str) -> str:
    """Download the MP3 file attached to a task and transcribe it.

    Args:
        task_id: the task_id whose attachment should be fetched

    Returns:
        The transcript text produced by ``AssemblyAIAudioTranscriptLoader``
        (documents joined with newlines), or an
        ``Error transcribing MP3 file: ...`` message on any failure.
    """
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Do not hand an HTTP error page to the transcription service.
        r.raise_for_status()

        # The loader takes a file path, so the audio is written to disk first.
        with open('temp.mp3', "wb") as fp:
            fp.write(r.content)

        loader = AssemblyAIAudioTranscriptLoader(
            file_path="temp.mp3",
            api_key=os.getenv("AssemblyAI_API_KEY"),
        )
        docs = loader.load()
        return "\n".join(doc.page_content for doc in docs)
    except Exception as e:
        # Report failures as text, matching the other file-reading tools.
        return f"Error transcribing MP3 file: {str(e)}"
| # 5. Image Open | |
def image_caption(dir: str) -> str:
    """Describe an image using LangChain's ``ImageCaptionLoader``.

    Args:
        dir: the image url link

    Returns:
        The ``page_content`` of the first document the loader produces.
    """
    documents = ImageCaptionLoader(images=[dir]).load()
    first_document = documents[0]
    return first_document.page_content
| # 2. Coding | |
| from langchain_experimental.tools import PythonREPLTool | |
def run_python(code: str):
    """Execute Python source through a newly created ``PythonREPLTool``.

    Args:
        code: the python code

    Returns:
        Whatever ``PythonREPLTool.run`` returns for the given code.
    """
    # NOTE(review): the code comes from the language model and is executed
    # as-is; confirm this only ever runs in a sandboxed environment.
    repl_tool = PythonREPLTool()
    output = repl_tool.run(code)
    return output
def multiply(a: float, b: float) -> float:
    """Return the product of two numbers.

    Args:
        a: first float
        b: second float
    """
    product = a * b
    return product
def add(a: float, b: float) -> float:
    """Return the sum of two numbers.

    Args:
        a: first float
        b: second float
    """
    total = a + b
    return total
def subtract(a: float, b: float) -> float:
    """Return ``a`` minus ``b``.

    Args:
        a: first float
        b: second float
    """
    difference = a - b
    return difference
def divide(a: float, b: float) -> float:
    """Return ``a`` divided by ``b``.

    Args:
        a: first float
        b: second float

    Raises:
        ValueError: if ``b`` equals zero.
    """
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")
| # 3. Multi-Modality | |
| # - multiply: multiply two numbers, A and B | |
| # - add: add two numbers, A and B | |
| # - subtract: Subtract A by B with passing A as the first argument | |
| # - divide: Divide A by B with passing A as the first argument | |
| # ("human", f"Question: {question}\nReport to validate: {final_answer}") | |
class BasicAgent:
    """Conversational, tool-using agent driven by a Gemini chat model.

    An instance is callable: it takes a task dict (``task_id``, ``question``
    and optionally ``file_name``) and returns the agent's answer as a string.

    Class attributes (override on an instance to tune behaviour):
        cooldown_seconds: pause after every successful answer. Presumably a
            rate-limit guard for the model API -- TODO confirm.
        max_retries: how many times a failing question is attempted.
        retry_base_sleep: base delay in seconds; attempt ``k`` (1-based)
            waits ``retry_base_sleep * k`` before the next try.
    """

    cooldown_seconds = 60
    max_retries = 5
    retry_base_sleep = 1

    def __init__(self):
        """Create the chat model, the tool list, the memory and the executor."""
        self.model = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-lite",
            temperature=0,
            max_tokens=1024,
            candidate_count=1,
            google_api_key=os.getenv("GEMINI_API_KEY"),
        )

        # System prompt for few-shot prompting. It is stored on the instance
        # but not wired into the agent built by __setup_agent__.
        self.sys_prompt = """
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separared list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.
You have access to the following tools:
- serper_websearch: web search the content of the query by passing the query as input with Serper Search Engine
- duckduck_websearch: web search the content of the query by passing the query as input with DuckDuckGo Search Engine
- visit_webpage: visit the given webpage url by passing the url as input
- wiki_search: wiki search the content of the query by passing the query as input if the question asks for wiki search it
- text_splitter: split text into chunks
- youtube_transcript: fetch the transcript of the Youtube video by passing the video url as input if the question asks for watching a Youtube video
- read_file: read the content of the attached file by passing the TASK-ID as input
- excel_read: read the content of the attached excel file by passing the TASK-ID as input
- csv_read: read the content of the attached csv file by passing the TASK-ID as input
- mp3_listen: listen to the content of the attached mp3 file by passing the TASK-ID as input
- image_caption: understand the visual content of the attached image by passing the TASK-ID as input
- run_python: run the python code
If Task ID is included in the question, remember to call the relevant read tools [ie. read_file, excel_read, csv_read, mp3_listen, image_caption]
Note: run_python is called when the question mentions the term "Python" or any math calculation.
"""

        # (name, callable, description) for every tool exposed to the agent.
        tool_specs = [
            ("duckduck_websearch", duckduck_websearch, "Search the web for information with DuckDuckGo"),
            ("serper_websearch", serper_websearch, "Search the web for information with Serper"),
            ("visit_webpage", visit_webpage, "Directly visit the webpage"),
            ("wiki_search", wiki_search, "Search the information on Wikipedia"),
            ("text_splitter", text_splitter, "Split text into chunks"),
            ("analyze_video", self._analyze_video, "Analyze YouTube video content directly"),
            ("youtube_transcript", youtube_transcript, "Fetch the transcript of YouTube video"),
            ("read_file", read_file, "Read the file content"),
            ("excel_read", excel_read, "Read the content of Excel file"),
            ("csv_read", csv_read, "Read the content of CSV file"),
            ("mp3_listen", mp3_listen, "Listen to the MP3 file"),
            ("image_caption", image_caption, "Understand the image content"),
            ("run_python", run_python, "Run Python code"),
        ]
        self.tools = [
            Tool(name=name, func=func, description=description)
            for name, func, description in tool_specs
        ]

        # Conversation memory injected into the prompt as {chat_history}.
        # NOTE(review): the same memory object is reused for every task, so
        # earlier questions stay in the history of later ones -- confirm this
        # is intended for independent benchmark questions.
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )
        self.agent = self.__setup_agent__()
        print("BasicAgent initialized.")

    def __call__(self, task: dict) -> str:
        """Answer one task, retrying when the agent raises.

        Args:
            task: mapping with ``task_id`` and ``question``; ``file_name`` is
                optional. When it is present and non-empty the task id is
                appended to the question so the agent can pass it to the
                file-reading tools.

        Returns:
            The agent's answer, or an error message once ``max_retries``
            attempts have failed.
        """
        task_id, question = task["task_id"], task["question"]
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        # A missing key is treated the same as "no attachment".
        if task.get("file_name"):
            question = f"{question} with TASK-ID: {task_id}"

        for attempt in range(self.max_retries):
            try:
                answer = self.agent.run(question)
            except Exception as e:
                if attempt == self.max_retries - 1:
                    return f"Error processing query after {self.max_retries} attempts: {str(e)}"
                sleep_time = self.retry_base_sleep * (attempt + 1)
                print(str(e))
                print(f"Attempt {attempt + 1} failed. Retrying in {sleep_time} seconds...")
                time.sleep(sleep_time)
                continue

            print(f"Agent returning fixed answer: {answer}")
            time.sleep(self.cooldown_seconds)
            return answer

        # Only reachable when max_retries is zero or negative.
        return "This is a default answer."

    def _analyze_video(self, url: str) -> str:
        """Ask the model to analyse a YouTube video from its metadata.

        Only the title and description extracted by yt-dlp are sent to the
        model; the video itself is neither downloaded nor shown to it.

        Args:
            url: a YouTube URL including the http:// or https:// scheme.

        Returns:
            The model's analysis, or a human-readable message explaining why
            the video could not be analysed.
        """
        try:
            parsed_url = urlparse(url)
            if not (parsed_url.scheme and parsed_url.netloc):
                return "Please provide a valid video URL with http:// or https:// prefix."
            if 'youtube.com' not in url and 'youtu.be' not in url:
                return "Only YouTube videos are supported at this time."
        except Exception as e:
            return f"Error analyzing video: {str(e)}"

        # Minimal, metadata-only extraction.
        # NOTE(review): yt-dlp documents this option as 'noplaylist'; the
        # 'no_playlist' key is kept unchanged here -- confirm and rename.
        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': True,
            'no_playlist': True,
            'youtube_include_dash_manifest': False,
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                try:
                    info = ydl.extract_info(url, download=False, process=False)
                    if not info:
                        return "Could not extract video information."
                    title = info.get('title', 'Unknown')
                    description = info.get('description', '')

                    prompt = f"""Please analyze this YouTube video:
Title: {title}
URL: {url}
Description: {description}
Please provide a detailed analysis focusing on:
1. Main topic and key points from the title and description
2. Expected visual elements and scenes
3. Overall message or purpose
4. Target audience"""

                    response = self.model.invoke([HumanMessage(content=prompt)])
                    return response.content if hasattr(response, 'content') else str(response)
                except Exception as e:
                    if 'Sign in to confirm' in str(e):
                        return "This video requires age verification or sign-in. Please provide a different video URL."
                    return f"Error accessing video: {str(e)}"
        except Exception as e:
            return f"Error extracting video info: {str(e)}"

    def __setup_agent__(self) -> "AgentExecutor":
        """Build the ConversationalAgent and wrap it in an AgentExecutor.

        Returns:
            An executor that uses ``self.tools`` and ``self.memory``, allows
            up to 30 iterations and recovers from output-parsing errors.
        """
        prefix = """
You are a general AI assistant that can use various tools to answer question. I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separared list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.
NOTE:
- If Task ID is included in the question, remember to call the relevant read tools [ie. read_file, excel_read, csv_read, mp3_listen, image_caption]
- run_python is called when the question mentions the term "Python" or any math calculation.
"""
        format_instructions = """
To use a tool, use the following format:
Thought: Do I need to use a tool? Yes
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
Thought: Do I need to use a tool? No
Final Answer: [your response here]
Begin! Remember to ALWAYS include 'Thought:', 'Action:', 'Action Input:', and 'Final Answer:' in your responses.
"""
        suffix = """
Previous conversation history:
{chat_history}
New question: {input}
{agent_scratchpad}
"""
        agent = ConversationalAgent.from_llm_and_tools(
            llm=self.model,
            tools=self.tools,
            prefix=prefix,
            format_instructions=format_instructions,
            suffix=suffix,
            # The default conversational output parser finishes on
            # "<ai_prefix>:" (default "AI:"). The format instructions above
            # ask for "Final Answer:", so the prefix must match or the final
            # answer is never recognised.
            ai_prefix="Final Answer",
            # {tool_names} is filled in while the prompt is built, so it is
            # not a runtime input variable.
            input_variables=["input", "chat_history", "agent_scratchpad"],
        )
        # Parsing-error recovery is an executor option, so it is set here
        # rather than on the agent.
        return AgentExecutor.from_agent_and_tools(
            agent=agent,
            tools=self.tools,
            memory=self.memory,
            max_iterations=30,
            verbose=True,
            handle_parsing_errors=True,
        )