Spaces:
Sleeping
Sleeping
import base64
import io
import os
import re
import tempfile

from dotenv import load_dotenv
import requests

# langchain imports
from langchain_core.tools import Tool, tool
from langchain_core.messages import HumanMessage
from langgraph.prebuilt import create_react_agent
#from langgraph.graph import START, StateGraph
#from langgraph.prebuilt import tools_condition
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.retrievers import WikipediaRetriever
from langchain_experimental.utilities import PythonREPL
from langchain_google_genai import ChatGoogleGenerativeAI
#from langchain_ollama import ChatOllama
from langfuse.langchain import CallbackHandler

# tool imports
import pandas as pd
import whisper
from youtube_transcript_api import YouTubeTranscriptApi
# Load variables from a local .env file into the process environment.
load_dotenv()

# Langfuse callback handler used for tracing.
langfuse_handler = CallbackHandler()

# --- LLM ---
#llm = ChatOllama(model="qwen3:8b", temperature=0)
llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash', temperature=0)

# --- System Prompt ---
# The prompt text lives in a separate file, resolved relative to the
# current working directory.
with open('system_prompt.txt', mode='r', encoding='utf-8') as prompt_file:
    system_prompt = prompt_file.read()
# --- Tools ---
# python REPL tool
python_repl = PythonREPL()

# Wrap the REPL's ``run`` method as a named tool; the description is built
# from adjacent string literals and is passed through unchanged.
execute_python = Tool(
    func=python_repl.run,
    name="execute_python",
    description=(
        "A Python shell. Use this tool to execute python commands. "
        "Input should be valid python code. "
        "If you want to see the output of a value, you should print it out with `print(...)`."
    ),
)
def get_youtube_transcript(url: str) -> str:
    """
    Retrieve the text transcript of a YouTube video
    Args:
        url (str): link to the YouTube video
    Returns:
        str: text transcript
    """
    # Each pattern captures the 11-character video id from one URL shape.
    # Defined once per call instead of inside the helper's body.
    patterns = (
        r"v=([a-zA-Z0-9_-]{11})",  # regular link
        r"youtu\.be/([a-zA-Z0-9_-]{11})",  # shortened link
        r"youtube\.com/embed/([a-zA-Z0-9_-]{11})",  # embed link
        r"youtube\.com/shorts/([a-zA-Z0-9_-]{11})",  # shorts link
        r"youtube\.com/live/([a-zA-Z0-9_-]{11})",  # live-stream link
    )

    def extract_video_id(url: str) -> str:
        # extracts video id from youtube url; first matching pattern wins
        for pattern in patterns:
            match = re.search(pattern, url)
            if match:
                return match.group(1)
        raise ValueError("Invalid YouTube URL")

    # Everything (including id extraction) stays inside the try so the tool
    # always hands an error *string* back to the agent instead of raising.
    try:
        video_id = extract_video_id(url)
        api = YouTubeTranscriptApi()
        transcript = api.fetch(video_id)
        # One caption snippet per line.
        return '\n'.join(snippet.text for snippet in transcript.snippets)
    except Exception as e:
        return f"An error occured using get_youtube_transcript tool: {e}"
def reverse_string(text: str) -> str:
    """
    A tool that returns a text string with its characters in reverse order
    Args:
        text (str): text string to reverse
    Returns:
        str: reversed text string
    """
    try:
        # A slice with step -1 walks the sequence back to front.
        flipped = text[::-1]
    except Exception as err:
        return f"An error occured using reverse_string tool: {err}"
    return flipped
def search_web(query: str) -> str:
    """
    A tool to look up a query with a web search
    Args:
        query (str): query to search on the web
    Returns:
        str: web search result
    """
    try:
        # A fresh DuckDuckGo search runner is built for every call.
        return DuckDuckGoSearchRun().invoke(query)
    except Exception as err:
        # Failures are reported back as text rather than raised.
        return f"An error occured using search_web tool: {err}"
def search_wikipedia(query: str) -> str:
    """
    A tool to perform a search for a query using Wikipedia
    Args:
        query (str): query to search on Wikipedia
    Returns:
        str: wikipedia search result
    """
    try:
        retriever = WikipediaRetriever()
        documents = retriever.invoke(query)
        if not documents:
            return f"No Wikipedia results found for: {query}"
        # The retriever returns Document objects, not text. Flatten them so the
        # declared ``str`` return type actually holds and the agent receives
        # readable article text instead of a list of object reprs.
        sections = []
        for doc in documents:
            # presumably each Document carries a "title" metadata entry;
            # fall back to the bare text when it is absent
            title = doc.metadata.get("title", "") if doc.metadata else ""
            body = doc.page_content
            sections.append(f"{title}\n{body}" if title else body)
        return "\n\n---\n\n".join(sections)
    except Exception as e:
        # The message names the tool as the agent knows it (search_wikipedia).
        return f"An error occured using search_wikipedia tool: {e}"
def transcribe_audio(url: str) -> str:
    """
    A tool to transcribe an audio file (.mp3) using an automatic speech recognition model
    Args:
        url (str): link to audio file (.mp3)
    Returns:
        str: transcript of the audio file
    """
    try:
        # fetch audio file; the timeout keeps a stalled server from hanging the agent
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        # Write to a unique temporary file rather than a fixed name in the
        # working directory, so concurrent calls cannot overwrite each other.
        # delete=False + explicit removal lets the file be closed before the
        # model opens it by path.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            audio_file.write(response.content)
            audio_path = audio_file.name
        try:
            # transcribe
            model = whisper.load_model('tiny')
            result = model.transcribe(audio_path)
        finally:
            # Always clean up the download, even when transcription fails.
            os.remove(audio_path)
        return result['text']
    except Exception as e:
        return f"An error occured using transcribe_audio tool: {e}"
def view_png_file(url: str) -> str:
    """
    A tool to view the contents of an image file (.png)
    Args:
        url (str): link to image file (.png)
    Returns:
        str: image contents
    """
    try:
        # fetch the image; the timeout keeps a stalled server from hanging the agent
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # convert image bytes to base64
        encoded = base64.b64encode(response.content).decode('utf-8')
        # NOTE(review): on success this returns a (text, artifact) tuple even
        # though the signature says ``str``; the shape is kept as-is because
        # how the agent framework consumes it is not visible here -- confirm.
        # text + image artifact
        return (
            "Here is the image.",
            [{
                "type": "image",
                "source": {
                    "type": "url",
                    # A bare base64 payload is not a URL; wrap it in a data URL
                    # so the "url" field holds something a consumer can resolve.
                    "url": f"data:image/png;base64,{encoded}",
                }
            }]
        )
    except Exception as e:
        return f"An error occured using view_png_file tool: {e}"
def view_py_file(url: str) -> str:
    """
    A tool to view the contents of a python file (.py)
    Args:
        url (str): link to python file (.py)
    Returns:
        str: contents of python file
    """
    try:
        # fetch python file; the timeout keeps a stalled server from hanging the agent
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # When a text/* response declares no charset, requests decodes it as
        # ISO-8859-1, which garbles non-ASCII characters in UTF-8 source files.
        # Python source defaults to UTF-8, so use that unless the server says otherwise.
        content_type = response.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            response.encoding = "utf-8"
        return response.text
    except Exception as e:
        return f"An error occured using view_py_file tool: {e}"
def view_xlsx_file(url: str) -> str:
    """
    A tool to view the contents of an excel file (.xlsx)
    Args:
        url (str): link to excel file (.xlsx)
    Returns:
        str: contents of excel file
    """
    try:
        # fetch excel file; the timeout keeps a stalled server from hanging the agent
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # Parse the workbook straight from memory: no fixed-name temp file is
        # left behind in the working directory, and concurrent calls cannot
        # overwrite each other's download.
        data = pd.read_excel(io.BytesIO(response.content))
        return data.to_string()
    except Exception as e:
        return f"An error occured using view_xlsx_file tool: {e}"
# agent toolkit
# One entry per line: the REPL tool object plus the plain tool functions.
tools = [
    execute_python,
    get_youtube_transcript,
    reverse_string,
    search_web,
    search_wikipedia,
    transcribe_audio,
    view_png_file,
    view_py_file,
    view_xlsx_file,
]

# --- LangGraph ---
# Prebuilt ReAct agent wired to the LLM, the toolkit and the system prompt.
agent = create_react_agent(llm, tools, prompt=system_prompt)
class GAIAAgent:
    """Callable wrapper that runs the module-level agent on a single question."""

    # Captures the rest of the line after the "FINAL ANSWER:" marker. ``\s*``
    # tolerates zero or several whitespace characters (including a line break)
    # between the marker and the answer.
    _FINAL_ANSWER_PATTERN = re.compile(r"FINAL ANSWER:\s*(.*)")

    def __init__(self):
        print("GAIAAgent initialized.")

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten message content (a string or a list of content blocks) to text."""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Blocks may be bare strings or dicts; only dicts carrying a string
            # under "text" contribute, anything else is skipped.
            parts = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif isinstance(block, dict) and isinstance(block.get("text"), str):
                    parts.append(block["text"])
            return "".join(parts)
        return str(content)

    @staticmethod
    def _extract_final_answer(content) -> str:
        """Return the text after "FINAL ANSWER:", or the whole message text if the marker is absent."""
        text = GAIAAgent._content_to_text(content)
        match = GAIAAgent._FINAL_ANSWER_PATTERN.search(text)
        if match:
            return match.group(1).strip()
        return text

    def __call__(self, question: str) -> str:
        """
        Run the agent on one question and return its answer.

        Args:
            question (str): the question text passed to the agent as a human message
        Returns:
            str: the extracted final answer, or the full last message when no
                "FINAL ANSWER:" marker is present
        """
        print(f"Agent received question: {question}")
        # ``invoke`` returns the final graph state; the conversation is under "messages".
        state = agent.invoke(
            {"messages": [
                #SystemMessage(content=system),
                HumanMessage(content=question)
            ]},
            config={
                "callbacks": [langfuse_handler],
                "recursion_limit": 10
            }
        )
        # extract answer from the last message of the run
        final_answer = self._extract_final_answer(state['messages'][-1].content)
        print(f"Agent returning answer: {final_answer}")
        return final_answer