General_AI_Assistant_GAIA

Sleeping

App Files Files Community

laverdes committed on Jun 20, 2025

Commit

a60e9fe

verified ·

1 Parent(s): a21e3ef

feat: smart_read_file, extract clean text, extra tools

Browse files

Files changed (1) hide show

tools.py +178 -62

tools.py CHANGED Viewed

@@ -3,12 +3,14 @@ import base64
 import json
 import inspect
 import time
-from typing import Callable
 from datetime import datetime, timezone
 from langchain.tools import tool
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_core.messages import HumanMessage
 from langchain_google_genai.chat_models import ChatGoogleGenerativeAIError
@@ -21,6 +23,10 @@ from langchain_google_community import SpeechToTextLoader
 from langchain_community.tools import YouTubeSearchTool
 from youtube_transcript_api import YouTubeTranscriptApi
 from langchain_community.tools.file_management.read import ReadFileTool
 from basic_agent import print_conversation
@@ -115,28 +121,94 @@ def search_and_extract(query: str) -> list[dict]:
     return structured_results
-youtube_search_api = YouTubeSearchTool()
 @tool
def youtube_search_tool(query: str, number_of_results:int=3) -> list:
    """Search YouTube for a query and return the top number_of_results."""
    # The docstring is kept verbatim: `@tool` exposes it as the tool description.
    if CUSTOM_DEBUG:
        print_tool_call(
            youtube_search_tool,
            tool_name='youtube_search_tool',
            # Fix: the key must be the *string* 'number_of_results'. The
            # original used the variable itself as the key, logging `{3: 3}`.
            args={'query': query, 'number_of_results': number_of_results},
        )
    # The wrapped tool is given one "query,count" string, as in the original.
    # NOTE(review): the underlying tool may return a string rather than a
    # list; the `-> list` annotation is left untouched -- confirm.
    response = youtube_search_api.run(f"{query},{number_of_results}")
    if CUSTOM_DEBUG:
        print_tool_response(response)
    return response
 def extract_video_id(url: str) -> str:
     parsed = urlparse(url)
     return parse_qs(parsed.query).get("v", [""])[0]
 @tool
 def load_youtube_transcript(url: str) -> str:
     """Load a YouTube transcript using youtube_transcript_api."""
@@ -165,43 +237,21 @@ def load_youtube_transcript(url: str) -> str:
         return error_str
-gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
 @tool
def image_query_tool(image_path: str, question: str) -> str:
    """
    Uses Gemini Vision to answer a question about an image.
    - image_path: file path to the image to analyze (.png)
    - question: the query to ask about the image
    """
    # The docstring is kept verbatim: `@tool` exposes it as the tool description.
    # Failures are returned as plain strings instead of being raised.
    try:
        base64_img = encode_image_to_base64(image_path)
    except OSError:
        response = f"OSError: Invalid argument (invalid image path or file format): {image_path}. Please provide a valid PNG image."
        # Fix: debug output is now gated on CUSTOM_DEBUG like every other
        # debug print in this module (it used to print unconditionally).
        if CUSTOM_DEBUG:
            print_tool_response(response)
        return response

    # The payload is always labelled as PNG, matching the docstring.
    base64_img_str = f"data:image/png;base64,{base64_img}"
    if CUSTOM_DEBUG:
        print_tool_call(
            image_query_tool,
            tool_name='image_query_tool',
            # Only the first 100 characters are logged to keep output short.
            args={'base64_image': base64_img_str[:100], 'question': question},
        )

    msg = HumanMessage(content=[
        {"type": "text", "text": question},
        {"type": "image_url", "image_url": base64_img_str},
    ])
    try:
        response = gemini.invoke([msg])
    except ChatGoogleGenerativeAIError:
        response = "ChatGoogleGenerativeAIError: Invalid argument provided to Gemini: 400 Provided image is not valid"
        if CUSTOM_DEBUG:
            print_tool_response(response)
        return response

    if CUSTOM_DEBUG:
        print_tool_response(response.content)
    return response.content
 @tool
@@ -223,43 +273,109 @@ def search_and_extract_from_wikipedia(query: str) -> list:
 @tool
 def transcribe_audio(file_path: str) -> list:
-    """Transcribe audio from a file using Google Speech-to-Text."""
     if CUSTOM_DEBUG:
         print_tool_call(
             transcribe_audio,
             tool_name='transcribe_audio',
             args={'file_path': file_path},
         )
-    project_id = os.getenv("GOOGLE_CLOUD_PROJECT_ID")
-    loader = SpeechToTextLoader(
-        project_id=project_id,
-        file_path=file_path,
-        is_long = False,  # Set to True for long audio files
-    )
-    docs = loader.load()
-    docs_content = [doc.page_content for doc in docs]
     if CUSTOM_DEBUG:
         print_tool_response(docs_content)
     return docs_content
 read_tool = ReadFileTool()
 @tool
def read_file_tool(file_path: str) -> str:
    """Read the content of a file. Use this tool to read .py, .csv, .md, text files, PDFs, etc."""
    # The docstring is kept verbatim: `@tool` exposes it as the tool description.
    if CUSTOM_DEBUG:
        print_tool_call(
            read_file_tool,
            tool_name='read_file_tool',
            args={'file_path': file_path},
        )

    # Fix: check for the file *before* reading it. The original invoked the
    # reader first and only afterwards replaced its result with the
    # "File not found" message, printing that message twice.
    if os.path.exists(file_path):
        response = read_tool.invoke({"file_path": file_path})
    else:
        response = f"File not found: {file_path}"

    # Debug output is gated on CUSTOM_DEBUG, consistent with the call log above.
    if CUSTOM_DEBUG:
        print_tool_response(response)
    return response

 import json
 import inspect
 import time
+import trafilatura
+from typing import Callable, Union
 from datetime import datetime, timezone
+from markitdown import MarkItDown
 from langchain.tools import tool
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_core.messages import HumanMessage
 from langchain_google_genai.chat_models import ChatGoogleGenerativeAIError
 from langchain_community.tools import YouTubeSearchTool
 from youtube_transcript_api import YouTubeTranscriptApi
 from langchain_community.tools.file_management.read import ReadFileTool
+from langchain.chains.summarize import load_summarize_chain
+from langchain.prompts import PromptTemplate
+from langchain_core.documents import Document
+from langchain_openai import ChatOpenAI
 from basic_agent import print_conversation
     return structured_results
 @tool
def aggregate_information(results: list[str], query: str) -> str:
    """
    Processes a list of unstructured text chunks (e.g., search results) and produces a concise, query-specific summary.
    Each input text is filtered and summarized individually in the context of the provided query. Irrelevant results are discarded.
    Relevant content is aggregated and synthesized into a final, coherent answer that directly addresses the query.
    """
    # The docstring is kept verbatim: `@tool` exposes it as the tool description.
    #
    # Returns the plain message "No search results provided." when `results`
    # is empty; otherwise a JSON string of the form {"summary": "<text>"}.
    if CUSTOM_DEBUG:
        print_tool_call(
            aggregate_information,
            tool_name='aggregate_information',
            args={'results': results, 'query': query},
        )

    # Guard clause: nothing to summarise.
    if not results:
        empty_message = "No search results provided."
        if CUSTOM_DEBUG:
            print_tool_response(empty_message)
        return empty_message

    # One LangChain Document per incoming chunk.
    documents = [Document(page_content=chunk) for chunk in results]
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)

    # Map step: each chunk is summarised on its own against the query.
    per_chunk_template = (
        "You are analyzing a search result in the context of the question: '{query}'.\n\n"
        "Search result:\n{text}\n\n"
        "Instructions:\n"
        "- If the result contains information relevant to answering the query, summarize the relevant parts clearly.\n"
        "- If the result is not helpful or unrelated, return 'IGNORE'.\n"
        "- Do not include generic information or filler.\n"
        "- Focus on extracting facts, key statements, or numbers that directly support the query.\n\n"
        "Relevant Summary:"
    )
    # Combine step: the per-chunk summaries are merged into one final answer.
    final_answer_template = (
        "You are aggregating information to answer the following question: '{query}'.\n\n"
        "Here are the summaries from filtered search results:\n{text}\n\n"
        "Using the most relevant points, write a clear, concise, and complete answer to the original query.\n"
        "If there's conflicting information, mention it briefly. Otherwise, focus on consensus.\n\n"
        "Final Answer:"
    )

    # `{query}` is bound up front so the chain only has to supply `{text}`.
    summarizer = load_summarize_chain(
        chat_model,
        chain_type="map_reduce",
        map_prompt=PromptTemplate.from_template(per_chunk_template).partial(query=query),
        combine_prompt=PromptTemplate.from_template(final_answer_template).partial(query=query),
    )
    chain_output = summarizer.invoke({'input_documents': documents})

    # Fall back to the stringified chain output if 'output_text' is missing.
    answer = chain_output.get('output_text', str(chain_output))
    payload = json.dumps({'summary': answer})
    if CUSTOM_DEBUG:
        print_tool_response(payload)
    return payload
 def extract_video_id(url: str) -> str:
     parsed = urlparse(url)
     return parse_qs(parsed.query).get("v", [""])[0]
+@tool
def get_audio_from_youtube(urls: list[str], save_dir:str="./tmp/") -> list[str | PurePath | None]:
    """Extracts audio from a YouTube video URL."""
    # The docstring is kept verbatim: `@tool` exposes it as the tool description.
    if CUSTOM_DEBUG:
        print_tool_call(
            get_audio_from_youtube,
            tool_name='get_audio_from_youtube',
            args={'urls': urls, 'save_dir': save_dir},
        )

    # Presumably the loader downloads the audio of each URL into `save_dir`
    # -- confirm against the YoutubeAudioLoader documentation.
    audio_loader = YoutubeAudioLoader(urls, save_dir)

    # Every yielded blob contributes its path, converted to a plain string.
    paths = [str(audio_blob.path) for audio_blob in audio_loader.yield_blobs()]

    if CUSTOM_DEBUG:
        print_tool_response(json.dumps({'paths': paths}))
    return paths
 @tool
 def load_youtube_transcript(url: str) -> str:
     """Load a YouTube transcript using youtube_transcript_api."""
         return error_str
+youtube_search_api = YouTubeSearchTool()
 @tool
def youtube_search_tool(query: str, number_of_results:int=3) -> list:
    """Search YouTube for a query and return the top number_of_results."""
    # The docstring is kept verbatim: `@tool` exposes it as the tool description.
    if CUSTOM_DEBUG:
        print_tool_call(
            youtube_search_tool,
            tool_name='youtube_search_tool',
            # Fix: the key must be the *string* 'number_of_results'. The
            # original used the variable itself as the key, logging `{3: 3}`.
            args={'query': query, 'number_of_results': number_of_results},
        )
    # The wrapped tool is given one "query,count" string, as in the original.
    # NOTE(review): the underlying tool may return a string rather than a
    # list; the `-> list` annotation is left untouched -- confirm.
    response = youtube_search_api.run(f"{query},{number_of_results}")
    if CUSTOM_DEBUG:
        print_tool_response(response)
    return response
 @tool
 @tool
 def transcribe_audio(file_path: str) -> list:
+    """Transcribe audio from an audio file in file_path using Google Speech-to-Text."""
+    docs, docs_content = [], []
     if CUSTOM_DEBUG:
         print_tool_call(
             transcribe_audio,
             tool_name='transcribe_audio',
             args={'file_path': file_path},
         )
+    try:
+        loader = SpeechToTextLoader(
+            project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
+            file_path=file_path,
+            is_long = False,  # Set to True for long audio files
+        )
+        docs = loader.load()
+    except Exception as e:
+        print(f"Error loading audio file: {e}")
+        try:
+            loader = SpeechToTextLoader(
+                project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
+                file_path=file_path,
+                is_long=True,  # Set to True for long audio files
+            )
+            docs = loader.load()
+        except Exception as e:
+            docs_content = [f"Error loading audio file: {e}"]
+    docs_content = [doc.page_content for doc in docs] if docs else docs_content
     if CUSTOM_DEBUG:
         print_tool_response(docs_content)
     return docs_content
+@tool
def extract_clean_text_from_url(url: str) -> str:
    """Extract the main readable content from a webpage using trafilatura."""
    # The docstring is kept verbatim: `@tool` exposes it as the tool description.
    if CUSTOM_DEBUG:
        print_tool_call(
            extract_clean_text_from_url,
            tool_name='extract_clean_text_from_url',
            args={'url': url},
        )

    page_source = trafilatura.fetch_url(url)
    if page_source:
        # An empty or missing extraction is reported with a fixed message.
        page_text = trafilatura.extract(page_source) or "No meaningful content found."
    else:
        page_text = "Failed to download the page. Please check the URL."

    if CUSTOM_DEBUG:
        print_tool_response(page_text)
    return page_text
 read_tool = ReadFileTool()
 @tool
def smart_read_file(file_path: str) -> str:
    """
    Smart tool to read a file based on its type.
    - Use `read_file_tool` for simple text, CSV, code files.
    - Use MarkItDown for PDFs, Word, Excel, HTML, and other complex formats.
    """
    # The docstring is kept verbatim: `@tool` exposes it as the tool description.
    #
    # Routing, in order:
    #   1. audio extension     -> transcribe_audio
    #   2. image extension     -> image_query_tool with a generic question
    #   3. YouTube watch URL   -> transcript, or downloaded audio paths if the
    #                             transcript could not be loaded
    #   4. extensionless URL   -> extract_clean_text_from_url
    #   5. anything else       -> MarkItDown, falling back to ReadFileTool
    # NOTE(review): branches 1 and 3 can return lists although the signature
    # says `-> str`; the annotation is left unchanged for callers.
    if CUSTOM_DEBUG:
        print_tool_call(
            smart_read_file,
            tool_name='smart_read_file',
            args={'file_path': file_path},
        )

    lowered_path = file_path.lower()
    _, ext = os.path.splitext(lowered_path)

    if ext in (".mp3", ".wav", ".m4a", ".flac"):
        # If the file is an audio file, transcribe it
        return transcribe_audio.invoke({"file_path": file_path})

    if ext in (".png", ".jpg", ".jpeg", ".gif", ".bmp"):
        # If the file is an image, use image_query_tool to analyze it
        q = "What can you tell me about this image?"
        return image_query_tool.invoke({"image_path": file_path, "question": q})

    if "youtube.com/watch?v=" in file_path:
        transcript = load_youtube_transcript.invoke({"url": file_path})
        if "Error loading" in transcript:
            return get_audio_from_youtube.invoke({'urls': [file_path], 'save_dir': './tmp/'})
        # Fix: return the transcript. The original discarded it and fell
        # through to MarkItDown.
        return transcript

    # Fix: decide "is this a URL?" from the path itself. The original tested
    # whether the *extension* was a substring of "http://", "https://" or
    # "www.", which is true for every extensionless path (ext == ""), so
    # local files such as "README" were sent to the web extractor.
    # URLs whose last segment has an extension (e.g. ".pdf") still go to
    # MarkItDown below, exactly as before.
    looks_like_url = lowered_path.startswith(("http://", "https://", "www."))
    if looks_like_url and ext in ("", "."):
        return extract_clean_text_from_url.invoke({"url": file_path})

    md = MarkItDown()
    try:
        result = md.convert(file_path).text_content
    except Exception:
        # Deliberate best-effort: whatever MarkItDown cannot convert is
        # handed to the plain file reader.
        result = read_tool.invoke({"file_path": file_path})

    if CUSTOM_DEBUG:
        print_tool_response(result)
    return result