Create tools.py
tools.py
ADDED
@@ -0,0 +1,220 @@
import base64

import pandas as pd
from azure.identity import EnvironmentCredential
from langchain_community.tools import DuckDuckGoSearchRun, TavilySearchResults
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import AzureChatOpenAI

# LLMs
def get_access_token():
    """Fetch an Entra ID token for the Azure OpenAI resource."""
    credential = EnvironmentCredential()
    access_token = credential.get_token("https://cognitiveservices.azure.com/.default")
    return access_token.token

llm = AzureChatOpenAI(
    model_name="gpt-4o",
    # Pass a token *provider* (not a static api_key) so the Entra ID token
    # is sent as a bearer token and refreshed when it expires.
    azure_ad_token_provider=get_access_token,
    azure_endpoint="https://cog-sandbox-dev-eastus2-001.openai.azure.com/",
    api_version="2024-08-01-preview",
)

google_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-lite")
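# NOTE (setup assumption): EnvironmentCredential reads service-principal
# settings from environment variables; AZURE_TENANT_ID, AZURE_CLIENT_ID and
# AZURE_CLIENT_SECRET must be set for get_access_token() to succeed.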
# IMAGE_TOOLS
@tool
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    Args:
        img_path: A local image file path (string).

    Returns:
        A single string containing the text extracted from the image.
    """
    try:
        # Read the image and encode it as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Prepare the prompt, embedding the base64 image data
        # (the data URL assumes PNG; adjust the MIME type for other formats)
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ]
            )
        ]

        # Call the vision-capable model and return the extracted text
        response = llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Swallow the error and return an empty string; raise here instead
        # if the caller should handle failures.
        print(f"Error extracting text: {e}")
        return ""

@tool
def describe_image(img_path: str) -> str:
    """
    Take an image file path and return a detailed description of the image.

    Args:
        img_path: A local image file path (string).

    Returns:
        str: A detailed description of the image content.
    """
    try:
        # Read the image and encode it as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Prepare the prompt, embedding the base64 image data
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Provide a detailed description of this image. "
                            "Return descriptive text only."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ]
            )
        ]

        # Call the vision-capable model and return the description
        response = llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        print(f"Error describing image: {e}")
        return ""
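# Example (hypothetical path, shown for illustration): @tool-decorated
# functions are invoked with a dict of arguments, e.g.
#     extract_text.invoke({"img_path": "receipt.png"})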
# AUDIO_TOOLS
@tool
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe audio from a file using a multimodal model.

    Args:
        audio_path: A local audio file path (string).

    Returns:
        A single string containing the transcribed text.
    """
    try:
        # Read the audio and encode it as base64
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

        # Prepare the prompt, embedding the base64 audio data
        # (the format is hardcoded to "wav"; derive it from the file
        # extension if other formats are expected)
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": "Transcribe the following audio input:",
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": audio_base64,
                            "format": "wav",
                        },
                    },
                ]
            )
        ]

        # Call the audio-capable model and return the transcript
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return ""
# WEB_SEARCH_TOOL
@tool
def web_search(query: str) -> str:
    """Perform a web search and return the top 5 results."""
    # search_tool = DuckDuckGoSearchRun()
    search_tool = TavilySearchResults(search_depth="basic")
    result = search_tool.invoke(query)
    return result
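# NOTE (setup assumption): TavilySearchResults authenticates via the
# TAVILY_API_KEY environment variable, which must be set in this Space.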
# FILE_PARSE_TOOL
@tool
def read_file(file_path: str) -> str:
    """
    Read a text-based file and return its content as a string.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The content of the file.
    """
    if file_path.endswith(".txt"):
        with open(file_path, "r") as file:
            return file.read()
    elif file_path.endswith(".csv"):
        return pd.read_csv(file_path).to_string()
    elif file_path.endswith(".xlsx"):
        return pd.read_excel(file_path).to_string()
    elif file_path.endswith(".py"):
        with open(file_path, "r") as file:
            return file.read()
    else:
        raise ValueError("Unsupported file format. Only .txt, .csv, .xlsx, and .py are supported.")
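A minimal consumption sketch (an assumption about how these tools are meant to be used, not part of the committed file): the @tool functions can be bound to the Azure model so it emits tool calls for an agent loop to execute. "data.csv" is a hypothetical file used only for illustration.

tools = [extract_text, describe_image, transcribe_audio, web_search, read_file]
llm_with_tools = llm.bind_tools(tools)

# The model replies with tool calls rather than a final answer;
# each entry names the tool and the arguments to call it with.
ai_msg = llm_with_tools.invoke("Summarise the contents of data.csv")
for call in ai_msg.tool_calls:
    print(call["name"], call["args"])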