Spaces:

CTPC
/

AgentFinalAssignment

Sleeping

File size: 6,256 Bytes

import pandas as pd
from langchain_community.tools import DuckDuckGoSearchRun, TavilySearchResults
from langchain_core.tools import tool
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain_google_genai import ChatGoogleGenerativeAI
import base64

#LLMs
# Shared chat model instance, created once at import time; the image and
# audio tools in this file send their multimodal prompts through
# google_llm.invoke().
google_llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-lite')

#IMAGE_TOOLS
@tool
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    Args:
        img_path: Path to a local image file.

    Returns:
        The text the model extracted from the image, with surrounding
        whitespace stripped. An empty string is returned (and the error is
        printed) if the file cannot be read or the model call fails.
    """
    import mimetypes

    try:
        # Read image and encode as base64
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's actual image type (JPEG, GIF,
        # WebP, ...) instead of always claiming PNG; fall back to PNG when
        # the extension is unknown or does not map to an image type.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        # Prepare the prompt including the base64 image data
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = google_llm.invoke(message)

        return response.content.strip()
    except Exception as e:
        # Best-effort tool: report the failure on stdout and hand the agent
        # an empty result rather than raising.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""

@tool
def describe_image(img_path: str) -> str:
    """
    Takes a local image file path and returns a detailed description of the image.

    Args:
        img_path (str): Path to a local image file. The file is opened from
            disk, so URLs are not supported.

    Returns:
        str: A detailed description of the image content, with surrounding
        whitespace stripped. An empty string is returned (and the error is
        printed) if the file cannot be read or the model call fails.
    """
    import mimetypes

    try:
        # Read image and encode as base64
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's actual image type (JPEG, GIF,
        # WebP, ...) instead of always claiming PNG; fall back to PNG when
        # the extension is unknown or does not map to an image type.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        # Prepare the prompt including the base64 image data
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Provide a detailed description from this image. "
                            "Return descriptive text only."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = google_llm.invoke(message)

        return response.content.strip()
    except Exception as e:
        # Best-effort tool: report the failure on stdout and hand the agent
        # an empty result rather than raising.
        error_msg = f"Error describing image: {str(e)}"
        print(error_msg)
        return ""

#AUDIO_TOOLS
@tool
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe audio from a file using a multimodal model.

    Args:
        audio_path: Path to a local audio file. The audio format sent to the
            model is taken from the file extension (e.g. "mp3", "wav");
            "wav" is assumed when the path has no extension.

    Returns:
        A single string containing the transcribed text, with surrounding
        whitespace stripped. An empty string is returned (and the error is
        printed) if the file cannot be read or the model call fails.
    """
    from pathlib import Path

    try:
        # Read audio and encode as base64
        with open(audio_path, "rb") as audio_file:
            audio_base64 = base64.b64encode(audio_file.read()).decode()

        # Declare the real container format rather than always claiming WAV,
        # so non-WAV inputs such as MP3 are not mislabelled.
        audio_format = Path(audio_path).suffix.lstrip(".").lower() or "wav"

        # Prepare the prompt including the base64 audio data
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Transcribe the following audio input:"
                        ),
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": audio_base64,
                            "format": audio_format
                        },
                    },
                ]
            )
        ]

        # Call the multimodal model
        response = google_llm.invoke(message)

        return response.content.strip()

    except Exception as e:
        # Best-effort tool: report the failure on stdout and hand the agent
        # an empty result rather than raising.
        error_msg = f"Error transcribing audio: {str(e)}"
        print(error_msg)
        return ""

#WEB_SEARCH_TOOL
@tool
def web_search(query: str) -> str:
    """Perform a web search and return the top 5 results.

    Args:
        query: The search query to send to Tavily.

    Returns:
        Whatever the Tavily search tool returns for the query, passed through
        unchanged.
    """
    # The keyword was previously misspelled as `searxch_depth`, so the
    # requested "basic" depth was never passed under the right name.
    # max_results is set explicitly so the "top 5" promise above is enforced
    # here rather than depending on the library default.
    # DuckDuckGoSearchRun (imported at file level) is the alternative backend
    # that was tried before Tavily.
    search_tool = TavilySearchResults(search_depth='basic', max_results=5)
    return search_tool.invoke(query)

#FILE_PARSE_TOOL
@tool
def read_file(file_path: str) -> str:
    """
    Reads a text based file and returns its content as a string.

    Args:
        file_path (str): The path to the file. Supported extensions, matched
            case-insensitively: .txt, .py, .csv and .xlsx.

    Returns:
        str: The content of the file. Plain-text files (.txt, .py) are
        returned verbatim; .csv and .xlsx files are loaded with pandas and
        rendered with DataFrame.to_string().

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    # Compare against a lower-cased copy so "DATA.CSV" is handled like
    # "data.csv"; the original path is still what gets opened.
    lowered = file_path.lower()

    if lowered.endswith(('.txt', '.py')):
        # Explicit encoding so the result does not depend on the platform's
        # default locale encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    if lowered.endswith('.csv'):
        return pd.read_csv(file_path).to_string()
    if lowered.endswith('.xlsx'):
        return pd.read_excel(file_path).to_string()

    raise ValueError("Unsupported file format. Only .txt, .py, .csv, and .xlsx are supported.")