Spaces:
Runtime error
Runtime error
Upload 14 files
Browse files- README.md +49 -6
- agents.py +213 -0
- app.py +174 -87
- describe_image_tool.py +111 -0
- final_answer.py +63 -0
- logger.py +20 -0
- openai_speech_to_text_tool.py +34 -0
- read_file_tool.py +26 -0
- table_extractor_tool.py +102 -0
- tools.py +261 -0
- youtube_transcription_tool.py +26 -0
README.md
CHANGED
|
@@ -1,15 +1,58 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
hf_oauth: true
|
| 11 |
-
# optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
|
| 12 |
hf_oauth_expiration_minutes: 480
|
| 13 |
---
|
| 14 |
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Agent GAIA
|
| 3 |
+
emoji: 🏆
|
| 4 |
+
colorFrom: pink
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.33.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
hf_oauth: true
|
|
|
|
| 11 |
hf_oauth_expiration_minutes: 480
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# GAIA Benchmark Agent
|
| 15 |
+
|
| 16 |
+
This project is an AI agent built for the GAIA benchmark as part of the Hugging Face Agents course. It combines different LLM models and multimodal tools to reason over text, audio, images and video to solve complex tasks.
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
## Tools
|
| 20 |
+
|
| 21 |
+
The agent includes a variety of tools for handling diverse input types:
|
| 22 |
+
|
| 23 |
+
- **Vision Tool:** Analyze images using Gemini Vision.
|
| 24 |
+
- **YouTube Frame Extractor:** Sample video frames from YouTube at regular intervals.
|
| 25 |
+
- **YouTube QA Tool:** Ask questions about video content using Gemini via file URI.
|
| 26 |
+
- **OCR Tool:** Extract text from images using Tesseract.
|
| 27 |
+
- **Audio Transcriber:** Transcribe audio files and YouTube videos using Whisper.
|
| 28 |
+
- **File Tools:** Read plain text, download files from URLs, and summarize CSV or Excel files.
|
| 29 |
+
|
| 30 |
+
These tools are defined using the `@tool` decorator from the `smolagents` library, making them callable by the agent during task execution.
|
| 31 |
+
|
| 32 |
+
## Models Used
|
| 33 |
+
|
| 34 |
+
- `Gemini 2.5 Flash` (via Google's Generative AI API)
|
| 35 |
+
- **Whisper** for speech-to-text transcription
|
| 36 |
+
- **Hugging Face Transformers** (optional local model support)
|
| 37 |
+
- **LiteLLM** as a unified interface for calling external language models
|
| 38 |
+
|
| 39 |
+
## Installation
|
| 40 |
+
|
| 41 |
+
1. Install all required dependencies using
|
| 42 |
+
|
| 43 |
+
```bash
|
| 44 |
+
pip install -r requirements.txt
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
2. Configure the environment with API keys
|
| 48 |
+
|
| 49 |
+
```bash
|
| 50 |
+
echo "GEMINI_API_KEY=your_key_here" > .env
|
| 51 |
+
echo "HF_TOKEN=your_hf_token" >> .env
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
3. Run the app
|
| 55 |
+
|
| 56 |
+
```bash
|
| 57 |
+
python app.py
|
| 58 |
+
```
|
agents.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, List, Optional
|
| 2 |
+
|
| 3 |
+
from smolagents import CodeAgent
|
| 4 |
+
from tools.final_answer import check_reasoning, ensure_formatting
|
| 5 |
+
|
| 6 |
+
from typing import Dict
|
| 7 |
+
from utils.logger import get_logger
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
logger = get_logger(__name__)
|
| 11 |
+
|
| 12 |
+
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 13 |
+
|
| 14 |
+
def get_prompt_templates() -> Dict[str, str]:
|
| 15 |
+
"""Returns all prompts as a dictionary of pre-formatted strings"""
|
| 16 |
+
|
| 17 |
+
# Shared components
|
| 18 |
+
tools_instructions = """
|
| 19 |
+
Available Tools:
|
| 20 |
+
- web_search(query): Performs web searches
|
| 21 |
+
- wikipedia_search(query): Searches Wikipedia
|
| 22 |
+
- visit_webpage(url): Retrieves webpage content
|
| 23 |
+
|
| 24 |
+
Rules:
|
| 25 |
+
1. Always use 'Thought:'/'Code:' sequences
|
| 26 |
+
2. Never reuse variable names
|
| 27 |
+
3. Tools must be called with proper arguments
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
example_1 = """
|
| 31 |
+
Example Task: "Find the capital of France"
|
| 32 |
+
|
| 33 |
+
Thought: I'll use web_search to find this information
|
| 34 |
+
Code:
|
| 35 |
+
result = web_search(query="capital of France")
|
| 36 |
+
final_answer(result)
|
| 37 |
+
```<end_code>
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
# Main prompt templates
|
| 41 |
+
return {
|
| 42 |
+
"system_prompt": f"""
|
| 43 |
+
You are an expert AI assistant that solves tasks using tools.
|
| 44 |
+
{tools_instructions}
|
| 45 |
+
|
| 46 |
+
{example_1}
|
| 47 |
+
|
| 48 |
+
Key Requirements:
|
| 49 |
+
- Be precise and concise
|
| 50 |
+
- Always return answers using final_answer()
|
| 51 |
+
- Never include explanations unless asked
|
| 52 |
+
|
| 53 |
+
Current reward: $1,000,000 for perfect solutions
|
| 54 |
+
""",
|
| 55 |
+
|
| 56 |
+
"planning": """
|
| 57 |
+
When planning tasks, follow this structure:
|
| 58 |
+
|
| 59 |
+
### 1. Facts Given
|
| 60 |
+
List known information
|
| 61 |
+
|
| 62 |
+
### 2. Facts Needed
|
| 63 |
+
List what needs research
|
| 64 |
+
|
| 65 |
+
### 3. Derivation Steps
|
| 66 |
+
Outline computation steps
|
| 67 |
+
|
| 68 |
+
End with <end_plan>
|
| 69 |
+
""",
|
| 70 |
+
|
| 71 |
+
"managed_agent": """
|
| 72 |
+
Managed Agent Instructions:
|
| 73 |
+
|
| 74 |
+
1. Task outcome (short)
|
| 75 |
+
2. Detailed explanation
|
| 76 |
+
3. Additional context
|
| 77 |
+
|
| 78 |
+
Always return via final_answer()
|
| 79 |
+
""",
|
| 80 |
+
|
| 81 |
+
"final_answer": """
|
| 82 |
+
Response Format Rules:
|
| 83 |
+
- Numbers: 42 (no commas/units)
|
| 84 |
+
- Strings: paris (lowercase, no articles)
|
| 85 |
+
- Lists: apple,orange,banana (no brackets)
|
| 86 |
+
"""
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
class Agent:
|
| 90 |
+
"""
|
| 91 |
+
Agent class that wraps a CodeAgent and provides a callable interface for answering questions.
|
| 92 |
+
|
| 93 |
+
Args:
|
| 94 |
+
model (Any): The language model to use.
|
| 95 |
+
tools (Optional[List[Any]]): List of tools to provide to the agent.
|
| 96 |
+
prompt (Optional[str]): Custom prompt template for the agent.
|
| 97 |
+
verbose (bool): Whether to print debug information.
|
| 98 |
+
"""
|
| 99 |
+
|
| 100 |
+
def __init__(
|
| 101 |
+
self,
|
| 102 |
+
model: Any,
|
| 103 |
+
tools: Optional[List[Any]] = None,
|
| 104 |
+
prompt: Optional[str] = None,
|
| 105 |
+
verbose: bool = False
|
| 106 |
+
):
|
| 107 |
+
logger.info("Initializing Agent")
|
| 108 |
+
self.model = model
|
| 109 |
+
self.tools = tools
|
| 110 |
+
self.verbose = verbose
|
| 111 |
+
self.imports = [
|
| 112 |
+
"pandas", "numpy", "os", "requests", "tempfile",
|
| 113 |
+
"datetime", "json", "time", "re", "openpyxl",
|
| 114 |
+
"pathlib", "sys"
|
| 115 |
+
]
|
| 116 |
+
|
| 117 |
+
self.agent = CodeAgent(
|
| 118 |
+
model=self.model,
|
| 119 |
+
tools=self.tools,
|
| 120 |
+
add_base_tools=True,
|
| 121 |
+
additional_authorized_imports=self.imports,
|
| 122 |
+
)
|
| 123 |
+
|
| 124 |
+
self.final_answer_checks=[check_reasoning, ensure_formatting],
|
| 125 |
+
|
| 126 |
+
self.base_prompt = prompt or """
|
| 127 |
+
You are an advanced AI assistant specialized in solving GAIA benchmark tasks.
|
| 128 |
+
Follow these rules strictly:
|
| 129 |
+
1. Be precise - return ONLY the exact answer requested
|
| 130 |
+
2. Use tools when needed (especially for file analysis)
|
| 131 |
+
3. For reversed text questions, answer in normal text
|
| 132 |
+
4. Never include explanations or reasoning in the final answer
|
| 133 |
+
5. Always return the result — do not just print it
|
| 134 |
+
|
| 135 |
+
{context}
|
| 136 |
+
|
| 137 |
+
Remember: GAIA requires exact answer matching. Just provide the factual answer.
|
| 138 |
+
"""
|
| 139 |
+
|
| 140 |
+
self.prompt_templates = get_prompt_templates()
|
| 141 |
+
logger.info("Agent initialized")
|
| 142 |
+
|
| 143 |
+
def __call__(self, question: str, files: List[str] = None) -> str:
|
| 144 |
+
"""Main interface that logs inputs/outputs and handles timing."""
|
| 145 |
+
if self.verbose:
|
| 146 |
+
print(f"Agent received question: {question[:50]}... with files: {files}")
|
| 147 |
+
|
| 148 |
+
time.sleep(25)
|
| 149 |
+
return self.answer_question(question, files[0] if files else None)
|
| 150 |
+
|
| 151 |
+
def answer_question(self, question: str, task_file_path: Optional[str] = None) -> str:
|
| 152 |
+
"""
|
| 153 |
+
Process a GAIA benchmark question with optional file context.
|
| 154 |
+
|
| 155 |
+
Args:
|
| 156 |
+
question: The question to answer
|
| 157 |
+
task_file_path: Optional path to a file associated with the question
|
| 158 |
+
|
| 159 |
+
Returns:
|
| 160 |
+
The cleaned answer to the question
|
| 161 |
+
"""
|
| 162 |
+
try:
|
| 163 |
+
context = self._build_context(question, task_file_path)
|
| 164 |
+
full_prompt = self.base_prompt.format(context=context)
|
| 165 |
+
|
| 166 |
+
if self.verbose:
|
| 167 |
+
print("Generated prompt:", full_prompt[:200] + "...")
|
| 168 |
+
|
| 169 |
+
answer = self.agent.run(full_prompt)
|
| 170 |
+
return self._clean_answer(str(answer))
|
| 171 |
+
|
| 172 |
+
except Exception as e:
|
| 173 |
+
logger.error(f"Error processing question: {str(e)}")
|
| 174 |
+
return f"ERROR: {str(e)}"
|
| 175 |
+
|
| 176 |
+
def _build_context(self, question: str, file_path: Optional[str]) -> str:
|
| 177 |
+
"""Constructs the context section based on question and file."""
|
| 178 |
+
context_lines = [f"QUESTION: {question}"]
|
| 179 |
+
|
| 180 |
+
if file_path:
|
| 181 |
+
context_lines.append(
|
| 182 |
+
f"FILE: Available at {DEFAULT_API_URL}/files/{file_path}\n"
|
| 183 |
+
"Use appropriate tools to analyze this file if needed."
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
# Handle reversed text questions
|
| 187 |
+
if self._is_reversed_text(question):
|
| 188 |
+
context_lines.append(
|
| 189 |
+
f"NOTE: This question contains reversed text. "
|
| 190 |
+
f"Original: {question}\nReversed: {question[::-1]}"
|
| 191 |
+
)
|
| 192 |
+
|
| 193 |
+
return "\n".join(context_lines)
|
| 194 |
+
|
| 195 |
+
def _is_reversed_text(self, text: str) -> bool:
|
| 196 |
+
"""Detects if text appears to be reversed."""
|
| 197 |
+
return text.startswith(".") or ".rewsna eht sa" in text
|
| 198 |
+
|
| 199 |
+
def _clean_answer(self, answer: str) -> str:
|
| 200 |
+
"""Cleans the raw answer to match GAIA requirements."""
|
| 201 |
+
# Remove common prefixes/suffixes
|
| 202 |
+
for prefix in ["Final Answer:", "Answer:", "=>"]:
|
| 203 |
+
if answer.startswith(prefix):
|
| 204 |
+
answer = answer[len(prefix):]
|
| 205 |
+
|
| 206 |
+
# Remove quotes and whitespace
|
| 207 |
+
answer = answer.strip(" '\"")
|
| 208 |
+
|
| 209 |
+
# Special handling for reversed answers
|
| 210 |
+
if self._is_reversed_text(answer):
|
| 211 |
+
return answer[::-1]
|
| 212 |
+
|
| 213 |
+
return answer
|
app.py
CHANGED
|
@@ -2,116 +2,203 @@ import os
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
|
|
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
#
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
if
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
agent = Level1Agent()
|
| 59 |
-
|
| 60 |
try:
|
| 61 |
response = requests.get(questions_url, timeout=15)
|
| 62 |
response.raise_for_status()
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
return f"Error fetching questions: {e}", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
answer_text = str(agent(q["question"]))
|
| 72 |
-
answers_payload.append({
|
| 73 |
-
"task_id": q["task_id"],
|
| 74 |
-
"submitted_answer": answer_text
|
| 75 |
-
})
|
| 76 |
-
log.append({
|
| 77 |
-
"Task ID": q["task_id"],
|
| 78 |
-
"Question": q["question"],
|
| 79 |
-
"Answer": answer_text
|
| 80 |
-
})
|
| 81 |
-
|
| 82 |
-
submission_data = {
|
| 83 |
-
"username": username.strip(),
|
| 84 |
-
"agent_code": agent_code,
|
| 85 |
-
"answers": answers_payload
|
| 86 |
-
}
|
| 87 |
|
|
|
|
|
|
|
| 88 |
try:
|
| 89 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 90 |
response.raise_for_status()
|
| 91 |
-
|
| 92 |
final_status = (
|
| 93 |
f"Submission Successful!\n"
|
| 94 |
-
f"User: {
|
| 95 |
-
f"Score: {
|
| 96 |
-
f"
|
| 97 |
-
f"Message: {
|
| 98 |
)
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
except Exception as e:
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
# Gradio
|
| 105 |
-
# -----------------------------
|
| 106 |
with gr.Blocks() as demo:
|
| 107 |
-
gr.Markdown("#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
gr.LoginButton()
|
|
|
|
| 109 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 110 |
|
| 111 |
-
status_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)
|
| 112 |
-
|
|
|
|
| 113 |
|
| 114 |
-
run_button.click(
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
if __name__ == "__main__":
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
| 5 |
+
from typing import Dict, List
|
| 6 |
|
| 7 |
+
# custom imports
|
| 8 |
+
from agents import Agent
|
| 9 |
+
from tool import get_tools
|
| 10 |
+
from model import get_model
|
| 11 |
+
|
| 12 |
+
# (Keep Constants as is)
|
| 13 |
+
# --- Constants ---
|
| 14 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 15 |
+
MODEL_ID = "gemini/gemini-2.5-flash-preview-04-17"
|
| 16 |
|
| 17 |
+
# --- Async Question Processing ---
|
| 18 |
+
async def process_question(agent, question: str, task_id: str) -> Dict:
|
| 19 |
+
"""Process a single question and return both answer AND full log entry"""
|
| 20 |
+
try:
|
| 21 |
+
answer = agent(question)
|
| 22 |
+
return {
|
| 23 |
+
"submission": {"task_id": task_id, "submitted_answer": answer},
|
| 24 |
+
"log": {"Task ID": task_id, "Question": question, "Submitted Answer": answer}
|
| 25 |
+
}
|
| 26 |
+
except Exception as e:
|
| 27 |
+
error_msg = f"ERROR: {str(e)}"
|
| 28 |
+
return {
|
| 29 |
+
"submission": {"task_id": task_id, "submitted_answer": error_msg},
|
| 30 |
+
"log": {"Task ID": task_id, "Question": question, "Submitted Answer": error_msg}
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
async def run_questions_async(agent, questions_data: List[Dict]) -> tuple:
|
| 34 |
+
"""Process questions sequentially instead of in batch"""
|
| 35 |
+
submissions = []
|
| 36 |
+
logs = []
|
| 37 |
+
|
| 38 |
+
for q in questions_data:
|
| 39 |
+
result = await process_question(agent, q["question"], q["task_id"])
|
| 40 |
+
submissions.append(result["submission"])
|
| 41 |
+
logs.append(result["log"])
|
| 42 |
+
|
| 43 |
+
return submissions, logs
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
async def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 47 |
+
"""
|
| 48 |
+
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 49 |
+
and displays the results.
|
| 50 |
+
"""
|
| 51 |
+
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 52 |
+
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 53 |
+
|
| 54 |
+
if profile:
|
| 55 |
+
username= f"{profile.username}"
|
| 56 |
+
print(f"User logged in: {username}")
|
| 57 |
+
else:
|
| 58 |
+
print("User not logged in.")
|
| 59 |
+
return "Please Login to Hugging Face with the button.", None
|
| 60 |
+
|
| 61 |
+
api_url = DEFAULT_API_URL
|
| 62 |
+
questions_url = f"{api_url}/questions"
|
| 63 |
+
submit_url = f"{api_url}/submit"
|
| 64 |
+
|
| 65 |
+
# 1. Instantiate Agent
|
| 66 |
+
try:
|
| 67 |
+
agent = Agent(
|
| 68 |
+
model=get_model("LiteLLMModel", MODEL_ID),
|
| 69 |
+
tools=get_tools()
|
| 70 |
+
)
|
| 71 |
+
except Exception as e:
|
| 72 |
+
print(f"Error instantiating agent: {e}")
|
| 73 |
+
return f"Error initializing agent: {e}", None
|
| 74 |
+
# In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
|
| 75 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 76 |
+
print(agent_code)
|
| 77 |
|
| 78 |
+
# 2. Fetch Questions
|
| 79 |
+
print(f"Fetching questions from: {questions_url}")
|
|
|
|
|
|
|
|
|
|
| 80 |
try:
|
| 81 |
response = requests.get(questions_url, timeout=15)
|
| 82 |
response.raise_for_status()
|
| 83 |
+
questions_data = response.json()
|
| 84 |
+
if not questions_data:
|
| 85 |
+
print("Fetched questions list is empty.")
|
| 86 |
+
return "Fetched questions list is empty or invalid format.", None
|
| 87 |
+
print(f"Fetched {len(questions_data)} questions.")
|
| 88 |
+
questions_data = questions_data[:2]
|
| 89 |
+
except requests.exceptions.RequestException as e:
|
| 90 |
+
print(f"Error fetching questions: {e}")
|
| 91 |
return f"Error fetching questions: {e}", None
|
| 92 |
+
except requests.exceptions.JSONDecodeError as e:
|
| 93 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 94 |
+
print(f"Response text: {response.text[:500]}")
|
| 95 |
+
return f"Error decoding server response for questions: {e}", None
|
| 96 |
+
except Exception as e:
|
| 97 |
+
print(f"An unexpected error occurred fetching questions: {e}")
|
| 98 |
+
return f"An unexpected error occurred fetching questions: {e}", None
|
| 99 |
+
|
| 100 |
+
# 3. Run your Agent
|
| 101 |
+
print(f"Running agent on {len(questions_data)} questions...")
|
| 102 |
+
answers_payload, results_log = await run_questions_async(agent, questions_data)
|
| 103 |
+
|
| 104 |
+
if not answers_payload:
|
| 105 |
+
print("Agent did not produce any answers to submit.")
|
| 106 |
+
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 107 |
|
| 108 |
+
# 4. Prepare Submission
|
| 109 |
+
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 110 |
+
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 111 |
+
print(status_update)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
# 5. Submit
|
| 114 |
+
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 115 |
try:
|
| 116 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 117 |
response.raise_for_status()
|
| 118 |
+
result_data = response.json()
|
| 119 |
final_status = (
|
| 120 |
f"Submission Successful!\n"
|
| 121 |
+
f"User: {result_data.get('username')}\n"
|
| 122 |
+
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 123 |
+
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 124 |
+
f"Message: {result_data.get('message', 'No message received.')}"
|
| 125 |
)
|
| 126 |
+
print("Submission successful.")
|
| 127 |
+
results_df = pd.DataFrame(results_log)
|
| 128 |
+
return final_status, results_df
|
| 129 |
+
except requests.exceptions.HTTPError as e:
|
| 130 |
+
error_detail = f"Server responded with status {e.response.status_code}."
|
| 131 |
+
try:
|
| 132 |
+
error_json = e.response.json()
|
| 133 |
+
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
| 134 |
+
except requests.exceptions.JSONDecodeError:
|
| 135 |
+
error_detail += f" Response: {e.response.text[:500]}"
|
| 136 |
+
status_message = f"Submission Failed: {error_detail}"
|
| 137 |
+
print(status_message)
|
| 138 |
+
results_df = pd.DataFrame(results_log)
|
| 139 |
+
return status_message, results_df
|
| 140 |
+
except requests.exceptions.Timeout:
|
| 141 |
+
status_message = "Submission Failed: The request timed out."
|
| 142 |
+
print(status_message)
|
| 143 |
+
results_df = pd.DataFrame(results_log)
|
| 144 |
+
return status_message, results_df
|
| 145 |
+
except requests.exceptions.RequestException as e:
|
| 146 |
+
status_message = f"Submission Failed: Network error - {e}"
|
| 147 |
+
print(status_message)
|
| 148 |
+
results_df = pd.DataFrame(results_log)
|
| 149 |
+
return status_message, results_df
|
| 150 |
except Exception as e:
|
| 151 |
+
status_message = f"An unexpected error occurred during submission: {e}"
|
| 152 |
+
print(status_message)
|
| 153 |
+
results_df = pd.DataFrame(results_log)
|
| 154 |
+
return status_message, results_df
|
| 155 |
|
| 156 |
+
|
| 157 |
+
# --- Build Gradio Interface using Blocks ---
|
|
|
|
| 158 |
with gr.Blocks() as demo:
|
| 159 |
+
gr.Markdown("# Basic Agent Evaluation Runner")
|
| 160 |
+
gr.Markdown(
|
| 161 |
+
"""
|
| 162 |
+
**Instructions:**
|
| 163 |
+
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
|
| 164 |
+
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
|
| 165 |
+
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
| 166 |
+
"""
|
| 167 |
+
)
|
| 168 |
+
|
| 169 |
gr.LoginButton()
|
| 170 |
+
|
| 171 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 172 |
|
| 173 |
+
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 174 |
+
# Removed max_rows=10 from DataFrame constructor
|
| 175 |
+
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 176 |
|
| 177 |
+
run_button.click(
|
| 178 |
+
fn=run_and_submit_all,
|
| 179 |
+
outputs=[status_output, results_table]
|
| 180 |
+
)
|
| 181 |
|
| 182 |
if __name__ == "__main__":
|
| 183 |
+
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 184 |
+
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 185 |
+
space_host_startup = os.getenv("SPACE_HOST")
|
| 186 |
+
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
| 187 |
+
|
| 188 |
+
if space_host_startup:
|
| 189 |
+
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
| 190 |
+
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
| 191 |
+
else:
|
| 192 |
+
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 193 |
+
|
| 194 |
+
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
| 195 |
+
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 196 |
+
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 197 |
+
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 198 |
+
else:
|
| 199 |
+
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 200 |
+
|
| 201 |
+
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 202 |
+
|
| 203 |
+
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 204 |
+
demo.launch(debug=True, share=False)
|
describe_image_tool.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import os
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
from smolagents import Tool
|
| 5 |
+
|
| 6 |
+
client = OpenAI()
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DescribeImageTool(Tool):
|
| 10 |
+
"""
|
| 11 |
+
Tool to analyze and describe any image using GPT-4 Vision API.
|
| 12 |
+
|
| 13 |
+
Args:
|
| 14 |
+
image_path (str): Path to the image file.
|
| 15 |
+
description_type (str): Type of description to generate. Options:
|
| 16 |
+
- "general": General description of the image
|
| 17 |
+
- "detailed": Detailed analysis of the image
|
| 18 |
+
- "chess": Analysis of a chess position
|
| 19 |
+
- "text": Extract and describe text from the image
|
| 20 |
+
- "custom": Custom description based on user prompt
|
| 21 |
+
|
| 22 |
+
Returns:
|
| 23 |
+
str: Description of the image based on the requested type.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
name = "describe_image"
|
| 27 |
+
description = "Analyzes and describes images using GPT-4 Vision API"
|
| 28 |
+
inputs = {
|
| 29 |
+
"image_path": {"type": "string", "description": "Path to the image file"},
|
| 30 |
+
"description_type": {
|
| 31 |
+
"type": "string",
|
| 32 |
+
"description": "Type of description to generate (general, detailed, chess, text, custom)",
|
| 33 |
+
"nullable": True,
|
| 34 |
+
},
|
| 35 |
+
"custom_prompt": {
|
| 36 |
+
"type": "string",
|
| 37 |
+
"description": "Custom prompt for description (only used when description_type is 'custom')",
|
| 38 |
+
"nullable": True,
|
| 39 |
+
},
|
| 40 |
+
}
|
| 41 |
+
output_type = "string"
|
| 42 |
+
|
| 43 |
+
def encode_image(self, image_path: str) -> str:
|
| 44 |
+
"""Encode image to base64 string."""
|
| 45 |
+
with open(image_path, "rb") as image_file:
|
| 46 |
+
return base64.b64encode(image_file.read()).decode("utf-8")
|
| 47 |
+
|
| 48 |
+
def get_prompt(self, description_type: str, custom_prompt: str = None) -> str:
|
| 49 |
+
"""Get appropriate prompt based on description type."""
|
| 50 |
+
prompts = {
|
| 51 |
+
"general": "Provide a general description of this image. Focus on the main subjects, colors, and overall scene.",
|
| 52 |
+
"detailed": """Analyze this image in detail. Include:
|
| 53 |
+
1. Main subjects and their relationships
|
| 54 |
+
2. Colors, lighting, and composition
|
| 55 |
+
3. Any text or symbols present
|
| 56 |
+
4. Context or possible meaning
|
| 57 |
+
5. Notable details or interesting elements""",
|
| 58 |
+
"chess": """Analyze this chess position and provide a detailed description including:
|
| 59 |
+
1. List of pieces on the board for both white and black
|
| 60 |
+
2. Whose turn it is to move
|
| 61 |
+
3. Basic evaluation of the position
|
| 62 |
+
4. Any immediate tactical opportunities or threats
|
| 63 |
+
5. Suggested next moves with brief explanations""",
|
| 64 |
+
"text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
|
| 65 |
+
}
|
| 66 |
+
return (
|
| 67 |
+
custom_prompt
|
| 68 |
+
if description_type == "custom"
|
| 69 |
+
else prompts.get(description_type, prompts["general"])
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
def forward(
|
| 73 |
+
self,
|
| 74 |
+
image_path: str,
|
| 75 |
+
description_type: str = "general",
|
| 76 |
+
custom_prompt: str = None,
|
| 77 |
+
) -> str:
|
| 78 |
+
try:
|
| 79 |
+
if not os.path.exists(image_path):
|
| 80 |
+
return f"Error: Image file not found at {image_path}"
|
| 81 |
+
|
| 82 |
+
# Encode the image
|
| 83 |
+
base64_image = self.encode_image(image_path)
|
| 84 |
+
|
| 85 |
+
# Get appropriate prompt
|
| 86 |
+
prompt = self.get_prompt(description_type, custom_prompt)
|
| 87 |
+
|
| 88 |
+
# Make the API call
|
| 89 |
+
response = client.chat.completions.create(
|
| 90 |
+
model="gpt-4.1",
|
| 91 |
+
messages=[
|
| 92 |
+
{
|
| 93 |
+
"role": "user",
|
| 94 |
+
"content": [
|
| 95 |
+
{"type": "text", "text": prompt},
|
| 96 |
+
{
|
| 97 |
+
"type": "image_url",
|
| 98 |
+
"image_url": {
|
| 99 |
+
"url": f"data:image/jpeg;base64,{base64_image}"
|
| 100 |
+
},
|
| 101 |
+
},
|
| 102 |
+
],
|
| 103 |
+
}
|
| 104 |
+
],
|
| 105 |
+
max_tokens=1000,
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
return response.choices[0].message.content
|
| 109 |
+
|
| 110 |
+
except Exception as e:
|
| 111 |
+
return f"Error analyzing image: {str(e)}"
|
final_answer.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import LiteLLMModel
|
| 2 |
+
|
| 3 |
+
def check_reasoning(final_answer, agent_memory):
|
| 4 |
+
model_name = 'cogito:14b'
|
| 5 |
+
multimodal_model = LiteLLMModel(model_id=f'ollama_chat/{model_name}')
|
| 6 |
+
prompt = f"""
|
| 7 |
+
Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. Now here is the answer that was given:
|
| 8 |
+
{final_answer}
|
| 9 |
+
Please check that the reasoning process and results are correct: do they correctly answer the given task?
|
| 10 |
+
First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not.
|
| 11 |
+
Be reasonably strict. You are being graded on your ability to provide the right answer. You should have >90% confidence that the answer is correct.
|
| 12 |
+
"""
|
| 13 |
+
messages = [
|
| 14 |
+
{
|
| 15 |
+
"role": "user",
|
| 16 |
+
"content": [
|
| 17 |
+
{
|
| 18 |
+
"type": "text",
|
| 19 |
+
"text": prompt,
|
| 20 |
+
}
|
| 21 |
+
]
|
| 22 |
+
}
|
| 23 |
+
]
|
| 24 |
+
output = multimodal_model(messages).content
|
| 25 |
+
print("Feedback: ", output)
|
| 26 |
+
if "FAIL" in output:
|
| 27 |
+
raise Exception(output)
|
| 28 |
+
return True
|
| 29 |
+
|
| 30 |
+
def ensure_formatting(final_answer, agent_memory, model_name: str = 'granite3.3:8b'):
    """Check that the final answer matches the GAIA output-format rules.

    Sends the agent's succinct step history plus the proposed answer to a
    local Ollama model together with the GAIA formatting instructions; the
    model must reply with PASS or FAIL.

    Args:
        final_answer: The answer produced by the agent.
        agent_memory: Agent memory object exposing ``get_succinct_steps()``.
        model_name (str): Ollama model id used as the format checker.
            Defaults to 'granite3.3:8b' (the previously hard-coded value).

    Returns:
        bool: True if the checker's verdict contains "PASS".

    Raises:
        Exception: If the checker's verdict contains "FAIL"; the exception
            message carries the checker's full feedback.
    """
    # flatten_messages_as_text is required for text-only Ollama chat models.
    model = LiteLLMModel(model_id=f'ollama_chat/{model_name}',
                         flatten_messages_as_text=True)
    prompt = f"""
Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. Now here is the FINAL ANSWER that was given:
{final_answer}
Ensure the FINAL ANSWER is in the right format as asked for by the task. Here are the instructions that you need to evaluate:
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use commas to write your number. Don't use units such as $ or percent sign unless specified otherwise. Write your number in Arabic numbers (such as 9 or 3 or 1093) unless specified otherwise.
If you are asked for a currency in your answer, use the symbol for that currency. For example, if you are asked for the answers in USD, an example answer would be $40.00
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
If you are asked for a comma separated list, ensure you only return the content of that list, and NOT the brackets '[]'
First list reasons why it is/is not in the correct format and then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not.
"""
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                }
            ],
        }
    ]
    output = model(messages).content
    print("Feedback: ", output)
    # Reject badly formatted answers by raising, mirroring check_reasoning.
    if "FAIL" in output:
        raise Exception(output)
    return True
|
logger.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def get_logger(name: str = __name__) -> logging.Logger:
    """
    Create and configure a logger instance for the given module or name.

    Args:
        name (str, optional): Name of the logger. Defaults to the module name.

    Returns:
        logging.Logger: Configured logger instance at INFO level.
    """
    # basicConfig is a no-op once the root logger already has handlers,
    # so repeated calls from different modules are harmless: only the
    # first call installs the format and date format.
    logging.basicConfig(
        datefmt="%Y-%m-%d %H:%M:%S",
        format="%(asctime)s:%(module)s:%(funcName)s:%(levelname)s: %(message)s",
    )
    configured_logger = logging.getLogger(name)
    configured_logger.setLevel(logging.INFO)
    return configured_logger
|
openai_speech_to_text_tool.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import whisper
|
| 3 |
+
from smolagents import Tool
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class OpenAISpeechToTextTool(Tool):
    """
    Tool to convert speech to text using a locally loaded Whisper model.

    NOTE(review): despite the class name, this uses the open-source
    `whisper` package locally, not the OpenAI API.

    Args:
        audio_path (str): Path to the audio file.

    Returns:
        str: Transcribed text from the audio file, or an error message.
    """

    name = "transcribe_audio"
    description = "Transcribes audio to text and returns the text"
    inputs = {
        "audio_path": {"type": "string", "description": "Path to the audio file"},
    }
    output_type = "string"

    def forward(self, audio_path: str) -> str:
        try:
            # Validate the path first so we don't pay the (slow, memory-heavy)
            # cost of loading the Whisper model for a file that doesn't exist.
            if not os.path.exists(audio_path):
                return f"Error: Audio file not found at {audio_path}"

            model = whisper.load_model("small")
            result = model.transcribe(audio_path)
            return result["text"]
        except Exception as e:
            # Return the error as text so the agent can react instead of crashing.
            return f"Error transcribing audio: {str(e)}"
|
read_file_tool.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
|
| 3 |
+
class ReadFileTool(Tool):
    """
    Tool to read a text file and return its content.

    Args:
        file_path (str): Path to the file to read.

    Returns:
        str: Content of the file or error message.
    """

    name = "read_file"
    description = "Reads a file and returns its content"
    inputs = {
        "file_path": {"type": "string", "description": "Path to the file to read"},
    }
    output_type = "string"

    def forward(self, file_path: str) -> str:
        try:
            # Explicit UTF-8 keeps decoding consistent across platforms;
            # without it, open() falls back to the locale's encoding.
            with open(file_path, "r", encoding="utf-8") as file:
                return file.read()
        except Exception as e:
            # Surface the failure as a string so the agent loop continues.
            return f"Error reading file: {str(e)}"
|
table_extractor_tool.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from typing import Optional
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
class TableExtractorTool(Tool):
    """
    Extracts tables from Excel (.xlsx, .xls) or CSV files and answers queries.
    Auto-detects file type based on extension.
    """
    name = "table_extractor"
    description = "Reads Excel/CSV files and answers questions about tabular data"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to Excel/CSV file"
        },
        "sheet_name": {
            "type": "string",
            "description": "Sheet name (Excel only, optional)",
            "required": False,
            "nullable": True
        },
        "query": {
            "type": "string",
            "description": "Question about the data (e.g., 'total sales')",
            "required": False,
            "nullable": True
        }
    }
    output_type = "string"

    def forward(self,
                file_path: str,
                sheet_name: Optional[str] = None,
                query: Optional[str] = None) -> str:
        """Load the table and either answer `query` or dump the whole table."""
        try:
            # Validate file exists before handing it to pandas.
            if not os.path.exists(file_path):
                return f"Error: File not found at {file_path}"

            # Dispatch on extension.
            ext = os.path.splitext(file_path)[1].lower()

            if ext in ('.xlsx', '.xls'):
                df = self._read_excel(file_path, sheet_name)
            elif ext == '.csv':
                df = pd.read_csv(file_path)
            else:
                return f"Error: Unsupported file type {ext}"

            if df.empty:
                return "Error: No data found in file."

            return self._answer_query(df, query) if query else df.to_string()

        except Exception as e:
            return f"Error processing file: {str(e)}"

    def _read_excel(self, path: str, sheet_name: Optional[str]) -> pd.DataFrame:
        """Read an Excel file; if no sheet is given, use the first non-empty one."""
        if sheet_name:
            return pd.read_excel(path, sheet_name=sheet_name)

        # Auto-detect first non-empty sheet.
        sheets = pd.ExcelFile(path).sheet_names
        for sheet in sheets:
            df = pd.read_excel(path, sheet_name=sheet)
            if not df.empty:
                return df
        return pd.DataFrame()  # Return empty if all sheets are blank

    def _answer_query(self, df: pd.DataFrame, query: str) -> str:
        """Answer simple sum/average/filter queries with pandas operations."""
        query = query.lower()

        try:
            # SUM QUERIES (e.g., "total revenue")
            if "total" in query or "sum" in query:
                for col in df.select_dtypes(include='number').columns:
                    if col.lower() in query:
                        return f"Total {col}: {df[col].sum():.2f}"

            # AVERAGE QUERIES (e.g., "average price")
            elif "average" in query or "mean" in query:
                for col in df.select_dtypes(include='number').columns:
                    if col.lower() in query:
                        return f"Average {col}: {df[col].mean():.2f}"

            # FILTER QUERIES (e.g., "show sales > 1000")
            elif ">" in query or "<" in query:
                col = next((c for c in df.columns if c.lower() in query), None)
                if col:
                    # BUGFIX: `query` was lower-cased above, so the
                    # replacement must target the lower-cased column name.
                    # Replacing the original-cased name matched nothing
                    # for mixed-case columns, producing an invalid query.
                    filtered = df.query(query.replace(col.lower(), f"`{col}`"))
                    return filtered.to_string()

            # DEFAULT: Return full table with column names.
            return f"Data:\nColumns: {', '.join(df.columns)}\n\n{df.to_string()}"

        except Exception as e:
            return f"Query failed: {str(e)}\nAvailable columns: {', '.join(df.columns)}"
|
tools.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy
|
| 3 |
+
import tempfile
|
| 4 |
+
import requests
|
| 5 |
+
import whisper
|
| 6 |
+
import imageio
|
| 7 |
+
import yt_dlp
|
| 8 |
+
|
| 9 |
+
from PIL import Image
|
| 10 |
+
from typing import List, Optional
|
| 11 |
+
from urllib.parse import urlparse
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
from smolagents import tool, LiteLLMModel
|
| 14 |
+
import google.generativeai as genai
|
| 15 |
+
from pytesseract import image_to_string
|
| 16 |
+
|
| 17 |
+
load_dotenv()
|
| 18 |
+
|
| 19 |
+
MODEL_ID = "gemini/gemini-2.5-flash-preview-05-20"
|
| 20 |
+
|
| 21 |
+
# Vision Tool
|
| 22 |
+
@tool
def vision_tool(prompt: str, image_list: List[Image.Image]) -> str:
    """
    Analyzes one or more images using a multimodal model.

    Args:
        prompt (str): The user question or task.
        image_list (List[PIL.Image.Image]): A list of image objects.

    Returns:
        str: Model's response to the prompt about the images.
    """
    vision_model = LiteLLMModel(
        model_id=MODEL_ID,
        api_key=os.getenv("GEMINI_API"),
        temperature=0.2,
    )

    # Build the multimodal content: the text prompt first, then each image.
    content = [{"type": "text", "text": prompt}]
    for img in image_list:
        content.append({"type": "image", "image": img})

    reply = vision_model([{"role": "user", "content": content}])
    return reply.content
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# YouTube Frame Sampler
|
| 39 |
+
@tool
def youtube_frames_to_images(url: str, every_n_seconds: int = 5) -> List[Image.Image]:
    """
    Downloads a YouTube video and extracts frames at regular intervals.

    Args:
        url (str): The URL of the YouTube video to process.
        every_n_seconds (int): The time interval in seconds between extracted frames.

    Returns:
        List[Image.Image]: A list of sampled frames as PIL images.

    Raises:
        FileNotFoundError: If yt-dlp did not produce an .mp4 file.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        ydl_cfg = {
            "format": "bestvideo+bestaudio/best",
            "outtmpl": os.path.join(temp_dir, "yt_video.%(ext)s"),
            "merge_output_format": "mp4",
            "quiet": True,
            "force_ipv4": True
        }
        with yt_dlp.YoutubeDL(ydl_cfg) as ydl:
            ydl.extract_info(url, download=True)

        video_file = next((os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith('.mp4')), None)
        # Fail loudly if the download/merge produced no mp4 instead of
        # letting imageio raise a confusing error on a None path.
        if video_file is None:
            raise FileNotFoundError("yt-dlp produced no .mp4 file for this URL")

        reader = imageio.get_reader(video_file)
        try:
            fps = reader.get_meta_data().get("fps", 30)
            # Guard against interval == 0 (fps * seconds < 1), which would
            # make the modulo below raise ZeroDivisionError.
            interval = max(1, int(fps * every_n_seconds))
            return [Image.fromarray(frame) for i, frame in enumerate(reader) if i % interval == 0]
        finally:
            # Release the ffmpeg subprocess/file handle before the temp
            # directory is cleaned up.
            reader.close()
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# YouTube QA via File URI
|
| 71 |
+
@tool
def ask_youtube_video(url: str, question: str) -> str:
    """
    Sends a YouTube video to a multimodal model and asks a question about it.

    Args:
        url (str): The URI of the video file (already uploaded and hosted).
        question (str): The natural language question to ask about the video.

    Returns:
        str: The model's answer to the question, or an error message.
    """
    try:
        # BUGFIX: google.generativeai exposes configure() + GenerativeModel;
        # it has no Client class (that API belongs to the newer google-genai
        # package), so the previous code always raised AttributeError.
        genai.configure(api_key=os.getenv('GEMINI_API'))
        # MODEL_ID carries a LiteLLM-style "gemini/" prefix; the native SDK
        # expects the bare model name after the slash.
        model = genai.GenerativeModel(MODEL_ID.split("/", 1)[-1])
        response = model.generate_content(
            [
                {"role": "user", "parts": [
                    {"text": question},
                    {"file_data": {"file_uri": url}}
                ]}
            ]
        )
        return response.text
    except Exception as e:
        return f"Error asking {MODEL_ID} about video: {str(e)}"
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# File Reading Tool
|
| 101 |
+
@tool
def read_text_file(file_path: str) -> str:
    """
    Reads plain text content from a file.

    Args:
        file_path (str): The full path to the text file.

    Returns:
        str: The contents of the file, or an error message.
    """
    try:
        with open(file_path, mode="r", encoding="utf-8") as handle:
            contents = handle.read()
        return contents
    except Exception as e:
        return f"Error reading file: {e}"
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# File Downloader
|
| 120 |
+
@tool
def file_from_url(url: str, save_as: Optional[str] = None) -> str:
    """
    Downloads a file from a URL and saves it locally.

    Args:
        url (str): The URL of the file to download.
        save_as (Optional[str]): Optional filename to save the file as.
            Defaults to the basename of the URL path, or a random name.

    Returns:
        str: The local file path or an error message.
    """
    try:
        if not save_as:
            parsed = urlparse(url)
            save_as = os.path.basename(parsed.path) or f"file_{os.urandom(4).hex()}"

        file_path = os.path.join(tempfile.gettempdir(), save_as)
        # Timeout added: a hung server would otherwise block the agent forever.
        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()

        # Stream in 1 KiB chunks to avoid loading large files into memory.
        with open(file_path, "wb") as f:
            for chunk in response.iter_content(1024):
                f.write(chunk)

        return f"File saved to {file_path}"
    except Exception as e:
        return f"Download failed: {e}"
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# Audio Transcription (YouTube)
|
| 151 |
+
@tool
def transcribe_youtube(yt_url: str) -> str:
    """
    Transcribes the audio from a YouTube video using Whisper.

    Args:
        yt_url (str): The URL of the YouTube video.

    Returns:
        str: The transcribed text of the video.

    Raises:
        FileNotFoundError: If yt-dlp failed to produce a .wav file.
    """
    model = whisper.load_model("small")

    with tempfile.TemporaryDirectory() as tempdir:
        ydl_opts = {
            "format": "bestaudio",
            "outtmpl": os.path.join(tempdir, "audio.%(ext)s"),
            "postprocessors": [{
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav"
            }],
            "quiet": True,
            "force_ipv4": True
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(yt_url, download=True)

        wav_file = next((os.path.join(tempdir, f) for f in os.listdir(tempdir) if f.endswith(".wav")), None)
        # Fail with a clear message instead of letting Whisper choke on None
        # (e.g. when ffmpeg post-processing was unavailable).
        if wav_file is None:
            raise FileNotFoundError("yt-dlp produced no .wav file for this URL")
        return model.transcribe(wav_file)['text']
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
# Audio File Transcriber
|
| 184 |
+
@tool
def audio_to_text(audio_path: str) -> str:
    """
    Transcribes an uploaded audio file into text using Whisper.

    Args:
        audio_path (str): The local file path to the audio file.

    Returns:
        str: The transcribed text or an error message.
    """
    try:
        transcriber = whisper.load_model("small")
        transcription = transcriber.transcribe(audio_path)
        return transcription['text']
    except Exception as e:
        return f"Failed to transcribe: {e}"
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
# OCR
|
| 204 |
+
@tool
def extract_text_via_ocr(image_path: str) -> str:
    """
    Extracts text from an image using Optical Character Recognition (OCR).

    Args:
        image_path (str): The local path to the image file.

    Returns:
        str: The extracted text or an error message.
    """
    try:
        source_image = Image.open(image_path)
        extracted = image_to_string(source_image)
        return extracted
    except Exception as e:
        return f"OCR failed: {e}"
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
# CSV Analyzer
|
| 223 |
+
@tool
def summarize_csv_data(path: str, query: str = "") -> str:
    """
    Provides a summary of the contents of a CSV file.

    Args:
        path (str): The file path to the CSV file.
        query (str): Optional query to run on the data.

    Returns:
        str: Summary statistics and column details or an error message.
    """
    try:
        import pandas as pd

        frame = pd.read_csv(path)
        # NOTE(review): `query` is accepted but not applied here — confirm
        # whether filtering was ever intended.
        row_count = len(frame)
        column_names = list(frame.columns)
        return f"Loaded CSV with {row_count} rows. Columns: {column_names}\n\n{frame.describe()}"
    except Exception as e:
        return f"CSV error: {e}"
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# Excel Analyzer
|
| 244 |
+
@tool
def summarize_excel_data(path: str, query: str = "") -> str:
    """
    Provides a summary of the contents of an Excel file.

    Args:
        path (str): The file path to the Excel file (.xls or .xlsx).
        query (str): Optional query to run on the data.

    Returns:
        str: Summary statistics and column details or an error message.
    """
    try:
        import pandas as pd

        workbook = pd.read_excel(path)
        # NOTE(review): `query` is accepted but not applied here — confirm
        # whether filtering was ever intended.
        row_count = len(workbook)
        column_names = list(workbook.columns)
        return f"Excel file with {row_count} rows. Columns: {column_names}\n\n{workbook.describe()}"
    except Exception as e:
        return f"Excel error: {e}"
|
youtube_transcription_tool.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class YouTubeTranscriptionTool(Tool):
    """
    Tool to fetch the transcript of a YouTube video given its URL.

    Args:
        video_url (str): YouTube video URL.

    Returns:
        str: Transcript of the video as a single string.
    """

    name = "youtube_transcription"
    description = "Fetches the transcript of a YouTube video given its URL"
    inputs = {
        "video_url": {"type": "string", "description": "YouTube video URL"},
    }
    output_type = "string"

    def forward(self, video_url: str) -> str:
        video_id = self._extract_video_id(video_url)
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join([entry["text"] for entry in transcript])

    @staticmethod
    def _extract_video_id(video_url: str) -> str:
        """Parse the video id from common YouTube URL shapes.

        BUGFIX: the previous ``split("v=")[-1]`` kept trailing query
        parameters (e.g. ``...?v=ID&t=42`` -> ``ID&t=42``) and failed on
        ``youtu.be`` short links. Parse the URL properly and fall back to
        the old behavior for bare ids.
        """
        from urllib.parse import urlparse, parse_qs

        parsed = urlparse(video_url.strip())
        if parsed.hostname and "youtu.be" in parsed.hostname:
            # Short link: the id is the URL path.
            return parsed.path.lstrip("/")
        params = parse_qs(parsed.query)
        if "v" in params:
            return params["v"][0]
        # Fall back to the original heuristic for bare ids / unusual URLs.
        return video_url.strip().split("v=")[-1]
|