import os

from smolagents import Tool, tool
from huggingface_hub import HfApi
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

from utils import upload_file

load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN")

api = HfApi()
client = InferenceClient(
    provider="hf-inference",
    api_key=HF_TOKEN,
)

# --- Constants ---
local_data_path = "../data"
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(local_data_path, exist_ok=True)


# NOTE: the docstrings of the @tool functions below are kept verbatim because
# smolagents derives the tool description / argument schema from them.
@tool
def image_question_answering(image_path: str, prompt: str) -> str:
    """
    This function takes a image path and a prompt, and returns the answer to the question.
    Args:
        image_path: The path to the image file
        prompt: The prompt to the question
    Returns:
        The answer to the question
    """
    # splitext keeps the leading dot, so the value is directly comparable to the
    # dotted suffixes below (a plain split(".")[-1] never matched them).
    file_extension = os.path.splitext(image_path)[1].lower()
    if file_extension in [".mp4", ".avi", ".mov", ".wmv", ".mkv", ".webm"]:
        return "Media type not supported. Please upload an image."

    # Remote images are passed through as-is; local files are uploaded first so
    # the inference endpoint can fetch them by URL.
    if image_path.startswith("http"):
        media_url = image_path
    else:
        media_url = upload_file(image_path)

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
                {
                    "type": "image_url",
                    "image_url": {"url": media_url},
                },
            ],
        }
    ]

    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        messages=messages,
    )
    # Return the text of the reply, not the message object, to honour `-> str`.
    return completion.choices[0].message.content


@tool
def transcribe_audio(file_local_path: str) -> str:
    """
    Transcribe the audio file and return the transcript
    Args:
        file_local_path: The local path to the audio file
    Returns:
        The transcript of the audio file
    """
    # The Space-backed tool is called with the uploaded file's URL rather than
    # the local path.
    file_url = upload_file(file_local_path)
    asr_tool = Tool.from_space(
        "hf-audio/whisper-large-v3",
        api_name="/predict_1",  # from file
        name="transcribe_audio",
        description="Use this tool to transcribe the audio",
    )
    transcript = asr_tool(file_url)
    return transcript


class GetFileTool(Tool):
    """Tool that downloads a task attachment into ``local_data_path``."""

    name = "get_file"
    description = "Download a file from the given file name"
    inputs = {
        "file_name": {
            "type": "string",
            "description": "Download the file from the given file name and outputs the local path"
        }
    }
    output_type = "string"

    def forward(self, file_name: str) -> str:
        """Fetch the file for ``file_name`` and return the local path it was saved to.

        The part of ``file_name`` before the first "." is used as the task id in
        the download URL. On a missing name or a non-200 response a plain
        message string is returned instead of a path.
        """
        import requests

        if not file_name:
            return "No file name provided"

        task_id = file_name.split(".")[0]
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        headers = {
            "accept": "application/json"
        }
        # A timeout keeps the agent from blocking forever on a stalled server.
        req = requests.get(url, headers=headers, timeout=30)
        if req.status_code != 200:
            return "File not found, please check the file name and try again."

        local_file_path = local_data_path + "/" + file_name
        with open(local_file_path, "wb") as f:
            f.write(req.content)
        print(f"File saved to {local_file_path}. You can read this file to process its contents.")
        return local_file_path