AgentsCourseFinalProject

Runtime error

App Files Files Community

VicBeltran committed on Apr 30, 2025

Commit

de814df

1 Parent(s): 81917a3

working agent local version

Browse files

Files changed (6) hide show

__pycache__/agent.cpython-310.pyc +0 -0
__pycache__/agent_langchain.cpython-310.pyc +0 -0
agent.py +336 -0
agent_langchain.py +218 -0
app.py +6 -12
auxiliary_fns.py +69 -0

__pycache__/agent.cpython-310.pyc ADDED Viewed

Binary file (10.9 kB). View file

__pycache__/agent_langchain.cpython-310.pyc ADDED Viewed

Binary file (7.26 kB). View file

agent.py ADDED Viewed

	@@ -0,0 +1,336 @@

+import os
+import wiki
+import torch
+import logging
+import requests
+import wikipedia
+import pytesseract
+import pandas as pd
+from PIL import Image
+from io import BytesIO
+import soundfile as sf
+from pytube import YouTube
+from yt_dlp import YoutubeDL
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    pipeline,
+)
+from smolagents import (
+    CodeAgent,
+    DuckDuckGoSearchTool,
+    PythonInterpreterTool,
+    HfApiModel,
+    LiteLLMModel,
+    Tool,
+    TransformersModel
+)
+# Chat model shared by every GAIAAgent in this module. The model id and
+# api_base presumably point LiteLLM at a locally running Ollama server —
+# TODO confirm the server is reachable where this module is imported.
+model = LiteLLMModel(
+    model_id="ollama_chat/qwen3:14b",
+    api_base="http://127.0.0.1:11434",
+    num_ctx=8192
+)
+# Disabled alternative kept for reference: loading the model in-process
+# through TransformersModel instead of calling an external server.
+#bnb_config = BitsAndBytesConfig(load_in_8bit=True)
+#tokenizer = AutoTokenizer.from_pretrained(model_id)
+# model = TransformersModel(
+#     model_id=model_id,
+#     torch_dtype="bfloat16",
+#     device_map="cuda",
+#     trust_remote_code=True,
+#     max_new_tokens=2048
+# )
+#model = torch.compile(model, mode="default")
+from whisper import load_model as load_whisper
+# Whisper "small" checkpoint, loaded once at import time; TranscriptionTool uses it.
+whisper_model = load_whisper("small")
+# Module-wide logging: INFO level on the root logger, plus a named logger for this file.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# ——————————————————————————————————————————————————————————
+# 1) GAIA system prompt
+# ——————————————————————————————————————————————————————————
+# Instruction text stored on GAIAAgent as its system prompt. It requests the
+# closing "FINAL ANSWER: ..." template that GAIAAgent.__call__ searches for, and
+# announces the file marker that GAIAAgent.__call__ prepends to a question.
+GAIA_SYSTEM_PROMPT = """
+You are a general AI assistant. I will ask you a question.
+Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+All question related files if existant are given to you below as: AXULIARY FILE FOR QUESTION: [FILE_PATH]
+"""
+# Base URL of the scoring service; file_handler downloads task attachments from it.
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# File extensions grouped by media type. They are not referenced by the code
+# visible in this module.
+AUDIO_FILES = ["wav", "mp3", "aac", "ogg"]
+IMAGE_FILES = ["png", "jpg", "tiff", "jpeg", "bmp"]
+TABULAR_FILES = ["csv", "xlsx"]
+# ——————————————————————————————————————————————————————————
+# 2) Custom tools
+# ——————————————————————————————————————————————————————————
+# --- File handler ---
+def file_handler(task_id: str, file_name: str):
+    try:
+        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}")
+        response.raise_for_status()
+        data = response.content
+        ext = file_name.split('.')[-1].lower()
+        return data, ext
+    except Exception as e:
+        logger.error(f"Failed to fetch file: {e}")
+        raise
+def fetch_file(args: str) -> str:
+    """
+    Download a binary blob by task_id,file_name via file_handler,
+    save it under ./tmp/, and return the local filesystem path.
+    Args:
+        args: "task_id, file_name"
+    """
+    task_id, file_name = [x.strip() for x in args.split(',')]
+    data, ext = file_handler(task_id, file_name)
+    local_path = f"./tmp/{task_id}.{ext}"
+    os.makedirs(os.path.dirname(local_path), exist_ok=True)
+    with open(local_path, 'wb') as f:
+        f.write(data)
+    return local_path
+class TranscriptionTool(Tool):
+    name = "TranscriptionTool"
+    description = """
+    This tool transcribes spoken content from local audio files such as .wav or .mp3.
+    It uses OpenAI's Whisper model to convert speech to text.
+    It expects a file path to the audio file and returns a string containing the transcription.
+    To call the tool on code just use TranscriptionTool(path).
+    """
+    inputs = {
+        "path": {
+            "type": "string",
+            "description": "The path to a local audio file (.wav, .mp3, etc.)"
+        }
+    }
+    output_type = "string"
+    def forward(self, path: str) -> str:
+        data, sr = sf.read(path, dtype='float32')
+        res = whisper_model.transcribe(data, language='en')
+        return f"The transcribed audio text is: {res['text']}\n"
+class OCRTool(Tool):
+    name = "OCRTool"
+    description = """
+    This tool extracts text from images using Tesseract OCR.
+    It takes a path to an image file (e.g., .png or .jpg) and returns any readable text found in the image.
+    To call the tool on code just use OCRTool(path).
+    """
+    inputs = {
+        "path": {
+            "type": "string",
+            "description": "The path to a local image file (.png, .jpg, etc.)"
+        }
+    }
+    output_type = "string"
+    def forward(self, path: str) -> str:
+        img = Image.open(path)
+        text = pytesseract.image_to_string(img)
+        return f"Extracted text from image:\n\n{text}"
+class TablePreviewTool(Tool):
+    name = "TablePreviewTool"
+    description = """
+        This tool previews a CSV or Excel spreadsheet file.
+        It returns the shape (rows, columns), column names, the first few rows of data and some description of the database.
+        Useful for understanding the structure of tabular data before processing it.
+        To call the tool on code just use TablePreviewTool(path)"""
+    inputs = {
+        "path": {
+            "type": "string",
+            "description": "The path to a .csv or .xlsx file"
+        }
+    }
+    output_type = "string"
+    def forward(self, path: str) -> str:
+        ext = path.rsplit('.', 1)[-1].lower()
+        df = pd.read_csv(path) if ext == 'csv' else pd.read_excel(path)
+        return f"""Shape: {df.shape}\n Columns: {list(df.columns)}\n\n
+                Head: {df.head().to_markdown()}\n\n Description of dataset: {str(df.describe())}"""
+class YouTubeInfoTool(Tool):
+    name = "YouTubeInfoTool"
+    description = """
+    This tool fetches metadata and English captions from a given YouTube video.
+    It returns the video's title, description, and the English subtitles if available.
+    To call the tool on code just use YouTubeInfoTool(url)"""
+    inputs = {
+        "url": {
+            "type": "string",
+            "description": "The full URL to a YouTube video"
+        }
+    }
+    output_type = "string"
+    def forward(self, url: str) -> str:
+        ydl_opts = {
+            "skip_download": True,
+            "quiet": True,
+            "writesubtitles": True,
+            "writeautomaticsub": True,
+        }
+        with YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(url, download=False)
+        title = info.get("title", "")
+        if title == None:
+            title = "None"
+        desc = info.get("description", "")
+        if desc == None:
+            desc = "None"
+        # try manual subtitles first, then auto-generated
+        subs = info.get("subtitles", {}) or info.get("automatic_captions", {})
+        en_caps = subs.get("en") or subs.get("en-US") or []
+        if en_caps:
+            cap_url = en_caps[0]["url"]
+            captions = requests.get(cap_url).text
+        else:
+            captions = "No English captions available."
+        text = f"Title: {title}\n\nDescription:\n{desc}\n\nCaptions:\n{captions}"
+        return f"The Youtube video title, description and captions are respectivelly: {text}"
+class WikiTool(Tool):
+    name = "WikiTool"
+    description = """
+    This tool searches Wikipedia for a given query and returns a concise summary.
+    It takes a search term (string) as input and returns the first few sentences
+    of the corresponding Wikipedia article (or a notice if multiple or no pages are found).
+    To call the tool in code, use: WikiTool(query)
+    """
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The search term for Wikipedia (e.g., 'Python programming language')."
+        }
+    }
+    output_type = "string"
+    def setup(self):
+        # Set language or any expensive init once
+        wikipedia.set_lang("en")
+    def forward(self, query: str) -> str:
+        # Search for matching pages
+        results = wikipedia.search(query, results=5)
+        if not results:
+            return f"No Wikipedia pages found for '{query}'."
+        # If multiple results, pick the top one
+        page_title = results[0]
+        try:
+            # Get the summary (first 3 sentences)
+            summary = wikipedia.summary(page_title, auto_suggest=False)
+            return f"Wikipedia summary for '{page_title}':\n\n{summary}"
+        except wikipedia.DisambiguationError as e:
+            options = ", ".join(e.options[:5])
+            return (
+                f"Your query '{query}' is ambiguous. "
+                f"Here are some options: {options}"
+            )
+        except Exception as e:
+            return f"Error retrieving Wikipedia summary for '{page_title}': {e}"
+class TextFileReaderTool(Tool):
+    name = "TextFileReaderTool"
+    description = """
+    This tool reads the full contents of a local text-based file (e.g., .txt, .py, .md).
+    It takes a file path as input and returns the entire file as a single string.
+    To call the tool in code, use: TextFileReaderTool(path)
+    """
+    inputs = {
+        "path": {
+            "type": "string",
+            "description": "The path to a local text based file (.txt, .py, .md, etc.), example: ./tmp/f918266a-b3e0-4914-865d-4faa564f1aef.py"
+        }
+    }
+    output_type = "string"
+    def forward(self, path: str) -> str:
+        try:
+            with open(path, 'r', encoding='utf-8') as f:
+                content = f.read()
+            return f"Contents of '{path}':\n\n{content}"
+        except FileNotFoundError:
+            return f"Error: File not found at '{path}'."
+        except Exception as e:
+            return f"Error reading '{path}': {e}"
+# ——————————————————————————————————————————————————————————
+# 3) Built-in smolagents tools
+# ——————————————————————————————————————————————————————————
+# Module-level instances of two built-in smolagents tools.
+# NOTE(review): GAIAAgent below creates its own instances, so these two names
+# look unused in the visible code — confirm before removing.
+search_tool = DuckDuckGoSearchTool()
+python_repl = PythonInterpreterTool()
+# ——————————————————————————————————————————————————————————
+# 4) GaiaAgent class with file-preloading
+# ——————————————————————————————————————————————————————————
+class GAIAAgent:
+    def __init__(self, model_name: str = None):
+        """
+        Initialize the GAIA inference agent with your system prompt.
+        Args:
+            model_name: optional HF model identifier
+        """
+        self.system_prompt = GAIA_SYSTEM_PROMPT
+        self.model = model
+        self.agent = CodeAgent(
+            model=self.model,
+            tools=[
+                TextFileReaderTool(),
+                WikiTool(),
+                DuckDuckGoSearchTool(),
+                PythonInterpreterTool(),
+                TranscriptionTool(),
+                OCRTool(),
+                TablePreviewTool(),
+                YouTubeInfoTool(),
+            ],
+            max_steps=10,
+            verbosity_level=2,
+            add_base_tools=True,
+            additional_authorized_imports = ["numpy", "pandas", "wikipedia"]
+        )
+    def __call__(self, question: str,task_id: str = None, file_name: str = None) -> str:
+        """
+        Run the agent on `question`. If `task_id` and `file_name` are set,
+        download the file into ./tmp/ via fetch_file, then prefix:
+            "FILE: ./tmp/{file_name}\n\n{question}"
+        Returns only what's after 'FINAL ANSWER:'.
+        """
+        prompt = question
+        if task_id and file_name:
+            local_path = fetch_file(f"{task_id},{file_name}")
+            prompt = f"AXULIARY FILE FOR QUESTION: {local_path}\n\n{question}"
+        # Add system prompt before passing to model
+        full_prompt = f"{self.system_prompt}\n\nQuestion: {prompt}"
+        full_resp = self.agent.run(prompt)
+        if type(full_resp) != str:
+            full_resp = str(full_resp)
+        if "FINAL ANSWER:" in full_resp:
+            return full_resp.split("FINAL ANSWER:")[-1].strip()
+        if "**Answer**" in full_resp:
+            return full_resp.split("**Answer**:")[-1].strip()
+        if "**Answer:**" in full_resp:
+            return full_resp.split("**Answer:**")[-1].strip()
+        return full_resp

agent_langchain.py ADDED Viewed

	@@ -0,0 +1,218 @@

+import os
+import torch
+import logging
+import requests
+import pytesseract
+import pandas as pd
+from PIL import Image
+from io import BytesIO
+import soundfile as sf
+from langchain import hub
+from pytube import YouTube
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    pipeline,
+)
+from duckduckgo_search import DDGS
+from whisper import load_model as load_whisper
+from langchain_huggingface import HuggingFacePipeline
+from langchain.memory import ConversationBufferMemory
+from langchain_experimental.utilities import PythonREPL
+from langchain.agents import initialize_agent, Tool, AgentType, AgentExecutor, create_react_agent
+# Base URL of the scoring service; file_handler downloads task attachments from it.
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# File extensions grouped by media type. They are not referenced by the code
+# visible in this module.
+AUDIO_FILES = ["wav", "mp3", "aac", "ogg"]
+IMAGE_FILES = ["png", "jpg", "tiff", "jpeg", "bmp"]
+TABULAR_FILES = ["csv", "xlsx"]
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Instruction text passed to the executor as the "instructions" input by
+# GAIAAgent.__call__; it requests the "FINAL ANSWER: ..." template parsed there.
+GAIA_SYSTEM_PROMPT = (
+    "You are a general AI assistant. I will ask you a question. Report your thoughts, "
+    "and finish your answer with the following template: "
+    "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible "
+    "OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write "
+    "your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, "
+    "don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
+    "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
+)
+def file_handler(task_id: str, file_name: str):
+    try:
+        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}")
+        response.raise_for_status()
+        data = response.content
+        ext = file_name.split('.')[-1].lower()
+        return data, ext
+    except Exception as e:
+        logger.error(f"Failed to fetch file: {e}")
+        raise
+# Whisper "small" checkpoint, loaded once at import time; used by transcribe().
+whisper_model = load_whisper("small")
+# Causal LM that backs the agent, loaded with an 8-bit quantization config.
+model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+bnb_config = BitsAndBytesConfig(load_in_8bit=True)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    quantization_config=bnb_config,
+    device_map="auto",
+    #use_cache=True,
+)
+torch.backends.cuda.matmul.allow_tf32 = True
+# Best-effort optimisation: any failure is only logged as a warning.
+try:
+    model.enable_xformers_memory_efficient_attention()
+except Exception as e:
+    logger.warning(f"Failed to enable xformers memory optimization: {e}")
+# Text-generation pipeline around the model, wrapped for LangChain as `llm`.
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    temperature=0.05,
+    device_map="auto"
+)
+llm = HuggingFacePipeline(pipeline=pipe)
+def fetch_file(args: str) -> str:
+    try:
+        task_id, file_name = [x.strip() for x in args.split(',')]
+        data, ext = file_handler(task_id, file_name)
+        local_path = f"./tmp/{task_id}.{ext}"
+        os.makedirs(os.path.dirname(local_path), exist_ok=True)
+        with open(local_path, 'wb') as f:
+            f.write(data)
+        logger.info(f"File fetched and saved at {local_path}")
+        return local_path
+    except Exception as e:
+        logger.error(f"fetch_file failed: {e}")
+        raise
+def transcribe(path: str) -> str:
+    try:
+        data, sr = sf.read(path, dtype='float32')
+        res = whisper_model.transcribe(data, language='en')
+        return res['text']
+    except Exception as e:
+        logger.error(f"transcribe failed: {e}")
+        raise
+def ocr(path: str) -> str:
+    try:
+        img = Image.open(path)
+        return pytesseract.image_to_string(img)
+    except Exception as e:
+        logger.error(f"ocr failed: {e}")
+        raise
+def preview_table(path: str) -> str:
+    try:
+        ext = path.split('.')[-1]
+        df = pd.read_csv(path) if ext == 'csv' else pd.read_excel(path)
+        info = f"Table Shape: {df.shape}\nColumns: {list(df.columns)}\nHead:\n{df.head().to_markdown()}"
+        return info
+    except Exception as e:
+        logger.error(f"preview_table failed: {e}")
+        raise
+def youtube_info(url: str) -> str:
+    try:
+        yt = YouTube(url)
+        output = f"title: {yt.title}\n\ndescription: {yt.description}\n\n"
+        if 'en' in yt.captions:
+            output += yt.captions['en'].generate_srt_captions()
+        return output
+    except Exception as e:
+        logger.error(f"youtube_info failed: {e}")
+        raise
+def web_search(query: str) -> str:
+    results = []
+    with DDGS() as ddgs:
+        for r in ddgs.text(query, max_results=5):
+            results.append(f"{r['title']} — {r['href']}")
+    return '\n'.join(results)
+def read_code_from_file(file_path: str) -> str:
+    """Reads Python code from a file."""
+    try:
+        with open(file_path, 'r') as file:
+            code = file.read()
+        return code
+    except FileNotFoundError:
+        return "Error: File not found."
+    except Exception as e:
+        return f"Error reading file: {e}"
+def execute_python_from_file(file_path: str) -> str:
+    """Reads and executes Python code from a specified file."""
+    code = read_code_from_file(file_path)
+    if code.startswith("Error"):
+        return code
+    try:
+        output = python_repl.run(code)
+        return output
+    except Exception as e:
+        return f"Error executing code: {e}"
+# --- Define toolset ---
+tools = [
+    Tool(name='fetch_file', func=fetch_file, description='Download file by task_id,file_name'),
+    Tool(name='transcribe', func=transcribe, description='Transcribe a downloaded audio file'),
+    Tool(name='ocr', func=ocr, description='Extract text from a downloaded image'),
+    Tool(name='preview_table', func=preview_table, description='Show summary and first rows of a CSV/XLSX'),
+    Tool(name='youtube_info', func=youtube_info, description='Get info & transcript from a YouTube URL'),
+    Tool(name='web_search', func=web_search, description='Return top 5 search results for a query'),
+    Tool(name="Execute Python File",func=execute_python_from_file,description="Executes Python code from a specified file path. Input should be the full path to the Python file.",)
+]
+# --- Create agent using ReAct agent style ---
+base_prompt = hub.pull("langchain-ai/react-agent-template")
+tool_names = ", ".join([t.name for t in tools])
+agent = create_react_agent(llm, tools, base_prompt)
+memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+agent_executor = AgentExecutor(
+    agent=agent,
+    tools=tools,
+    memory=memory,
+    verbose=True,
+    max_iterations=5,
+    verbose=True,
+    handle_parsing_errors=True,
+    return_only_outputs=True
+)
+# --- 4) GAIAAgent class returning only the FINAL ANSWER ---
+class GAIAAgent:
+    def __init__(self):
+        self.agent = self.executor = agent_executor
+    def __call__(self, question: str, task_id: str = None, file_name: str = None) -> str:
+        prompt=""
+        if task_id and file_name:
+            prompt += f"FILE: {task_id},{file_name}\n"
+        prompt += question
+        # Use executor to get full dict response
+        response = self.executor.invoke({"input": prompt, "instructions": GAIA_SYSTEM_PROMPT})
+        print("prompt : ", prompt)
+        output = response.get("output") if isinstance(response, dict) else str(response)
+        if output and 'FINAL ANSWER:' in output:
+            return output.split('FINAL ANSWER:')[-1].strip()
+        return output or ""
+# NOTE(review): this rebinds the module-level name `agent` (previously the ReAct
+# agent returned by create_react_agent) and runs one question at import time.
+# It looks like a leftover smoke test — confirm before importing this module
+# from other code.
+agent = GAIAAgent()
+agent("Hello how are u?", "1", None)

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import os
-import gradio as gr
-import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
@@ -10,14 +11,6 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -40,7 +33,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -76,11 +69,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:

 import os
 import inspect
+import requests
 import pandas as pd
+import gradio as gr
+from agent import GAIAAgent
 # (Keep Constants as is)
 # --- Constants ---
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
+        agent = GAIAAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        question_file = item.get("file_name")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(question_text, task_id, question_file)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:

auxiliary_fns.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import os
+import requests
+import subprocess
+import pandas as pd
+from PIL import Image
+from io import BytesIO
+import soundfile as sf
+# Base URL of the scoring service; file_handler downloads task attachments from it.
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# Extension groups that file_handler uses to pick a reader for a download.
+IMAGE_FILES = ["png", "jpg", "tiff", "jpeg", "bmp"]
+AUDIO_FILES = ["wav", "mp3", "aac", "ogg"]
+TABULAR_FILES = ["csv", "xlsx"]
+def read_audio_file(audio_bytes, file_extension):
+    """
+    Reads audio data from in-memory bytes.
+    Args:
+        audio_bytes (bytes): The audio data as bytes.
+        file_extension (str): The extension of the audio file (e.g., 'wav', 'mp3').
+    """
+    try:
+        audio_buffer = BytesIO(audio_bytes)
+        format_string = file_extension.lower()
+        data, samplerate = sf.read(audio_buffer, format=format_string)
+        return (data, samplerate)
+    except sf.LibsndfileError:
+        print(f"Error: Could not read the audio data from memory with the specified format: {file_extension}")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+def read_tabular_data(file_bytes, file_extension):
+    file_bytes.seek(0)
+    if file_extension == "csv":
+        return (pd.read_csv(file_bytes))
+    elif file_extension == "xlsx":
+        return (pd.read_excel(file_bytes))
+def read_image_data(file_bytes, file_extension):
+    return Image.open(file_bytes)
+def write_and_execute_file(text):
+    with open(f"file_to_execute.{file_extension}", "wb") as f:
+        f.write(text)
+    result = subprocess.run(['python', 'file_to_execute.py'], capture_output=True, text=True, check=True)
+    return result.stdout
+def file_handler(task_id, file_name):
+    response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}")
+    response.raise_for_status()
+    data = response.content
+    ext = file_name.split(".")[-1]
+    if ext in AUDIO_FILES:
+        file_data = read_audio_file(data, ext)
+    elif ext in TABULAR_FILES:
+        file_data = read_tabular_file(data, ext)
+    elif ext in IMAGE_FILES:
+        file_data = read_image_file(data, ext)
+    elif ext == "py":
+        file_data = (data, ext)
+    return file_data, ext