Agent_course_Final_Assignment

Sleeping

App Files Files Community

RCaz commited on Nov 10, 2025

Commit

3e94ec9

verified ·

1 Parent(s): 54317b5

Update agent.py

Browse files

Files changed (1) hide show

agent.py +86 -45

agent.py CHANGED Viewed

@@ -3,7 +3,8 @@ from typing import Optional, Tuple, Literal
 from smolagents import tool
 import base64
 from openai import OpenAI
 @tool
 def download_and_get_path_for_provided_file(path: str):
@@ -29,7 +30,7 @@ def download_and_get_path_for_provided_file(path: str):
 @tool
 def extract_text_from_audio(file_path: str) -> str:
     """
-    Extract and return text transcription from an audio file.
     Args:
         file_path (str): Path to the audio file to be transcribed.
@@ -47,21 +48,25 @@ def extract_text_from_audio(file_path: str) -> str:
         >>> extract_text_from_audio("/path/to/audio/interview.mp3")
         "Could you please introduce yourself and your background?"
     """
-    client = OpenAI()
-    audio_file = open(file_path, "rb")
-    transcription = client.audio.transcriptions.create(
-        model="gpt-4o-transcribe",
-        file=audio_file,
-        response_format="text"
-    )
-    return transcription
 def describe_image(request:str, file_path: str) -> str:
     """
-    Extract and return the requested information from an image.
     Args:
         request: The information to retreive from the image. The request must be simple, short and precise.
@@ -79,44 +84,76 @@ def describe_image(request:str, file_path: str) -> str:
         "Qd3"
     """
-    client = OpenAI()
-    # Function to encode the image
-    def encode_image(image_path):
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode("utf-8")
-    # Getting the Base64 string
-    base64_image = encode_image(file_path)
-    response = client.responses.create(
-        model="gpt-4.1",
-        input=[
-            {
-                "role": "user",
-                "content": [
-                    { "type": "input_text", "text": request },
-                    {
-                        "type": "input_image",
-                        "image_url": f"data:image/jpeg;base64,{base64_image}",
-                    },
-                ],
-            }
-        ],
-    )
-    return response.output_text
 @tool
-def get_transcript(file_id:str) -> str:
     from youtube_transcript_api import YouTubeTranscriptApi
     ytt_api = YouTubeTranscriptApi()
-    transcript = ytt_api.fetch(video_id)
     return transcript
 class TestAgent:
     def __init__(self):
@@ -143,11 +180,15 @@ class TestAgent:
         #model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")
         # Instantiate the agent
         self.agent = CodeAgent(
-            tools=[extract_text_from_audio,        # homemade tool
-                   DuckDuckGoSearchTool(),          # basic tools from smolagent
                    VisitWebpageTool(),
-                   wikipedia_tool,                  # tool from langchain with extra parmaeters
-                   #youtube_tools,                   # tool from MCP server
                    FinalAnswerTool()],
             additional_authorized_imports=["pandas","markdownify","requests"],    # V2 add markdownify & requests
             model=model,
@@ -157,7 +198,7 @@ class TestAgent:
             use_structured_outputs_internally=True   # V3. Adds structure
         )
         # V3. add Guidance
-        prompt_for_guidance = "\n10. Provide the answer axactly as it is asked, be concise and precise\n\nNow Begin!"
         #self.agent.prompt_templates['system_prompt'] = self.agent.prompt_templates['system_prompt'] + prompt_for_guidance
         # V4. use prompt from the paper as guidance

 from smolagents import tool
 import base64
 from openai import OpenAI
+import joblib
+from openai import OpenAI
 @tool
 def download_and_get_path_for_provided_file(path: str):
 @tool
 def extract_text_from_audio(file_path: str) -> str:
     """
+    Extract and return text transcription from an audio file given its path.
     Args:
         file_path (str): Path to the audio file to be transcribed.
         >>> extract_text_from_audio("/path/to/audio/interview.mp3")
         "Could you please introduce yourself and your background?"
     """
+    try:
+        return joblib.load(f"cahced_files/{file_path}")
+    except:
+        client = OpenAI()
+        audio_file = open(file_path, "rb")
+        transcription = client.audio.transcriptions.create(
+            model="gpt-4o-transcribe",
+            file=audio_file,
+            response_format="text"
+        )
+        joblib.dump(transcription, f"cahced_files/{file_path}")
+        return transcription
 def describe_image(request:str, file_path: str) -> str:
     """
+    Extract and return the requested information from an image given its path.
     Args:
         request: The information to retreive from the image. The request must be simple, short and precise.
         "Qd3"
     """
+    try
+        return joblib.load(f"cahced_files/{file_path}")
+    except:
+        client = OpenAI()
+        # Function to encode the image
+        def encode_image(image_path):
+            with open(image_path, "rb") as image_file:
+                return base64.b64encode(image_file.read()).decode("utf-8")
+        # Getting the Base64 string
+        base64_image = encode_image(file_path)
+        response = client.responses.create(
+            model="gpt-4.1",
+            input=[
+                {
+                    "role": "user",
+                    "content": [
+                        { "type": "input_text", "text": request },
+                        {
+                            "type": "input_image",
+                            "image_url": f"data:image/jpeg;base64,{base64_image}",
+                        },
+                    ],
+                }
+            ],
+        )
+        joblib.dump(response.output_text,f"cahced_files/{file_path}")
+        return response.output_text
 @tool
+def get_transcript_from_youtube_file_id(file_id: str) -> str:
+    """
+    Retrieve the transcript for a YouTube video given its id.
+    Args:
+        file_id (str): The YouTube video ID (the alphanumeric string that appears after
+                      'v=' in a YouTube URL, e.g., 'dQw4w9WgXcQ').
+    Returns:
+        str: The transcript content for the specified video. a JSON string or formatted
+             text containing transcript segments with timestamps.
+    """
     from youtube_transcript_api import YouTubeTranscriptApi
     ytt_api = YouTubeTranscriptApi()
+    transcript = ytt_api.fetch(file_id)
     return transcript
+@tool
+def parse_python_file(path: str) -> str:
+    """
+    Read and return the contents of a Python file from its path.
+    Args:
+        path (str): The file path to the Python file to be read.
+    Returns:
+        str: The complete contents of the Python file as a string.
+    """
+    with open(file_path, "r") as py_file:
+        return py_file.read()
 class TestAgent:
     def __init__(self):
         #model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")
         # Instantiate the agent
         self.agent = CodeAgent(
+            tools=[download_and_get_path_for_provided_file,        # V4. get attached file
+                   DuckDuckGoSearchTool(),                         # basic tools from smolagent
                    VisitWebpageTool(),
+                   wikipedia_tool,                                 # tool from langchain with extra parmaeters
+                   #youtube_tools,                                 # tool from MCP server
+                   get_transcript_from_youtube_file_id,            # V4
+                   parse_python_file,                              # V4
+                   describe_image,                                 # V4
+                   extract_text_from_audio,                        # V4
                    FinalAnswerTool()],
             additional_authorized_imports=["pandas","markdownify","requests"],    # V2 add markdownify & requests
             model=model,
             use_structured_outputs_internally=True   # V3. Adds structure
         )
         # V3. add Guidance
+        #prompt_for_guidance = "\n10. Provide the answer axactly as it is asked, be concise and precise\n\nNow Begin!"
         #self.agent.prompt_templates['system_prompt'] = self.agent.prompt_templates['system_prompt'] + prompt_for_guidance
         # V4. use prompt from the paper as guidance