Spaces:
Sleeping
Sleeping
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -3,7 +3,8 @@ from typing import Optional, Tuple, Literal
|
|
| 3 |
from smolagents import tool
|
| 4 |
import base64
|
| 5 |
from openai import OpenAI
|
| 6 |
-
|
|
|
|
| 7 |
|
| 8 |
@tool
|
| 9 |
def download_and_get_path_for_provided_file(path: str):
|
|
@@ -29,7 +30,7 @@ def download_and_get_path_for_provided_file(path: str):
|
|
| 29 |
@tool
|
| 30 |
def extract_text_from_audio(file_path: str) -> str:
|
| 31 |
"""
|
| 32 |
-
Extract and return text transcription from an audio file.
|
| 33 |
|
| 34 |
Args:
|
| 35 |
file_path (str): Path to the audio file to be transcribed.
|
|
@@ -47,21 +48,25 @@ def extract_text_from_audio(file_path: str) -> str:
|
|
| 47 |
>>> extract_text_from_audio("/path/to/audio/interview.mp3")
|
| 48 |
"Could you please introduce yourself and your background?"
|
| 49 |
"""
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
audio_file = open(file_path, "rb")
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def describe_image(request:str, file_path: str) -> str:
|
| 63 |
"""
|
| 64 |
-
Extract and return the requested information from an image.
|
| 65 |
|
| 66 |
Args:
|
| 67 |
request: The information to retrieve from the image. The request must be simple, short and precise.
|
|
@@ -79,44 +84,76 @@ def describe_image(request:str, file_path: str) -> str:
|
|
| 79 |
"Qd3"
|
| 80 |
"""
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
# Function to encode the image
|
| 85 |
-
def encode_image(image_path):
|
| 86 |
-
with open(image_path, "rb") as image_file:
|
| 87 |
-
return base64.b64encode(image_file.read()).decode("utf-8")
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
{
|
| 97 |
-
"role": "user",
|
| 98 |
-
"content": [
|
| 99 |
-
{ "type": "input_text", "text": request },
|
| 100 |
-
{
|
| 101 |
-
"type": "input_image",
|
| 102 |
-
"image_url": f"data:image/jpeg;base64,{base64_image}",
|
| 103 |
-
},
|
| 104 |
-
],
|
| 105 |
-
}
|
| 106 |
-
],
|
| 107 |
-
)
|
| 108 |
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
@tool
|
| 113 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 115 |
ytt_api = YouTubeTranscriptApi()
|
| 116 |
-
transcript = ytt_api.fetch(
|
| 117 |
return transcript
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
class TestAgent:
|
| 121 |
def __init__(self):
|
| 122 |
|
|
@@ -143,11 +180,15 @@ class TestAgent:
|
|
| 143 |
#model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")
|
| 144 |
# Instantiate the agent
|
| 145 |
self.agent = CodeAgent(
|
| 146 |
-
tools=[
|
| 147 |
-
DuckDuckGoSearchTool(),
|
| 148 |
VisitWebpageTool(),
|
| 149 |
-
wikipedia_tool,
|
| 150 |
-
#youtube_tools,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
FinalAnswerTool()],
|
| 152 |
additional_authorized_imports=["pandas","markdownify","requests"], # V2 add markdownify & requests
|
| 153 |
model=model,
|
|
@@ -157,7 +198,7 @@ class TestAgent:
|
|
| 157 |
use_structured_outputs_internally=True # V3. Adds structure
|
| 158 |
)
|
| 159 |
# V3. add Guidance
|
| 160 |
-
prompt_for_guidance = "\n10. Provide the answer axactly as it is asked, be concise and precise\n\nNow Begin!"
|
| 161 |
#self.agent.prompt_templates['system_prompt'] = self.agent.prompt_templates['system_prompt'] + prompt_for_guidance
|
| 162 |
|
| 163 |
# V4. use prompt from the paper as guidance
|
|
|
|
| 3 |
from smolagents import tool
|
| 4 |
import base64
|
| 5 |
from openai import OpenAI
|
| 6 |
+
import joblib
|
| 7 |
+
from openai import OpenAI
|
| 8 |
|
| 9 |
@tool
|
| 10 |
def download_and_get_path_for_provided_file(path: str):
|
|
|
|
| 30 |
@tool
|
| 31 |
def extract_text_from_audio(file_path: str) -> str:
|
| 32 |
"""
|
| 33 |
+
Extract and return text transcription from an audio file given its path.
|
| 34 |
|
| 35 |
Args:
|
| 36 |
file_path (str): Path to the audio file to be transcribed.
|
|
|
|
| 48 |
>>> extract_text_from_audio("/path/to/audio/interview.mp3")
|
| 49 |
"Could you please introduce yourself and your background?"
|
| 50 |
"""
|
| 51 |
+
try:
|
| 52 |
+
return joblib.load(f"cahced_files/{file_path}")
|
|
|
|
| 53 |
|
| 54 |
+
except:
|
| 55 |
+
client = OpenAI()
|
| 56 |
+
audio_file = open(file_path, "rb")
|
| 57 |
+
|
| 58 |
+
transcription = client.audio.transcriptions.create(
|
| 59 |
+
model="gpt-4o-transcribe",
|
| 60 |
+
file=audio_file,
|
| 61 |
+
response_format="text"
|
| 62 |
+
)
|
| 63 |
+
joblib.dump(transcription, f"cahced_files/{file_path}")
|
| 64 |
+
return transcription
|
| 65 |
|
| 66 |
|
| 67 |
def describe_image(request:str, file_path: str) -> str:
|
| 68 |
"""
|
| 69 |
+
Extract and return the requested information from an image given its path.
|
| 70 |
|
| 71 |
Args:
|
| 72 |
request: The information to retrieve from the image. The request must be simple, short and precise.
|
|
|
|
| 84 |
"Qd3"
|
| 85 |
"""
|
| 86 |
|
| 87 |
+
try
|
| 88 |
+
return joblib.load(f"cahced_files/{file_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
+
except:
|
| 91 |
+
client = OpenAI()
|
| 92 |
+
|
| 93 |
+
# Function to encode the image
|
| 94 |
+
def encode_image(image_path):
|
| 95 |
+
with open(image_path, "rb") as image_file:
|
| 96 |
+
return base64.b64encode(image_file.read()).decode("utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
+
# Getting the Base64 string
|
| 99 |
+
base64_image = encode_image(file_path)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
response = client.responses.create(
|
| 103 |
+
model="gpt-4.1",
|
| 104 |
+
input=[
|
| 105 |
+
{
|
| 106 |
+
"role": "user",
|
| 107 |
+
"content": [
|
| 108 |
+
{ "type": "input_text", "text": request },
|
| 109 |
+
{
|
| 110 |
+
"type": "input_image",
|
| 111 |
+
"image_url": f"data:image/jpeg;base64,{base64_image}",
|
| 112 |
+
},
|
| 113 |
+
],
|
| 114 |
+
}
|
| 115 |
+
],
|
| 116 |
+
)
|
| 117 |
+
joblib.dump(response.output_text,f"cahced_files/{file_path}")
|
| 118 |
+
return response.output_text
|
| 119 |
+
|
| 120 |
|
| 121 |
|
| 122 |
@tool
|
| 123 |
+
def get_transcript_from_youtube_file_id(file_id: str) -> str:
    """
    Fetch the transcript of a YouTube video identified by its video id.

    Args:
        file_id (str): The YouTube video ID (the alphanumeric string that appears after
            'v=' in a YouTube URL, e.g., 'dQw4w9WgXcQ').

    Returns:
        str: Whatever ``YouTubeTranscriptApi().fetch(file_id)`` returns, passed
            through unchanged.
            NOTE(review): the annotation says ``str``, but the value is not
            converted here - confirm the actual type against the
            youtube_transcript_api version in use.
    """
    # Imported inside the function, as in the original, so the dependency is
    # only required when this tool is actually invoked.
    from youtube_transcript_api import YouTubeTranscriptApi

    return YouTubeTranscriptApi().fetch(file_id)
|
| 139 |
|
| 140 |
+
|
| 141 |
+
@tool
|
| 142 |
+
def parse_python_file(path: str) -> str:
    """
    Read and return the contents of a Python file from its path.

    Args:
        path (str): The file path to the Python file to be read.

    Returns:
        str: The complete contents of the Python file as a string.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Fix: the body previously opened `file_path`, a name that does not exist
    # in this function (the parameter is `path`), so every call raised
    # NameError before touching the file system.
    # Python source files are UTF-8 by default, so decode explicitly instead
    # of depending on the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as py_file:
        return py_file.read()
|
| 155 |
+
|
| 156 |
+
|
| 157 |
class TestAgent:
|
| 158 |
def __init__(self):
|
| 159 |
|
|
|
|
| 180 |
#model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")
|
| 181 |
# Instantiate the agent
|
| 182 |
self.agent = CodeAgent(
|
| 183 |
+
tools=[download_and_get_path_for_provided_file, # V4. get attached file
|
| 184 |
+
DuckDuckGoSearchTool(), # basic tools from smolagent
|
| 185 |
VisitWebpageTool(),
|
| 186 |
+
wikipedia_tool, # tool from langchain with extra parameters
|
| 187 |
+
#youtube_tools, # tool from MCP server
|
| 188 |
+
get_transcript_from_youtube_file_id, # V4
|
| 189 |
+
parse_python_file, # V4
|
| 190 |
+
describe_image, # V4
|
| 191 |
+
extract_text_from_audio, # V4
|
| 192 |
FinalAnswerTool()],
|
| 193 |
additional_authorized_imports=["pandas","markdownify","requests"], # V2 add markdownify & requests
|
| 194 |
model=model,
|
|
|
|
| 198 |
use_structured_outputs_internally=True # V3. Adds structure
|
| 199 |
)
|
| 200 |
# V3. add Guidance
|
| 201 |
+
#prompt_for_guidance = "\n10. Provide the answer axactly as it is asked, be concise and precise\n\nNow Begin!"
|
| 202 |
#self.agent.prompt_templates['system_prompt'] = self.agent.prompt_templates['system_prompt'] + prompt_for_guidance
|
| 203 |
|
| 204 |
# V4. use prompt from the paper as guidance
|