Agent_course_Final_Assignment

Sleeping

App Files Files Community

Agent_course_Final_Assignment / agent.py

RCaz

Update agent.py

2bcf72e verified 4 months ago

raw

history blame

4.56 kB

	import math
	from typing import Optional, Tuple, Literal
	from smolagents import tool



	@tool
	def extract_text_from_audio(file_path: str) -> str:
	    """Transcribe an audio file and return the recognized text.

	    The audio file is read into memory with the ``speech_recognition``
	    package and passed to its ``recognize_google`` recognizer.

	    Args:
	        file_path: Path to the audio file to transcribe.

	    Returns:
	        The text recognized in the audio file.
	    """
	    # NOTE: a second function with this same name is defined later in this
	    # file; that later definition is the one bound to the name after import.
	    # The ``Args:`` section above is required: the ``@tool`` decorator builds
	    # the tool's input schema from the docstring's argument descriptions.
	    import speech_recognition as sr

	    recognizer = sr.Recognizer()
	    with sr.AudioFile(file_path) as source:
	        # Load the audio data into memory.
	        audio_data = recognizer.record(source)
	        # Convert speech to text.
	        return recognizer.recognize_google(audio_data)


	@tool
	def extract_text_from_audio(file_path: str) -> str:
	    """
	    Extract and return the text transcription of an audio file using speech recognition.

	    This tool uses Google's speech recognition API to convert spoken audio content
	    into text. It supports the audio formats readable by the SpeechRecognition
	    library's ``AudioFile`` (WAV, AIFF and FLAC); other formats such as MP3 have
	    to be converted before calling this tool.

	    This tool does not raise on failure: when the file cannot be read or the
	    transcription fails, a message starting with "Error" is returned instead of
	    the transcription, so the calling agent can read what went wrong.

	    Args:
	        file_path: Path to the audio file to be transcribed. The file should
	            be in a format compatible with the SpeechRecognition library.

	    Returns:
	        The text extracted from the audio file, or an error message describing
	        why the transcription failed.

	    Examples:
	        >>> extract_text_from_audio("meeting_recording.wav")
	        "Hello team, welcome to our weekly meeting..."
	    """
	    import speech_recognition as sr

	    recognizer = sr.Recognizer()
	    try:
	        with sr.AudioFile(file_path) as source:
	            # Load the audio data into memory.
	            audio_data = recognizer.record(source)
	            # Convert speech to text.
	            return recognizer.recognize_google(audio_data)
	    except sr.UnknownValueError:
	        # This exception typically carries no message of its own, so spell
	        # out what it means instead of returning an empty description.
	        return f"Error: could not understand any speech in '{file_path}'."
	    except Exception as e:
	        # Best-effort tool boundary: report the failure as text (the declared
	        # return type) rather than returning the exception object itself.
	        return f"Error transcribing '{file_path}': {type(e).__name__}: {e}"


	class TestAgent:
	    """Question-answering agent built on a smolagents ``CodeAgent``.

	    The agent is equipped with the audio transcription tool defined in this
	    file, DuckDuckGo search, a web page visitor, a Wikipedia tool loaded from
	    LangChain, and the final-answer tool. Instances are callable: calling one
	    with a question runs the agent and returns its answer.
	    """

	    def __init__(self, *, model_id: str = "gpt-4o", max_steps: int = 4) -> None:
	        """Build the model, the tools and the underlying ``CodeAgent``.

	        Args:
	            model_id: Model identifier passed to ``OpenAIServerModel``.
	            max_steps: Maximum number of steps the agent may take per run.
	        """
	        # Third-party imports are kept local to the constructor, as in the
	        # original code, so importing this module does not require them.
	        from langchain_community.agent_toolkits.load_tools import load_tools
	        from smolagents import (
	            CodeAgent,
	            DuckDuckGoSearchTool,
	            FinalAnswerTool,
	            OpenAIServerModel,
	            Tool,
	            VisitWebpageTool,
	        )

	        # Additional tool from LangChain
	        # (https://docs.langchain.com/oss/python/integrations/tools).
	        # ``top_k_results`` is an option of the LangChain Wikipedia wrapper, so
	        # it is passed to ``load_tools``; setting it as an attribute on the
	        # smolagents ``Tool`` wrapper would not reach the underlying wrapper.
	        langchain_wikipedia = load_tools(["wikipedia"], top_k_results=3)[0]
	        wikipedia_tool = Tool.from_langchain(langchain_wikipedia)

	        # TODO: tools from MCP servers (https://github.com/mcp), e.g. the
	        # "youtubeqa@0.2.1" server launched through ``uvx``, were tried here
	        # but are currently not enabled.

	        model = OpenAIServerModel(model_id=model_id)

	        self.agent = CodeAgent(
	            tools=[
	                extract_text_from_audio,  # homemade tool
	                DuckDuckGoSearchTool(),  # basic tools from smolagents
	                VisitWebpageTool(),
	                wikipedia_tool,  # tool from LangChain with extra parameters
	                FinalAnswerTool(),
	            ],
	            # V2: markdownify and requests added.
	            additional_authorized_imports=["pandas", "markdownify", "requests"],
	            model=model,
	            max_steps=max_steps,
	            planning_interval=2,  # V3: add structure
	            verbosity_level=2,
	            use_structured_outputs_internally=True,  # V3: add structure
	        )

	        # V3: append an extra guidance rule to the default system prompt.
	        # NOTE(review): "axactly" looks like a typo for "exactly"; the string
	        # is left unchanged here because it is part of the runtime prompt.
	        prompt_for_guidance = "\n10. Provide the answer axactly as it is asked, be concise and precise\n\nNow Begin!"
	        self.agent.prompt_templates['system_prompt'] = (
	            self.agent.prompt_templates['system_prompt'] + prompt_for_guidance
	        )

	    def __call__(self, question: str) -> str:
	        """Run the agent on ``question`` and return its answer.

	        Args:
	            question: The question to answer.

	        Returns:
	            Whatever ``self.agent.run`` returns for the question, unmodified.
	        """
	        print(f"Agent received question (first 50 chars): {question[:50]}...")
	        answer = self.agent.run(question)
	        print(f"Agent returning his answer: {answer}")
	        return answer