# Author: Katya Beresneva
# fix
# commit: 13317d6
import os
from langchain_core.messages import HumanMessage
from langchain_core.runnables.config import RunnableConfig
from langgraph.checkpoint.memory import MemorySaver
from langchain.globals import set_debug
from langchain.globals import set_verbose
from langgraph.prebuilt import create_react_agent
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt.chat_agent_executor import AgentState
from smolagents import DuckDuckGoSearchTool
from smolagents import PythonInterpreterTool
from tools import analyze_audio
from tools import analyze_excel
from tools import analyze_image
from tools import analyze_video
from tools import download_file_for_task
from tools import read_file_contents
from tools import search_arxiv
from tools import search_tavily
from tools import search_wikipedia
from tools import SmolagentToolWrapper
from tools import tavily_extract_tool
from utils import get_llm
from config import GOOGLE_API_KEY, AGENT_MODEL_NAME
# Read runtime configuration from the environment.
# NOTE(review): these assignments shadow the GOOGLE_API_KEY / AGENT_MODEL_NAME
# names imported from `config` above — the environment values (or the defaults
# here) always win. Confirm the `config` import is still needed.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
if not GOOGLE_API_KEY:
    # Fail fast at import time: every LLM call below requires this key.
    raise ValueError("GOOGLE_API_KEY environment variable is not set.")
AGENT_MODEL_NAME = os.getenv("AGENT_MODEL_NAME", "gemini-2.0-flash-lite")
# System prompt for the ReAct agent. It enforces terse, exact-match answer
# formatting; the literal "FINAL ANSWER: " prefix it mandates is parsed back
# out of the model's reply in MultiModalAgent.__call__.
MULTIMODAL_TASK_SOLVER_PROMPT = """
You are a specialized multimodal task-solving AI assistant capable of handling complex data analysis and information retrieval tasks.
Core Operating Guidelines:
- Employ systematic analysis: Break down problems into logical steps
- Maintain brevity: Provide answers in the most concise format possible - raw numbers, single words, or comma-delimited lists
- Format compliance:
* Numbers: No commas, units, or currency symbols
* Lists: Pure comma-separated values without additional text
* Text: Bare minimum words, no sentences or explanations
- Tool utilization:
* For multimedia content (images, audio, video) - use dedicated analysis tools
* For data processing (Excel, structured data) - use appropriate parsers
* For information retrieval - leverage search tools
- Verification principle: Never guess - use available tools to verify information
- Code usage: Implement Python code for calculations and data transformations
- Answer format: Always prefix final answers with 'FINAL ANSWER: '
- Counting queries: Return only the numerical count
- Listing queries: Return only the comma-separated items
- Sorting queries: Return only the ordered list
Sample Responses:
Q: Current Bitcoin price in USD? A: 47392
Q: Sort these colors: blue, red, azure A: azure, blue, red
Q: Capital of France? A: Paris
Q: Count vowels in 'hello' A: 2
Q: Temperature scale used in USA? A: Fahrenheit
Q: List prime numbers under 10 A: 2, 3, 5, 7
Q: Most streamed artist 2023? A: Taylor Swift
"""
#set_debug(True)
#set_verbose(True)
class MultiModalTaskState(AgentState):
    """LangGraph agent state extended with per-task metadata fields."""

    # Unique identifier of the task being solved.
    task_identifier: str
    # The natural-language question text for the task.
    query_text: str
    # Path/name of an optional input file attached to the task
    # (a sentinel string is used when no file is present).
    input_file_path: str
class MultiModalAgent:
    """ReAct agent for multimodal tasks (text, files, audio/video, web search).

    Wraps a LangGraph ``create_react_agent`` built from a Gemini LLM, a
    toolbox of download/analysis/search tools, and an in-memory checkpointer
    keyed by the task identifier.
    """

    def __init__(self, model_name: str | None = None):
        """Build the underlying ReAct agent.

        Args:
            model_name: LLM model identifier; defaults to ``AGENT_MODEL_NAME``
                when ``None``.
        """
        if model_name is None:
            model_name = AGENT_MODEL_NAME
        llm = self._get_llm(model_name)
        tools = self._get_tools()
        self.agent = create_react_agent(
            model=llm,
            tools=tools,
            prompt=MULTIMODAL_TASK_SOLVER_PROMPT,
            # Fix: wire in the custom state schema so the per-task fields
            # supplied in __call__ are actually part of the graph state.
            # Previously the default AgentState was used and the extra keys
            # passed to ainvoke matched no declared field.
            state_schema=MultiModalTaskState,
            checkpointer=MemorySaver(),
        )

    def _get_llm(self, model_name: str):
        """Return the chat LLM for ``model_name`` using the Google API key."""
        return get_llm(
            llm_provider_api_key=GOOGLE_API_KEY,
            model_name=model_name,
        )

    def _get_tools(self):
        """Return a ``ToolNode`` bundling search, download, and media-analysis tools."""
        tools = [
            SmolagentToolWrapper(DuckDuckGoSearchTool()),
            SmolagentToolWrapper(PythonInterpreterTool()),
            download_file_for_task,
            read_file_contents,
            analyze_audio,
            analyze_image,
            analyze_excel,
            analyze_video,
            search_arxiv,
            search_tavily,
            search_wikipedia,
            tavily_extract_tool,
        ]
        return ToolNode(tools)

    async def __call__(
        self, task_identifier: str, query_text: str, input_file_path: str | None = None
    ) -> str:
        """Run the agent on one task and return the extracted final answer.

        Args:
            task_identifier: Unique id for the task; also used as the
                checkpointer thread id so repeated calls share history.
            query_text: The question to answer.
            input_file_path: Optional file attached to the task; ``None``
                is replaced with a sentinel string for the prompt.

        Returns:
            The text after ``"FINAL ANSWER: "`` if present, otherwise the
            raw final message content.
        """
        execution_config = RunnableConfig(
            recursion_limit=64,
            configurable={"thread_id": task_identifier},
        )
        if not input_file_path:
            input_file_path = "None - no file present"
        user_input = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": f"Task Id: {task_identifier}, Question: {query_text}, Filename: {input_file_path}. If a filename is present (and is not 'None'), download the file for the task that's referenced in the question. If there isn't a filename present, please use tools where applicable.",
                }
            ]
        )
        response = await self.agent.ainvoke(
            {
                "messages": [user_input],
                # Fix: use the field names declared on MultiModalTaskState
                # (the original passed "question"/"task_id"/"file_name",
                # which matched no state field).
                "query_text": query_text,
                "task_identifier": task_identifier,
                "input_file_path": input_file_path,
            },
            execution_config,
        )
        final_response = response["messages"][-1].content
        # NOTE(review): message content is assumed to be a plain string here;
        # confirm the model never returns a structured (list) content payload.
        if "FINAL ANSWER: " in final_response:
            return final_response.split("FINAL ANSWER: ", 1)[1].strip()
        return final_response