"""Gradio front-end wiring a multi-agent llama_index AgentWorkflow.

Builds the LLM, the individual tool-backed ReAct agents, the root workflow,
and a chat UI with an interaction-mode dropdown.
"""

import os

import gradio as gr
from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.litellm import LiteLLM

from code_agent import initialize_code_agent
from scientific_paper_agent import load_scientific_paper_dataset, ScientificPaperRetriever
from search_agent import init_search_tool
from tools import math_tool_func, init_image_to_text
from web_agent import initialize_web_agent

# Most recently selected interaction mode ("math" / "conversation" / "image").
# A bare `global` statement at module level is a no-op, so initialize it here
# to guarantee the name exists before the first chat turn.
currentMode = "conversation"

hf_token = os.environ.get('HF_TOKEN')

# NOTE(review): llama_index's LiteLLM wrapper takes the model id via `model`,
# not `model_name` — with `model_name=` the wrapper's default model would be
# used silently. Confirm against the installed llama-index-llms-litellm version.
llm = LiteLLM(
    model="huggingface/Qwen/Qwen2.5-7B-Instruct",
    api_key=hf_token,
)

# --- Tools -----------------------------------------------------------------

image_to_text_tool = FunctionTool.from_defaults(
    fn=init_image_to_text,
    name="image_to_text_tool",
    description="Generate captions from an image URL using BLIP. Returns both conditional and unconditional captions.",
)

search_tool = init_search_tool()

math_tool = FunctionTool.from_defaults(
    fn=math_tool_func,
    name="math_tool",
    description="Solving math problems using the Qwen2.5-Math-1.5B model.",
)

scientific_paper_dataset = load_scientific_paper_dataset()
scientific_paper_tool = FunctionTool.from_defaults(
    fn=ScientificPaperRetriever(scientific_paper_dataset).run,
    name="scientific_paper_info_retriever",
    description="Retrieves detailed information about scientific papers.",
)

# --- Agents ----------------------------------------------------------------

code_agent = initialize_code_agent()

image_to_text_agent = ReActAgent(
    name="image_to_text",
    description="Generate text captions from images",
    tools=[image_to_text_tool],
    system_prompt=(
        "You are an assistant specialized in image understanding. "
        "When given an image URL, use the image_to_text_tool to generate captions. "
        "Provide both conditional and unconditional descriptions in clear, concise language. "
        "Do not invent details beyond what the tool provides."
    ),
    llm=llm,
)

math_agent = ReActAgent(
    name="math_solver",
    description="Solve math problems using a dedicated math model",
    tools=[math_tool],
    # Fixed: prompt previously referenced "math_solver_tool", but the tool
    # registered above is named "math_tool" — the agent could never find it.
    system_prompt=(
        "You are an assistant specialized in solving math problems. "
        "When given a math query, use the math_tool to compute the answer. "
        "Explain the solution clearly and step by step when possible, "
        "but keep the final answer concise and accurate."
    ),
    llm=llm,
)

search_web_agent = ReActAgent(
    name="search_web",
    description="Searches the web for answers",
    tools=[search_tool],
    system_prompt=(
        "You are a helpful assistant. Use DuckDuckGoSearch to look up information. "
        "Always summarize the first useful result and return it directly. "
        "Do not keep searching repeatedly."
    ),
    llm=llm,
)

scientific_paper_agent = ReActAgent(
    name="scientific_paper_agent",
    description="Search scientific papers for the agent",
    tools=[scientific_paper_tool],
    system_prompt="You are a helpful assistant that can answer scientific questions based on scientific papers.",
    llm=llm,
)

query_engine_agent = initialize_web_agent(llm)

# --- Workflow --------------------------------------------------------------

# root_agent must match the name given to query_engine_agent inside
# initialize_web_agent — presumably "query_engine"; verify against web_agent.py.
multi_agent_workflow = AgentWorkflow(
    agents=[
        query_engine_agent,
        search_web_agent,
        math_agent,
        image_to_text_agent,
        scientific_paper_agent,
        code_agent,
    ],
    root_agent="query_engine",
    initial_state={"num_of_calls": 0},
    state_prompt="Current state: {state}. User Message: {msg}",
)


async def respond(message, history: list[dict[str, str]], mode="Conversation Mode"):
    """Chat handler invoked by gr.ChatInterface.

    Fixed: the previous signature (message, history, system_message,
    max_tokens, temperature, top_p, mode) did not match ChatInterface's call
    convention — with additional_inputs=[mode_dropdown] Gradio calls
    fn(message, history, mode), so the dropdown value was bound to
    system_message and the remaining parameters were missing (TypeError).

    Args:
        message: The user's latest message.
        history: Prior turns as OpenAI-style message dicts (unused — the
            workflow keeps its own state).
        mode: Current value of the "Interaction Mode" dropdown.

    Returns:
        The workflow's final response, rendered as a string for Gradio.
    """
    global currentMode

    # Gradio may pass None before the dropdown has a value.
    mode = mode or "Conversation Mode"
    print(f"Current Mode: {mode}")

    # Map the UI label to the internal mode identifier; unknown labels fall
    # back to "conversation", matching the original else-branch.
    mode_map = {
        "Math Mode": "math",
        "Conversation Mode": "conversation",
        "Image Mode": "image",
    }
    currentMode = mode_map.get(mode, "conversation")

    # NOTE(review): AgentWorkflow.run does not accept sampling parameters
    # (max_tokens/temperature/top_p) — those belong on the LLM constructor —
    # so they are no longer forwarded here.
    result = await multi_agent_workflow.run(user_msg=message)
    return str(result)


with gr.Blocks() as demo:
    # Dropdown placed above the chat input; ChatInterface forwards its value
    # as the third argument of `respond` on every submit, so no extra
    # .change() wiring is needed (the previous lambda handler was a no-op).
    mode_dropdown = gr.Dropdown(
        choices=["Math Mode", "Conversation Mode", "Image Mode"],
        value="Conversation Mode",
        label="Interaction Mode",
    )
    chatbot = gr.ChatInterface(
        fn=respond,
        type="messages",
        additional_inputs=[mode_dropdown],
    )

if __name__ == "__main__":
    demo.launch()