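# Gradio chat demo for a llama_index multi-agent setup: a root query-engine agent
# that can hand off to web-search, math, image-captioning, scientific-paper, and
# code agents, all sharing one LLM.
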
import os
import gradio as gr
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
from llama_index.llms.litellm import LiteLLM
from code_agent import initialize_code_agent
from scientific_paper_agent import load_scientific_paper_dataset, ScientificPaperRetriever
from search_agent import init_search_tool
from tools import math_tool_func, init_image_to_text
from web_agent import initialize_web_agent

# Interaction mode selected in the UI; updated per request in respond().
currentMode = "conversation"

# Hugging Face token used by LiteLLM to call the hosted Qwen model.
hf_token = os.environ.get('HF_TOKEN')

# Shared LLM for every agent, served through LiteLLM's Hugging Face provider.
llm = LiteLLM(
    model="huggingface/Qwen/Qwen2.5-7B-Instruct",
    api_key=hf_token
)
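
# Wrap plain Python functions as llama_index FunctionTools so the agents can call them.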
image_to_text_tool = FunctionTool.from_defaults(
    fn=init_image_to_text,
    name="image_to_text_tool",
    description="Generate captions from an image URL using BLIP. Returns both conditional and unconditional captions."
)
search_tool = init_search_tool()
math_tool = FunctionTool.from_defaults(
    fn=math_tool_func,
    name="math_tool",
    description="Solving math problems using the Qwen2.5-Math-1.5B model."
)

scientific_paper_dataset = load_scientific_paper_dataset()
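# The retriever wraps the loaded dataset; its run() method is exposed below as a FunctionTool.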
scientific_paper_tool = FunctionTool.from_defaults(
    fn=ScientificPaperRetriever(scientific_paper_dataset).run,
    name="scientific_paper_info_retriever",
    description="Retrieves detailed information about scientific papers."
)

# Define Agents
code_agent = initialize_code_agent()
image_to_text_agent = ReActAgent(
    name="image_to_text",
    description="Generate text captions from images",
    tools=[image_to_text_tool],
    system_prompt=(
        "You are an assistant specialized in image understanding. "
        "When given an image URL, use the image_to_text_tool to generate captions. "
        "Provide both conditional and unconditional descriptions in clear, concise language. "
        "Do not invent details beyond what the tool provides."
    ),
    llm=llm
)
math_agent = ReActAgent(
    name="math_solver",
    description="Solve math problems using a dedicated math model",
    tools=[math_tool],
    system_prompt=(
        "You are an assistant specialized in solving math problems. "
        "When given a math query, use the math_solver_tool to compute the answer. "
        "Explain the solution clearly and step by step when possible, "
        "but keep the final answer concise and accurate."
    ),
    llm=llm
)
search_web_agent = ReActAgent(
    name="search_web",
    description="Searches the web for answers",
    tools=[search_tool],
    system_prompt=(
        "You are a helpful assistant. Use DuckDuckGoSearch to look up information. "
        "Always summarize the first useful result and return it directly. "
        "Do not keep searching repeatedly."
    ),
    llm=llm
)
scientific_paper_agent = ReActAgent(
    name="scientific_paper_agent",
    description="Search scientific papers for the agent",
    tools=[scientific_paper_tool],
    system_prompt="You are a helpful assistant that can answer scientific questions based on scientific papers.",
    llm=llm
)
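# query_engine_agent comes from the web_agent module; it is expected to be named
# "query_engine" so it can serve as the workflow's root agent below.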
query_engine_agent = initialize_web_agent(llm)


# Define the Workflow
multi_agent_workflow = AgentWorkflow(
    agents=[
        query_engine_agent,
        search_web_agent,
        math_agent,
        image_to_text_agent,
        scientific_paper_agent,
        code_agent
    ],
    root_agent="query_engine",
    initial_state={ "num_of_calls": 0 },
    state_prompt="Current state: {state}. User Message: {msg}"
)
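
# The root agent receives each user message first and can hand off to the other agents;
# initial_state and state_prompt inject the shared state into every turn.
# Minimal standalone smoke test (assumes HF_TOKEN is set and the helper modules resolve):
#   import asyncio
#   async def _smoke_test():
#       return await multi_agent_workflow.run(user_msg="What is 2 + 2?")
#   print(asyncio.run(_smoke_test()))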



async def respond(
    message,
    history: list[dict[str, str]],
    mode,
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    # Gradio appends additional_inputs after (message, history), so the dropdown
    # value arrives as `mode`. The sampling parameters are no longer exposed in the
    # UI, so they fall back to placeholder defaults here.
    global currentMode
    if mode is None:
        mode = "Conversation Mode"

    print(f"Current Mode: {mode}")

    if mode == "Math Mode":
        currentMode = "math"
    elif mode == "Image Mode":
        currentMode = "image"
    else:
        currentMode = "conversation"

    result = await multi_agent_workflow.run(
        user_msg=message,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    # Return plain text so the chat component can render it.
    return str(result)



with gr.Blocks() as demo:
    # Dropdown placed above the chat input
    mode_dropdown = gr.Dropdown(
        choices=["Math Mode", "Conversation Mode", "Image Mode"],
        value="Conversation Mode",
        label="Interaction Mode"
    )
    # ChatInterface with the mode dropdown passed through additional_inputs
    chatbot = gr.ChatInterface(
        fn=respond,
        type="messages",
        additional_inputs=[mode_dropdown]
    )

    # The dropdown value reaches respond() via additional_inputs, so no separate
    # change handler is needed.

if __name__ == "__main__":
    demo.launch()