1MR committed on
Commit
846e6fc
·
verified ·
1 Parent(s): abb55b3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +318 -0
app.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- FastAPI imports ---
from fastapi import FastAPI, Request, Query
from fastapi.responses import JSONResponse

# ASGI application object; exposes the /ask endpoint defined at the bottom of this file.
app = FastAPI()
7
+
8
+
9
+ import os
10
+ import json
11
+ from typing import TypedDict, Annotated, List, Dict, Any
12
+ from typing import Literal, Tuple
13
+ import operator
14
+ from pydantic import BaseModel
15
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, AIMessage
16
+ from langchain.tools import BaseTool, StructuredTool, tool
17
+ from langgraph.graph import StateGraph, END
18
+ from langchain_mistralai import ChatMistralAI
19
+ from langchain_groq import ChatGroq
20
+ from langchain_google_genai import ChatGoogleGenerativeAI
21
+ from langgraph.checkpoint.memory import InMemorySaver
22
+ import requests
23
+ import base64
24
# SECURITY FIX: a real Google API key was previously hard-coded on this line
# and committed to source control — that key must be treated as leaked and
# revoked. The key is now read from the environment; export GOOGLE_API_KEY
# before starting the app.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError("GOOGLE_API_KEY environment variable is not set")
25
+
26
def encode_image(image_path):
    """Read the file at *image_path* and return its contents base64-encoded as a UTF-8 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
29
+
30
# --- Model and checkpointer setup ---
# llm_text = ChatGoogleGenerativeAI(model="gemini-2.0-flash")
# Both the conversational agent and the vision (movement) agent use Gemini 2.5 Flash.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
vision_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
# llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro")
# In-memory checkpointer: lets the compiled graph resume per-thread_id conversation state.
memory = InMemorySaver()
35
+
36
class AgentState(TypedDict):
    """Shared state passed between nodes of the agent graph."""
    # Conversation history; operator.add makes node returns APPEND to the
    # list instead of replacing it.
    messages: Annotated[list[AnyMessage], operator.add]
    # Routing decision recorded by the router ("Conversiton" or "Movement").
    agent_type: str
    # Raw text of the user's request, consumed by the router and movement agent.
    user_task: str
40
+
41
+
42
+
43
class OneWordOutput(BaseModel):
    """Structured-output schema forcing the router LLM to pick exactly one agent.

    NOTE(review): "Conversiton" is a misspelling of "Conversation", but the
    literal must stay in sync with the graph node names registered below —
    do not correct it in isolation.
    """
    choice: Literal["Conversiton", "Movement"]
45
def decide_which_agent_to_go_node(state: AgentState) -> AgentState:
    """No-op pass-through node.

    The graph needs a concrete entry node; the actual routing decision is
    made by the conditional edge (route_based_on_agent_type).
    """
    return state
48
def route_based_on_agent_type(state: AgentState) -> str:
    """Conditional-edge router: ask a small classifier LLM which agent handles the task.

    Returns the routing key 'Conversiton' or 'Movement' — these must match the
    node names registered with graph.add_node / add_conditional_edges below.

    BUG FIX: previously, when the classifier call raised, the fallback
    assignment was commented out, leaving `agent_type` unbound and crashing
    with NameError on the next line; and an unexpected label fell off the end
    of the if/elif, returning None and breaking the conditional edge mapping.
    Both paths now fall back to 'Conversiton'.
    """
    user_task = state.get('user_task', '')
    router_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")
    llm_structured = router_llm.with_structured_output(OneWordOutput)
    decide_prompt = """
Your job is to decide which agent node to use based on the user task.
you have 2 options:
1. Conversiton: Use this if the user just wants to chat, brainstorm, or discuss ideas.
2. Movement: Use this agent for tasks that require physical movement or navigation.
"""
    decide_message = [
        SystemMessage(content=decide_prompt),
        HumanMessage(content=user_task),
    ]

    # Default route used when the classifier errors out.
    agent_type = "Conversiton"
    try:
        response = llm_structured.invoke(decide_message)
        agent_type = response.choice
        print(f"Agent type decision: {agent_type}")
    except Exception as e:
        print(f"Error in agent decision: {e}")

    state['agent_type'] = agent_type
    # Map the model output to a graph routing key; anything unexpected routes
    # to the conversational agent instead of returning None.
    return agent_type if agent_type in ("Conversiton", "Movement") else "Conversiton"
78
def call_llm_Conversiton(state: AgentState):
    """Conversational agent node: forward the running message history to the chat model."""
    # if system_prompt_Conversiton:
    #     messages = [SystemMessage(content=system_prompt_Conversiton)] + messages
    reply = llm.invoke(state['messages'])
    return {"messages": [reply]}
84
+
85
+ # system_prompt_Movement = """
86
+ # You are Movement agent. Your task is to assist with physical movement or navigation-related tasks. Provide clear and concise instructions to help achieve the user's goals.
87
+ # You just need to make movement plan that follow the user in the room.
88
+ # you will be provided with image and what objects you will follow in the image.
89
+ # You have 4 wheels that you can control (Front_Right(FR), Front_Left(FL), Back_Right(BR), back_Left(BL)).
90
+ # the speed of each wheel can be set from 0 to 10.
91
+ # the direction of each wheel can be set to Forward, Backward, or Stop.
92
+ # You will generate a movement plan in json format based on the image and the object you will follow.
93
+ # Make the movement plan same as real world movement of cars.
94
+ # the json format of the movment is like this that you will generate based on the image.
95
+ # Here are some examples of movement plans you can generate based on different scenarios:
96
+ # Movement plan example:
97
+ # ````
98
+ # {
99
+ # "direction": "forward",
100
+ # "4wheels": {
101
+ # "FR": {"speed": 10, "Direction": "Forward"},
102
+ # "FL": {"speed": 10, "Direction": "Forward"},
103
+ # "BR": {"speed": 10, "Direction": "Forward"},
104
+ # "BL": {"speed": 10, "Direction": "Forward"}
105
+ # }
106
+ # }
107
+ # ````
108
+ # ````
109
+ # {
110
+ # "direction": "backward",
111
+ # "4wheels": {
112
+ # "FR": {"speed": 10, "Direction": "Backward"},
113
+ # "FL": {"speed": 10, "Direction": "Backward"},
114
+ # "BR": {"speed": 10, "Direction": "Backward"},
115
+ # "BL": {"speed": 10, "Direction": "Backward"}
116
+ # }
117
+ # }
118
+ # ````
119
+ # ````
120
+ # {
121
+ # "direction": "left",
122
+ # "4wheels": {
123
+ # "FR": {"speed": 10, "Direction": "Forward"},
124
+ # "FL": {"speed": 5, "Direction": "Forward"},
125
+ # "BR": {"speed": 10, "Direction": "Forward"},
126
+ # "BL": {"speed": 5, "Direction": "Forward"}
127
+ # }
128
+ # }
129
+ # ````
130
+ # ````
131
+ # {
132
+ # "direction": "right",
133
+ # "4wheels": {
134
+ # "FR": {"speed": 5, "Direction": "Forward"},
135
+ # "FL": {"speed": 10, "Direction": "Forward"},
136
+ # "BR": {"speed": 5, "Direction": "Forward"},
137
+ # "BL": {"speed": 10, "Direction": "Forward"}
138
+ # }
139
+ # }
140
+ # ````
141
+ # ````
142
+
143
+ # "direction": "forward_left_diagonal",
144
+ # "4wheels": {
145
+ # "FR": {"speed": 0, "Direction": "Stop"},
146
+ # "FL": {"speed": 10, "Direction": "Forward"},
147
+ # "BR": {"speed": 10, "Direction": "Forward"},
148
+ # "BL": {"speed": 0, "Direction": "Stop"}
149
+ # }
150
+ # }
151
+ # ````
152
+ # """
153
+
154
# System prompt for the Movement agent. The model must emit bare JSON (no
# markdown fences, no prose) describing per-wheel speed/direction commands —
# downstream parsing (extract_json_from_response) depends on this contract.
system_prompt_Movement = """
You are Movement agent. Your task is to assist with physical movement or navigation-related tasks.
You must output ONLY valid JSON (without markdown, without ```json, without explanations).

Rules:
- Do not include extra text or explanations.
- Do not wrap the JSON inside code blocks.
- Output pure JSON only.

Here are valid examples:

{
"direction": "forward",
"4wheels": {
"FR": {"speed": 10, "Direction": "Forward"},
"FL": {"speed": 10, "Direction": "Forward"},
"BR": {"speed": 10, "Direction": "Forward"},
"BL": {"speed": 10, "Direction": "Forward"}
}
}

{
"direction": "left",
"4wheels": {
"FR": {"speed": 10, "Direction": "Forward"},
"FL": {"speed": 5, "Direction": "Forward"},
"BR": {"speed": 10, "Direction": "Forward"},
"BL": {"speed": 5, "Direction": "Forward"}
}
}
"""
185
+
186
def take_image_and_object():
    """Fetch a snapshot from the IP-camera HTTP endpoint and save it as Taken_image.jpg.

    Raises:
        requests.Timeout: if the camera does not answer within 10 seconds.
        requests.HTTPError: on a non-2xx response.

    BUG FIX: the original call had no timeout (could block the caller forever
    if the camera is unreachable) and ignored HTTP errors (an error page would
    silently be written out as the "image").
    """
    url = "http://192.168.1.14:8080/photo.jpg"
    r = requests.get(url, timeout=10)
    r.raise_for_status()

    with open("Taken_image.jpg", "wb") as f:
        f.write(r.content)
192
+
193
def call_llm_Movement(state: AgentState):
    """Movement agent node: send the latest snapshot plus the user task to the vision model.

    Reads Taken_image.jpg from disk (presumably refreshed elsewhere — the
    camera fetch is commented out) and returns the model reply as a new message.
    """
    # take_image_and_object()
    image_b64 = encode_image("Taken_image.jpg")
    task_text = state.get('user_task', '')
    payload = [
        {"role": "system", "content": system_prompt_Movement},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": task_text},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
            ],
        },
    ]
    reply = vision_llm.invoke(payload)
    return {"messages": [reply]}
210
+
211
+
212
# --- Build and compile the routing graph ---
graph = StateGraph(AgentState)

graph.set_entry_point('decide_agent')
# Node names must match the routing keys returned by route_based_on_agent_type.
graph.add_node('Conversiton', call_llm_Conversiton)
graph.add_node('Movement', call_llm_Movement)
graph.add_node('decide_agent', decide_which_agent_to_go_node)
graph.add_conditional_edges(
    'decide_agent',
    route_based_on_agent_type,
    {
        'Conversiton': 'Conversiton',
        'Movement': 'Movement'
    }
)
graph.add_edge('Conversiton', END)
graph.add_edge('Movement', END)
# Checkpointer lets each thread_id resume its own conversation history.
compiled_graph = graph.compile(checkpointer=memory)
# NOTE(review): rendering the diagram at import time will crash app startup if
# the mermaid renderer is unavailable — consider guarding this call.
compiled_graph.get_graph().draw_mermaid_png(output_file_path=r"Newgraph.png")
230
+
231
+
232
+
233
def query_agent_with_planning(message: str, thread_id: str = "default") -> str:
    """
    Run the compiled agent graph with the given user message.
    Handles both Conversiton and Movement flows.

    Streams graph events, printing progress as it goes, and returns the
    concatenated content of every new message the graph produced
    (deduplicated by content), or an error string if execution failed.
    """
    print(f"\n🎯 TASK RECEIVED: {message}")
    print("=" * 50)

    # Initial state for the graph
    initial_state = {
        "messages": [HumanMessage(content=message)],
        "user_task": message,  # Save user input to state['user_task']
        "agent_type": "",
    }

    # thread_id selects the checkpointer slot so repeat calls share history.
    config = {
        "configurable": {"thread_id": thread_id},
        "recursion_limit": 100
    }

    final_response = ""

    try:
        print("📋 RUNNING AGENT GRAPH...")
        # Contents already printed/accumulated — avoids duplicating repeated
        # stream events in the returned transcript.
        printed_messages = set()
        for event in compiled_graph.stream(initial_state, config):
            for node_name, node_output in event.items():
                print(f"\n🔄 Executing Node: {node_name}")
                if "messages" in node_output:
                    for msg in node_output["messages"]:
                        if hasattr(msg, "content") and msg.content not in printed_messages:
                            # Try to parse msg.content as JSON (the Movement
                            # agent is instructed to emit bare JSON).
                            try:
                                json_obj = json.loads(msg.content)
                                print(json.dumps(json_obj, indent=2))
                                final_response += json.dumps(json_obj) + "\n"
                            except Exception:
                                # Not JSON — treat as plain conversational text.
                                print(f"📝 {msg.content}")
                                final_response += msg.content + "\n"
                            printed_messages.add(msg.content)

                # Show agent type decision
                if "agent_type" in node_output and node_output["agent_type"]:
                    print(f"🤖 Agent Selected: {node_output['agent_type']}")

    except Exception as e:
        error_msg = f"❌ Execution Error: {str(e)}"
        print(error_msg)
        final_response += error_msg

    return final_response.strip()
284
+
285
+
286
+
287
+
288
+ # Accept user input as a query parameter (GET or POST)
289
+
290
+ import re
291
+ import asyncio
292
+
293
+
294
def extract_json_from_response(response: str):
    """Return the first complete JSON object embedded in *response*, or None.

    BUG FIX: the previous implementation used the greedy regex
    r'(\\{[\\s\\S]*\\})', which spans from the FIRST '{' to the LAST '}' in the
    string. When the response contained a JSON object followed by more text
    with a later '}' (e.g. two movement plans, or prose with braces), that
    span was not valid JSON and the function wrongly returned None. Scanning
    with JSONDecoder.raw_decode finds the first well-formed object regardless
    of what follows it.
    """
    decoder = json.JSONDecoder()
    # Try to decode starting at each '{'; the first success wins.
    for brace in re.finditer(r'\{', response):
        try:
            obj, _ = decoder.raw_decode(response, brace.start())
        except ValueError:
            continue
        if isinstance(obj, dict):
            return obj
    return None
303
+
304
@app.get("/ask")
async def ask(user_input: str = Query(...)):
    """Handle GET /ask?user_input=...: run the agent graph and return extracted JSON.

    Responses:
        400 — empty/blank user_input.
        499 — request cancelled while the agent was running.
        422 — agent finished but produced no parseable JSON (raw output included).
        200 — first JSON object found in the agent output.
    """
    if not user_input.strip():
        return JSONResponse(content={"error": "user_input is required"}, status_code=400)

    # FIX: asyncio.get_event_loop() is deprecated inside coroutines since
    # Python 3.10; get_running_loop() is the supported way to reach the loop
    # this handler is already running on.
    loop = asyncio.get_running_loop()
    try:
        # The graph call is blocking (synchronous LLM invocations), so run it
        # in the default thread pool to keep the event loop responsive.
        response = await loop.run_in_executor(None, query_agent_with_planning, user_input)
    except asyncio.CancelledError:
        return JSONResponse(content={"error": "Request was cancelled"}, status_code=499)
    json_obj = extract_json_from_response(response)
    if json_obj:
        return JSONResponse(content=json_obj)
    return JSONResponse(content={"error": "No valid JSON found", "raw": response}, status_code=422)