1MR committed on
Commit
846e6fc
·
verified ·
1 Parent(s): abb55b3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +318 -0
app.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- FastAPI imports ---
from fastapi import FastAPI, Request, Query
from fastapi.responses import JSONResponse

# ASGI application object; exposes the /ask endpoint defined at the bottom of this file.
app = FastAPI()
7
+
8
+
9
+ import os
10
+ import json
11
+ from typing import TypedDict, Annotated, List, Dict, Any
12
+ from typing import Literal, Tuple
13
+ import operator
14
+ from pydantic import BaseModel
15
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, AIMessage
16
+ from langchain.tools import BaseTool, StructuredTool, tool
17
+ from langgraph.graph import StateGraph, END
18
+ from langchain_mistralai import ChatMistralAI
19
+ from langchain_groq import ChatGroq
20
+ from langchain_google_genai import ChatGoogleGenerativeAI
21
+ from langgraph.checkpoint.memory import InMemorySaver
22
+ import requests
23
+ import base64
24
# SECURITY FIX: a real Google API key was previously hard-coded on this line
# and committed to source control — that key must be treated as leaked and
# revoked. The key is now read from the environment; export GOOGLE_API_KEY
# before starting the app.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError("GOOGLE_API_KEY environment variable is not set")
25
+
26
def encode_image(image_path):
    """Read the file at *image_path* and return its contents base64-encoded as a UTF-8 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
29
+
30
# --- Model and checkpointer setup ---
# llm_text = ChatGoogleGenerativeAI(model="gemini-2.0-flash")
# Both the conversational agent and the vision (movement) agent use Gemini 2.5 Flash.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
vision_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
# llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro")
# In-memory checkpointer: lets the compiled graph resume per-thread_id conversation state.
memory = InMemorySaver()
35
+
36
class AgentState(TypedDict):
    """Shared state passed between nodes of the agent graph."""
    # Conversation history; operator.add makes node returns APPEND to the
    # list instead of replacing it.
    messages: Annotated[list[AnyMessage], operator.add]
    # Routing decision recorded by the router ("Conversiton" or "Movement").
    agent_type: str
    # Raw text of the user's request, consumed by the router and movement agent.
    user_task: str
40
+
41
+
42
+
43
class OneWordOutput(BaseModel):
    """Structured-output schema forcing the router LLM to pick exactly one agent.

    NOTE(review): "Conversiton" is a misspelling of "Conversation", but the
    literal must stay in sync with the graph node names registered below —
    do not correct it in isolation.
    """
    choice: Literal["Conversiton", "Movement"]
45
def decide_which_agent_to_go_node(state: AgentState) -> AgentState:
    """No-op pass-through node.

    The graph needs a concrete entry node; the actual routing decision is
    made by the conditional edge (route_based_on_agent_type).
    """
    return state
48
def route_based_on_agent_type(state: AgentState) -> str:
    """Conditional-edge router: ask a small classifier LLM which agent handles the task.

    Returns the routing key 'Conversiton' or 'Movement' — these must match the
    node names registered with graph.add_node / add_conditional_edges below.

    BUG FIX: previously, when the classifier call raised, the fallback
    assignment was commented out, leaving `agent_type` unbound and crashing
    with NameError on the next line; and an unexpected label fell off the end
    of the if/elif, returning None and breaking the conditional edge mapping.
    Both paths now fall back to 'Conversiton'.
    """
    user_task = state.get('user_task', '')
    router_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")
    llm_structured = router_llm.with_structured_output(OneWordOutput)
    decide_prompt = """
Your job is to decide which agent node to use based on the user task.
you have 2 options:
1. Conversiton: Use this if the user just wants to chat, brainstorm, or discuss ideas.
2. Movement: Use this agent for tasks that require physical movement or navigation.
"""
    decide_message = [
        SystemMessage(content=decide_prompt),
        HumanMessage(content=user_task),
    ]

    # Default route used when the classifier errors out.
    agent_type = "Conversiton"
    try:
        response = llm_structured.invoke(decide_message)
        agent_type = response.choice
        print(f"Agent type decision: {agent_type}")
    except Exception as e:
        print(f"Error in agent decision: {e}")

    state['agent_type'] = agent_type
    # Map the model output to a graph routing key; anything unexpected routes
    # to the conversational agent instead of returning None.
    return agent_type if agent_type in ("Conversiton", "Movement") else "Conversiton"
78
def call_llm_Conversiton(state: AgentState):
    """Conversational agent node: forward the running message history to the chat model."""
    # if system_prompt_Conversiton:
    #     messages = [SystemMessage(content=system_prompt_Conversiton)] + messages
    reply = llm.invoke(state['messages'])
    return {"messages": [reply]}
84
+
85
+ # system_prompt_Movement = """
86
+ # You are Movement agent. Your task is to assist with physical movement or navigation-related tasks. Provide clear and concise instructions to help achieve the user's goals.
87
+ # You just need to make movement plan that follow the user in the room.
88
+ # you will be provided with image and what objects you will follow in the image.
89
+ # You have 4 wheels that you can control (Front_Right(FR), Front_Left(FL), Back_Right(BR), back_Left(BL)).
90
+ # the speed of each wheel can be set from 0 to 10.
91
+ # the direction of each wheel can be set to Forward, Backward, or Stop.
92
+ # You will generate a movement plan in json format based on the image and the object you will follow.
93
+ # Make the movement plan same as real world movement of cars.
94
+ # the json format of the movment is like this that you will generate based on the image.
95
+ # Here are some examples of movement plans you can generate based on different scenarios:
96
+ # Movement plan example:
97
+ # ````
98
+ # {
99
+ # "direction": "forward",
100
+ # "4wheels": {
101
+ # "FR": {"speed": 10, "Direction": "Forward"},
102
+ # "FL": {"speed": 10, "Direction": "Forward"},
103
+ # "BR": {"speed": 10, "Direction": "Forward"},
104
+ # "BL": {"speed": 10, "Direction": "Forward"}
105
+ # }
106
+ # }
107
+ # ````
108
+ # ````
109
+ # {
110
+ # "direction": "backward",
111
+ # "4wheels": {
112
+ # "FR": {"speed": 10, "Direction": "Backward"},
113
+ # "FL": {"speed": 10, "Direction": "Backward"},
114
+ # "BR": {"speed": 10, "Direction": "Backward"},
115
+ # "BL": {"speed": 10, "Direction": "Backward"}
116
+ # }
117
+ # }
118
+ # ````
119
+ # ````
120
+ # {
121
+ # "direction": "left",
122
+ # "4wheels": {
123
+ # "FR": {"speed": 10, "Direction": "Forward"},
124
+ # "FL": {"speed": 5, "Direction": "Forward"},
125
+ # "BR": {"speed": 10, "Direction": "Forward"},
126
+ # "BL": {"speed": 5, "Direction": "Forward"}
127
+ # }
128
+ # }
129
+ # ````
130
+ # ````
131
+ # {
132
+ # "direction": "right",
133
+ # "4wheels": {
134
+ # "FR": {"speed": 5, "Direction": "Forward"},
135
+ # "FL": {"speed": 10, "Direction": "Forward"},
136
+ # "BR": {"speed": 5, "Direction": "Forward"},
137
+ # "BL": {"speed": 10, "Direction": "Forward"}
138
+ # }
139
+ # }
140
+ # ````
141
+ # ````
142
+
143
+ # "direction": "forward_left_diagonal",
144
+ # "4wheels": {
145
+ # "FR": {"speed": 0, "Direction": "Stop"},
146
+ # "FL": {"speed": 10, "Direction": "Forward"},
147
+ # "BR": {"speed": 10, "Direction": "Forward"},
148
+ # "BL": {"speed": 0, "Direction": "Stop"}
149
+ # }
150
+ # }
151
+ # ````
152
+ # """
153
+
154
# System prompt for the Movement agent. The model must emit bare JSON (no
# markdown fences, no prose) describing per-wheel speed/direction commands —
# downstream parsing (extract_json_from_response) depends on this contract.
system_prompt_Movement = """
You are Movement agent. Your task is to assist with physical movement or navigation-related tasks.
You must output ONLY valid JSON (without markdown, without ```json, without explanations).

Rules:
- Do not include extra text or explanations.
- Do not wrap the JSON inside code blocks.
- Output pure JSON only.

Here are valid examples:

{
"direction": "forward",
"4wheels": {
"FR": {"speed": 10, "Direction": "Forward"},
"FL": {"speed": 10, "Direction": "Forward"},
"BR": {"speed": 10, "Direction": "Forward"},
"BL": {"speed": 10, "Direction": "Forward"}
}
}

{
"direction": "left",
"4wheels": {
"FR": {"speed": 10, "Direction": "Forward"},
"FL": {"speed": 5, "Direction": "Forward"},
"BR": {"speed": 10, "Direction": "Forward"},
"BL": {"speed": 5, "Direction": "Forward"}
}
}
"""
185
+
186
def take_image_and_object():
    """Fetch a snapshot from the IP-camera HTTP endpoint and save it as Taken_image.jpg.

    Raises:
        requests.Timeout: if the camera does not answer within 10 seconds.
        requests.HTTPError: on a non-2xx response.

    BUG FIX: the original call had no timeout (could block the caller forever
    if the camera is unreachable) and ignored HTTP errors (an error page would
    silently be written out as the "image").
    """
    url = "http://192.168.1.14:8080/photo.jpg"
    r = requests.get(url, timeout=10)
    r.raise_for_status()

    with open("Taken_image.jpg", "wb") as f:
        f.write(r.content)
192
+
193
def call_llm_Movement(state: AgentState):
    """Movement agent node: send the latest snapshot plus the user task to the vision model.

    Reads Taken_image.jpg from disk (presumably refreshed elsewhere — the
    camera fetch is commented out) and returns the model reply as a new message.
    """
    # take_image_and_object()
    image_b64 = encode_image("Taken_image.jpg")
    task_text = state.get('user_task', '')
    payload = [
        {"role": "system", "content": system_prompt_Movement},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": task_text},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
            ],
        },
    ]
    reply = vision_llm.invoke(payload)
    return {"messages": [reply]}
210
+
211
+
212
# --- Build and compile the routing graph ---
graph = StateGraph(AgentState)

graph.set_entry_point('decide_agent')
# Node names must match the routing keys returned by route_based_on_agent_type.
graph.add_node('Conversiton', call_llm_Conversiton)
graph.add_node('Movement', call_llm_Movement)
graph.add_node('decide_agent', decide_which_agent_to_go_node)
graph.add_conditional_edges(
    'decide_agent',
    route_based_on_agent_type,
    {
        'Conversiton': 'Conversiton',
        'Movement': 'Movement'
    }
)
graph.add_edge('Conversiton', END)
graph.add_edge('Movement', END)
# Checkpointer lets each thread_id resume its own conversation history.
compiled_graph = graph.compile(checkpointer=memory)
# NOTE(review): rendering the diagram at import time will crash app startup if
# the mermaid renderer is unavailable — consider guarding this call.
compiled_graph.get_graph().draw_mermaid_png(output_file_path=r"Newgraph.png")
230
+
231
+
232
+
233
def query_agent_with_planning(message: str, thread_id: str = "default") -> str:
    """
    Run the compiled agent graph with the given user message.
    Handles both Conversiton and Movement flows.

    Streams graph events, printing progress as it goes, and returns the
    concatenated content of every new message the graph produced
    (deduplicated by content), or an error string if execution failed.
    """
    print(f"\n🎯 TASK RECEIVED: {message}")
    print("=" * 50)

    # Initial state for the graph
    initial_state = {
        "messages": [HumanMessage(content=message)],
        "user_task": message,  # Save user input to state['user_task']
        "agent_type": "",
    }

    # thread_id selects the checkpointer slot so repeat calls share history.
    config = {
        "configurable": {"thread_id": thread_id},
        "recursion_limit": 100
    }

    final_response = ""

    try:
        print("📋 RUNNING AGENT GRAPH...")
        # Contents already printed/accumulated — avoids duplicating repeated
        # stream events in the returned transcript.
        printed_messages = set()
        for event in compiled_graph.stream(initial_state, config):
            for node_name, node_output in event.items():
                print(f"\n🔄 Executing Node: {node_name}")
                if "messages" in node_output:
                    for msg in node_output["messages"]:
                        if hasattr(msg, "content") and msg.content not in printed_messages:
                            # Try to parse msg.content as JSON (the Movement
                            # agent is instructed to emit bare JSON).
                            try:
                                json_obj = json.loads(msg.content)
                                print(json.dumps(json_obj, indent=2))
                                final_response += json.dumps(json_obj) + "\n"
                            except Exception:
                                # Not JSON — treat as plain conversational text.
                                print(f"📝 {msg.content}")
                                final_response += msg.content + "\n"
                            printed_messages.add(msg.content)

                # Show agent type decision
                if "agent_type" in node_output and node_output["agent_type"]:
                    print(f"🤖 Agent Selected: {node_output['agent_type']}")

    except Exception as e:
        error_msg = f"❌ Execution Error: {str(e)}"
        print(error_msg)
        final_response += error_msg

    return final_response.strip()
284
+
285
+
286
+
287
+
288
+ # Accept user input as a query parameter (GET or POST)
289
+
290
+ import re
291
+ import asyncio
292
+
293
+
294
def extract_json_from_response(response: str):
    """Return the first complete JSON object embedded in *response*, or None.

    BUG FIX: the previous implementation used the greedy regex
    r'(\\{[\\s\\S]*\\})', which spans from the FIRST '{' to the LAST '}' in the
    string. When the response contained a JSON object followed by more text
    with a later '}' (e.g. two movement plans, or prose with braces), that
    span was not valid JSON and the function wrongly returned None. Scanning
    with JSONDecoder.raw_decode finds the first well-formed object regardless
    of what follows it.
    """
    decoder = json.JSONDecoder()
    # Try to decode starting at each '{'; the first success wins.
    for brace in re.finditer(r'\{', response):
        try:
            obj, _ = decoder.raw_decode(response, brace.start())
        except ValueError:
            continue
        if isinstance(obj, dict):
            return obj
    return None
303
+
304
@app.get("/ask")
async def ask(user_input: str = Query(...)):
    """Handle GET /ask?user_input=...: run the agent graph and return extracted JSON.

    Responses:
        400 — empty/blank user_input.
        499 — request cancelled while the agent was running.
        422 — agent finished but produced no parseable JSON (raw output included).
        200 — first JSON object found in the agent output.
    """
    if not user_input.strip():
        return JSONResponse(content={"error": "user_input is required"}, status_code=400)

    # FIX: asyncio.get_event_loop() is deprecated inside coroutines since
    # Python 3.10; get_running_loop() is the supported way to reach the loop
    # this handler is already running on.
    loop = asyncio.get_running_loop()
    try:
        # The graph call is blocking (synchronous LLM invocations), so run it
        # in the default thread pool to keep the event loop responsive.
        response = await loop.run_in_executor(None, query_agent_with_planning, user_input)
    except asyncio.CancelledError:
        return JSONResponse(content={"error": "Request was cancelled"}, status_code=499)
    json_obj = extract_json_from_response(response)
    if json_obj:
        return JSONResponse(content=json_obj)
    return JSONResponse(content={"error": "No valid JSON found", "raw": response}, status_code=422)