Spaces:

Shiva-teja-chary
/

multi

Sleeping

App Files Files Community

Shiva-teja-chary committed on Jan 28

Commit

2aeab51

verified ·

1 Parent(s): 094512f

Create app.py

Browse files

Files changed (1) hide show

app.py +197 -0

app.py ADDED Viewed

	@@ -0,0 +1,197 @@

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional
+from openai import OpenAI
+import os
+import json
+import re
+# ======================================================
+# NVIDIA OPENAI-COMPATIBLE CLIENT
+# ======================================================
+# OpenAI SDK client pointed at NVIDIA's OpenAI-compatible endpoint.
+# The key is read from the NVIDIA_API_KEY environment variable; os.getenv
+# returns None when the variable is unset.
+# NOTE(review): confirm how the OpenAI client behaves when api_key is None
+# (it may fall back to another environment variable or raise at import time).
+client = OpenAI(
+    base_url="https://integrate.api.nvidia.com/v1",
+    api_key=os.getenv("NVIDIA_API_KEY")
+)
+# Model identifier passed as `model=` by call_llm_json.
+MODEL = "deepseek-ai/deepseek-v3.2"
+# Application object that the @app.post route decorators below register on.
+app = FastAPI()
+# ======================================================
+# HELPER: CALL LLM AND RETURN PURE JSON
+# ======================================================
+def call_llm_json(prompt: str) -> dict:
+    try:
+        completion = client.chat.completions.create(
+            model=MODEL,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=0,
+            top_p=0.95,
+            max_tokens=4096,
+            extra_body={"chat_template_kwargs": {"thinking": True}},
+            stream=False
+        )
+        content = completion.choices[0].message.content
+        # 🔥 Extract ONLY JSON (ignore reasoning)
+        match = re.search(r"\{[\s\S]*\}", content)
+        if not match:
+            raise ValueError("No JSON found in LLM output")
+        return json.loads(match.group())
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# ======================================================
+# 1️⃣ GENERATE STEPS (HIGH-LEVEL PLAN)
+# ======================================================
+# Request body for POST /generate_steps.
+class GenerateStepsRequest(BaseModel):
+    # Free-text request from the user; interpolated into the planning prompt.
+    user_request: str
+class GenerateStepsResponse(BaseModel):
+    intent: str
+    restaurant: Optional[str]
+    food_item: Optional[str]
+    steps: List[str]
+# POST /generate_steps: build the intent/planning prompt around the user's
+# request, send it through call_llm_json, and return the decoded JSON object.
+# The result is checked against GenerateStepsResponse via response_model.
+# (Kept as comments: a function docstring would be published by FastAPI as the
+# endpoint description.)
+@app.post("/generate_steps", response_model=GenerateStepsResponse)
+def generate_steps(req: GenerateStepsRequest):
+    # req.user_request is inserted verbatim into the prompt. The doubled
+    # braces {{ }} are f-string escapes that render as literal { } in the
+    # JSON format example.
+    prompt = f"""
+You are an intent and planning engine.
+USER REQUEST:
+{req.user_request}
+TASKS:
+1. Detect intent
+2. Extract restaurant name (if food)
+3. Extract food item (if food)
+4. Generate HIGH-LEVEL steps ONLY
+INTENT RULES:
+- food, restaurant, dish, eat, Swiggy, Zomato → order_food
+- ride, cab, bike, auto, Uber, Ola, Rapido → book_ride
+- otherwise → unknown
+STEP RULES:
+- App is ALREADY open
+- DO NOT include "open app"
+- Steps must be GENERIC (no UI clicks)
+- Max 10 steps
+- Order must be logical
+OUTPUT JSON FORMAT (ONLY JSON):
+{{
+  "intent": "order_food | book_ride | unknown",
+  "restaurant": "string | null",
+  "food_item": "string | null",
+  "steps": [
+    "step 1",
+    "step 2"
+  ]
+}}
+"""
+    return call_llm_json(prompt)
+# ======================================================
+# 2️⃣ NEXT UI STEP (SCREEN → ACTION)
+# ======================================================
+class NextUiStepRequest(BaseModel):
+    user_request: str
+    intent: str
+    restaurant: Optional[str]
+    food_item: Optional[str]
+    current_step: str
+    screen: str
+# One UI action inside a /next_ui_step response.
+class UiStep(BaseModel):
+    # Action name, e.g. one of the FOOD STEPS listed in the next_ui_step prompt.
+    type: str
+    # Optional payload for the action — presumably the text to type; TODO confirm.
+    value: Optional[str] = None
+    # Optional duration — presumably the wait time in milliseconds (the prompt
+    # asks for waits of 1500–3000 ms); TODO confirm.
+    ms: Optional[int] = None
+# Response body for POST /next_ui_step; mirrors the RESPONSE FORMAT section of
+# the next_ui_step prompt.
+class NextUiStepResponse(BaseModel):
+    # True when the flow is finished (the prompt sets it once the cart is opened).
+    done: bool
+    # Intent label; the prompt's format example echoes the request's intent.
+    intent: str
+    # UI actions to perform on the current screen.
+    steps: List[UiStep]
+    # Short hint about what comes next.
+    next: str
+# POST /next_ui_step: build the UI-automation prompt from the request fields,
+# send it through call_llm_json, and return the decoded JSON object.
+# The result is checked against NextUiStepResponse via response_model.
+# (Kept as comments: a function docstring would be published by FastAPI as the
+# endpoint description.)
+@app.post("/next_ui_step", response_model=NextUiStepResponse)
+def next_ui_step(req: NextUiStepRequest):
+    # All request fields are inserted verbatim; a None restaurant/food_item
+    # renders as the text "None". The doubled braces {{ }} are f-string escapes
+    # that render as literal { } in the response-format example.
+    prompt = f"""
+You are an Android UI automation agent.
+INPUTS:
+1) user_request – what the user wants
+2) screenshot – current app screen text
+3) current_step – current high-level step
+USER REQUEST:
+{req.user_request}
+CURRENT STEP:
+{req.current_step}
+SCREEN TEXT:
+{req.screen}
+KNOWN CONTEXT:
+- intent: {req.intent}
+- restaurant: {req.restaurant}
+- food_item: {req.food_item}
+GOAL:
+Return the NEXT UI step as JSON.
+================ INTENT =================
+Use provided intent ONLY.
+================ GENERAL RULES =================
+- Return ONLY valid JSON
+- NO explanation
+- Think ONE screen only
+================ FOOD RULES =================
+1) If restaurant exists → search & open
+2) After typing → ALWAYS click first result
+3) If item not visible → scroll_down
+4) Add item → click ADD / Add / + Add
+5) If cart visible → open cart → done=true
+FOOD STEPS:
+food_open_search
+food_type
+food_click_first_result
+food_open_restaurant
+food_find_item
+food_add_first
+food_open_cart
+================ WAIT =================
+- If loading → wait (1500–3000 ms)
+================ RESPONSE FORMAT =================
+{{
+  "done": false,
+  "intent": "{req.intent}",
+  "steps": [
+    {{ "type": "food_open_search" }}
+  ],
+  "next": "short hint"
+}}
+"""
+    return call_llm_json(prompt)