# multi / app.py
# Shiva-teja-chary's picture
# Create app.py
# 2aeab51 verified
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional
from openai import OpenAI
import os
import json
import re
# ======================================================
# NVIDIA OPENAI-COMPATIBLE CLIENT
# ======================================================
# Shared client for the OpenAI-compatible endpoint, built once at import time.
# NOTE(review): os.environ.get returns None when NVIDIA_API_KEY is unset —
# confirm how the SDK behaves when it is handed api_key=None.
client = OpenAI(
    api_key=os.environ.get("NVIDIA_API_KEY"),
    base_url="https://integrate.api.nvidia.com/v1",
)

# Model identifier passed to the chat-completion call in call_llm_json.
MODEL = "deepseek-ai/deepseek-v3.2"

# FastAPI application on which the endpoints in this file are registered.
app = FastAPI()
# ======================================================
# HELPER: CALL LLM AND RETURN PURE JSON
# ======================================================
def _extract_json_object(text: str) -> dict:
    """Return the first complete JSON object embedded in *text*.

    Each ``{`` is tried in turn as the start of an object, so prose or stray
    braces before or after the payload are skipped instead of being handed to
    the parser as part of one greedy span.

    Raises:
        ValueError: if no position in *text* starts a valid JSON object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one value beginning at ``start`` and tolerates
            # trailing text after it.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    raise ValueError("No JSON found in LLM output")


def call_llm_json(prompt: str) -> dict:
    """Send *prompt* to the model as one user message and return its JSON reply.

    Args:
        prompt: Full prompt text; it is sent as the only message in the chat.

    Returns:
        The JSON object found in the model's reply, decoded to a ``dict``.

    Raises:
        HTTPException: status 500 with the underlying error text as ``detail``
            when the API call fails or the reply contains no JSON object.
    """
    try:
        completion = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            top_p=0.95,
            max_tokens=4096,
            extra_body={"chat_template_kwargs": {"thinking": True}},
            stream=False,
        )
        # ``content`` can be None; treat that like an empty reply so the
        # caller gets the usual "No JSON found" error instead of a TypeError.
        content = completion.choices[0].message.content or ""
        # Extract ONLY the JSON object (ignore any reasoning text around it).
        return _extract_json_object(content)
    except Exception as e:
        # Every failure on this path is surfaced to the API caller as a 500.
        raise HTTPException(status_code=500, detail=str(e)) from e
# ======================================================
# 1️⃣ GENERATE STEPS (HIGH-LEVEL PLAN)
# ======================================================
class GenerateStepsRequest(BaseModel):
    """Request body for ``POST /generate_steps``."""

    # Free-text description of what the user wants; it is inserted verbatim
    # into the planning prompt.
    user_request: str
class GenerateStepsResponse(BaseModel):
    """Response body for ``POST /generate_steps``.

    ``restaurant`` and ``food_item`` default to ``None`` so that a model reply
    which omits them (for example for a non-food intent) still validates,
    rather than depending on how the installed Pydantic version treats an
    ``Optional`` field that has no default.
    """

    # The prompt asks for "order_food", "book_ride" or "unknown"; the value is
    # not validated beyond being a string.
    intent: str
    # Extracted only for food requests; null otherwise.
    restaurant: Optional[str] = None
    food_item: Optional[str] = None
    # High-level plan steps, in execution order.
    steps: List[str]
@app.post("/generate_steps", response_model=GenerateStepsResponse)
def generate_steps(req: GenerateStepsRequest):
    """Classify the user's request and draft a high-level plan for it.

    Builds a single prompt around ``req.user_request`` and returns the JSON
    object produced by ``call_llm_json``; FastAPI validates that object
    against ``GenerateStepsResponse``.

    Raises:
        HTTPException: status 500, propagated from ``call_llm_json`` when the
            model call fails or its reply contains no JSON object.
    """
    # The prompt body stays flush-left: any leading whitespace would become
    # part of the text sent to the model. Doubled braces are literal braces.
    prompt = f"""
You are an intent and planning engine.
USER REQUEST:
{req.user_request}
TASKS:
1. Detect intent
2. Extract restaurant name (if food)
3. Extract food item (if food)
4. Generate HIGH-LEVEL steps ONLY
INTENT RULES:
- food, restaurant, dish, eat, Swiggy, Zomato → order_food
- ride, cab, bike, auto, Uber, Ola, Rapido → book_ride
- otherwise → unknown
STEP RULES:
- App is ALREADY open
- DO NOT include "open app"
- Steps must be GENERIC (no UI clicks)
- Max 10 steps
- Order must be logical
OUTPUT JSON FORMAT (ONLY JSON):
{{
"intent": "order_food | book_ride | unknown",
"restaurant": "string | null",
"food_item": "string | null",
"steps": [
"step 1",
"step 2"
]
}}
"""
    return call_llm_json(prompt)
# ======================================================
# 2️⃣ NEXT UI STEP (SCREEN β†’ ACTION)
# ======================================================
class NextUiStepRequest(BaseModel):
    """Request body for ``POST /next_ui_step``.

    ``restaurant`` and ``food_item`` default to ``None`` so callers may leave
    them out (for example for a non-food intent) instead of having to send
    explicit nulls.
    """

    # Original free-text request from the user.
    user_request: str
    # Intent to act on; the prompt tells the model to use this value only.
    intent: str
    # Context for food orders; null or absent when not applicable.
    restaurant: Optional[str] = None
    food_item: Optional[str] = None
    # The high-level plan step currently being carried out.
    current_step: str
    # Text of the current app screen, inserted verbatim into the prompt.
    screen: str
class UiStep(BaseModel):
    """One UI action in a ``/next_ui_step`` response."""

    # Action name, e.g. "food_open_search" as in the prompt's example reply.
    type: str
    # Optional payload for the action.
    # NOTE(review): presumably the text to type for typing actions — confirm
    # against the client that executes these steps.
    value: Optional[str] = None
    # Optional duration in milliseconds.
    # NOTE(review): presumably used by wait actions (the prompt asks for
    # 1500-3000 ms waits) — confirm against the client.
    ms: Optional[int] = None
class NextUiStepResponse(BaseModel):
    """Response body for ``POST /next_ui_step``."""

    # Completion flag; the prompt asks for true once the cart has been opened.
    done: bool
    # Intent as reported by the model; the prompt tells it to reuse the
    # intent supplied in the request.
    intent: str
    # UI actions to perform on the current screen.
    steps: List[UiStep]
    # Short hint text; the prompt's example reply uses "short hint".
    next: str
@app.post("/next_ui_step", response_model=NextUiStepResponse)
def next_ui_step(req: NextUiStepRequest):
    """Ask the model for the next UI action for the current screen.

    Builds a single prompt from the request fields (user request, current
    step, screen text and known context) and returns the JSON object produced
    by ``call_llm_json``; FastAPI validates that object against
    ``NextUiStepResponse``.

    Raises:
        HTTPException: status 500, propagated from ``call_llm_json`` when the
            model call fails or its reply contains no JSON object.
    """
    # The prompt body stays flush-left: any leading whitespace would become
    # part of the text sent to the model. Doubled braces are literal braces.
    # Unset optional fields are interpolated as the text "None".
    prompt = f"""
You are an Android UI automation agent.
INPUTS:
1) user_request – what the user wants
2) screenshot – current app screen text
3) current_step – current high-level step
USER REQUEST:
{req.user_request}
CURRENT STEP:
{req.current_step}
SCREEN TEXT:
{req.screen}
KNOWN CONTEXT:
- intent: {req.intent}
- restaurant: {req.restaurant}
- food_item: {req.food_item}
GOAL:
Return the NEXT UI step as JSON.
================ INTENT =================
Use provided intent ONLY.
================ GENERAL RULES =================
- Return ONLY valid JSON
- NO explanation
- Think ONE screen only
================ FOOD RULES =================
1) If restaurant exists → search & open
2) After typing → ALWAYS click first result
3) If item not visible → scroll_down
4) Add item → click ADD / Add / + Add
5) If cart visible → open cart → done=true
FOOD STEPS:
food_open_search
food_type
food_click_first_result
food_open_restaurant
food_find_item
food_add_first
food_open_cart
================ WAIT =================
- If loading → wait (1500–3000 ms)
================ RESPONSE FORMAT =================
{{
"done": false,
"intent": "{req.intent}",
"steps": [
{{ "type": "food_open_search" }}
],
"next": "short hint"
}}
"""
    return call_llm_json(prompt)