Spaces:

Shiva-teja-chary
/

multi

Sleeping

App Files Files Community

multi / app.py

Shiva-teja-chary

Create app.py

2aeab51 verified 3 months ago

raw

history blame contribute delete

4.55 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	from typing import List, Optional
	from openai import OpenAI
	import os
	import json
	import re

	# ======================================================
	# NVIDIA OPENAI-COMPATIBLE CLIENT
	# ======================================================
	# Model identifier sent with every chat-completion request.
	MODEL = "deepseek-ai/deepseek-v3.2"

	# OpenAI-compatible client pointed at the NVIDIA endpoint. The key is read
	# from the NVIDIA_API_KEY environment variable; when the variable is unset
	# the client receives None (NOTE(review): confirm how the SDK reacts).
	client = OpenAI(
	    api_key=os.environ.get("NVIDIA_API_KEY"),
	    base_url="https://integrate.api.nvidia.com/v1",
	)

	# ASGI application object that the route decorators below attach to.
	app = FastAPI()


	# ======================================================
	# HELPER: CALL LLM AND RETURN PURE JSON
	# ======================================================
	def call_llm_json(prompt: str) -> dict:
	    """Send *prompt* to the chat model and return the JSON object in its reply.

	    The reply may carry free-form reasoning around the JSON payload, so the
	    text is scanned for the first substring that decodes as a JSON object.

	    Args:
	        prompt: Full text of the single user message sent to the model.

	    Returns:
	        The decoded JSON object as a dict.

	    Raises:
	        HTTPException: status 500, with the underlying error text as detail,
	            when the API call fails, the reply is empty, or no JSON object
	            can be decoded from the reply.
	    """
	    try:
	        completion = client.chat.completions.create(
	            model=MODEL,
	            messages=[{"role": "user", "content": prompt}],
	            temperature=0,
	            top_p=0.95,
	            max_tokens=4096,
	            extra_body={"chat_template_kwargs": {"thinking": True}},
	            stream=False,
	        )

	        content = completion.choices[0].message.content
	        # The message content can be missing; fail with a clear message
	        # instead of a TypeError from the parsing step.
	        if not content:
	            raise ValueError("LLM returned empty content")

	        return _extract_json_object(content)

	    except Exception as e:
	        # Endpoint boundary: every failure is reported to the caller as a 500,
	        # with the original exception chained for server-side tracebacks.
	        raise HTTPException(status_code=500, detail=str(e)) from e


	def _extract_json_object(text: str) -> dict:
	    """Return the first JSON object embedded in *text*, ignoring surrounding prose."""
	    decoder = json.JSONDecoder()
	    start = text.find("{")
	    while start != -1:
	        try:
	            # raw_decode parses one JSON value beginning at `start` and
	            # tolerates trailing text, so stray braces in the model's
	            # reasoning before or after the payload do not break parsing.
	            parsed, _ = decoder.raw_decode(text, start)
	        except json.JSONDecodeError:
	            start = text.find("{", start + 1)
	            continue
	        return parsed
	    raise ValueError("No JSON found in LLM output")


	# ======================================================
	# 1️⃣ GENERATE STEPS (HIGH-LEVEL PLAN)
	# ======================================================
	class GenerateStepsRequest(BaseModel):
	    # Request body for POST /generate_steps.
	    # user_request: the user's free-text request; it is inserted into the
	    # planning prompt as-is.
	    user_request: str


	class GenerateStepsResponse(BaseModel):
	    # Response body of POST /generate_steps, validated against the JSON object
	    # returned by the model.
	    #
	    # intent:     intent label; the prompt asks for order_food, book_ride or
	    #             unknown.
	    # restaurant: restaurant name, or None when there is none.
	    # food_item:  food item, or None when there is none.
	    # steps:      ordered high-level steps.
	    #
	    # restaurant and food_item default to None so that a model reply which
	    # omits them (e.g. for a non-food request) still validates; without a
	    # default, Pydantic v2 treats Optional fields as required.
	    intent: str
	    restaurant: Optional[str] = None
	    food_item: Optional[str] = None
	    steps: List[str]


	# POST /generate_steps
	# Builds an intent-detection and planning prompt around the user's request
	# and returns the JSON object produced by the model (intent, restaurant,
	# food_item, steps). Failures inside call_llm_json surface as HTTP 500.
	@app.post("/generate_steps", response_model=GenerateStepsResponse)
	def generate_steps(req: GenerateStepsRequest):
	    # The alternatives in the format example are separated by a plain "|":
	    # "\|" is not a valid string escape (SyntaxWarning on Python 3.12+) and
	    # would put literal backslashes into the prompt.
	    # Doubled braces are literal braces inside the f-string.
	    prompt = f"""
	You are an intent and planning engine.

	USER REQUEST:
	{req.user_request}

	TASKS:
	1. Detect intent
	2. Extract restaurant name (if food)
	3. Extract food item (if food)
	4. Generate HIGH-LEVEL steps ONLY

	INTENT RULES:
	- food, restaurant, dish, eat, Swiggy, Zomato → order_food
	- ride, cab, bike, auto, Uber, Ola, Rapido → book_ride
	- otherwise → unknown

	STEP RULES:
	- App is ALREADY open
	- DO NOT include "open app"
	- Steps must be GENERIC (no UI clicks)
	- Max 10 steps
	- Order must be logical

	OUTPUT JSON FORMAT (ONLY JSON):
	{{
	"intent": "order_food | book_ride | unknown",
	"restaurant": "string | null",
	"food_item": "string | null",
	"steps": [
	"step 1",
	"step 2"
	]
	}}
	"""

	    return call_llm_json(prompt)


	# ======================================================
	# 2️⃣ NEXT UI STEP (SCREEN → ACTION)
	# ======================================================
	class NextUiStepRequest(BaseModel):
	    # Request body for POST /next_ui_step.
	    #
	    # user_request: the user's original free-text request.
	    # intent:       intent label passed through to the prompt.
	    # restaurant:   restaurant name, or None when not applicable.
	    # food_item:    food item, or None when not applicable.
	    # current_step: the high-level step currently being executed.
	    # screen:       text of the current app screen.
	    #
	    # restaurant and food_item default to None so clients may omit them
	    # instead of sending explicit nulls; without a default, Pydantic v2
	    # treats Optional fields as required.
	    user_request: str
	    intent: str
	    restaurant: Optional[str] = None
	    food_item: Optional[str] = None
	    current_step: str
	    screen: str


	class UiStep(BaseModel):
	    # One UI action inside a NextUiStepResponse.
	    #
	    # type:  action name, e.g. "food_open_search" as in the prompt's example.
	    # value: optional string payload for the action (presumably the text to
	    #        type for typing steps -- TODO confirm against the client).
	    # ms:    optional integer (presumably the wait duration in milliseconds
	    #        for wait steps -- TODO confirm against the client).
	    type: str
	    value: Optional[str] = None
	    ms: Optional[int] = None


	class NextUiStepResponse(BaseModel):
	    # Response body of POST /next_ui_step, validated against the JSON object
	    # returned by the model.
	    #
	    # done:   completion flag reported by the model.
	    # intent: intent label; the prompt's example echoes the request's intent.
	    # steps:  UI actions to perform.
	    # next:   short hint string, per the prompt's response format.
	    done: bool
	    intent: str
	    steps: List[UiStep]
	    next: str


	# POST /next_ui_step
	# Combines the user's request, the current high-level step, the current
	# screen text and the known context into a UI-automation prompt, then
	# returns the JSON object produced by the model. Failures inside
	# call_llm_json surface as HTTP 500.
	@app.post("/next_ui_step", response_model=NextUiStepResponse)
	def next_ui_step(req: NextUiStepRequest):
	    # Doubled braces are literal braces inside the f-string; single-brace
	    # fields are filled from the request. None values render as "None".
	    llm_prompt = f"""
	You are an Android UI automation agent.

	INPUTS:
	1) user_request – what the user wants
	2) screenshot – current app screen text
	3) current_step – current high-level step

	USER REQUEST:
	{req.user_request}

	CURRENT STEP:
	{req.current_step}

	SCREEN TEXT:
	{req.screen}

	KNOWN CONTEXT:
	- intent: {req.intent}
	- restaurant: {req.restaurant}
	- food_item: {req.food_item}

	GOAL:
	Return the NEXT UI step as JSON.

	================ INTENT =================
	Use provided intent ONLY.

	================ GENERAL RULES =================
	- Return ONLY valid JSON
	- NO explanation
	- Think ONE screen only

	================ FOOD RULES =================
	1) If restaurant exists → search & open
	2) After typing → ALWAYS click first result
	3) If item not visible → scroll_down
	4) Add item → click ADD / Add / + Add
	5) If cart visible → open cart → done=true

	FOOD STEPS:
	food_open_search
	food_type
	food_click_first_result
	food_open_restaurant
	food_find_item
	food_add_first
	food_open_cart

	================ WAIT =================
	- If loading → wait (1500–3000 ms)

	================ RESPONSE FORMAT =================
	{{
	"done": false,
	"intent": "{req.intent}",
	"steps": [
	{{ "type": "food_open_search" }}
	],
	"next": "short hint"
	}}
	"""

	    return call_llm_json(llm_prompt)