Shiva-teja-chary committed on
Commit
2aeab51
·
verified ·
1 Parent(s): 094512f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -0
app.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import List, Optional
4
+ from openai import OpenAI
5
+ import os
6
+ import json
7
+ import re
8
+
9
+ # ======================================================
10
+ # NVIDIA OPENAI-COMPATIBLE CLIENT
11
+ # ======================================================
12
# The NVIDIA endpoint is reached through the OpenAI SDK by overriding base_url.
# NOTE(review): os.getenv returns None when NVIDIA_API_KEY is unset — confirm
# how the OpenAI client behaves in that case before deploying.
client = OpenAI(
    api_key=os.getenv("NVIDIA_API_KEY"),
    base_url="https://integrate.api.nvidia.com/v1",
)

# Model identifier passed as `model=` on every chat-completion request.
MODEL = "deepseek-ai/deepseek-v3.2"

# Application object; the route handlers below register themselves on it.
app = FastAPI()
20
+
21
+
22
+ # ======================================================
23
+ # HELPER: CALL LLM AND RETURN PURE JSON
24
+ # ======================================================
25
def _extract_json_object(text: str) -> dict:
    """Return the first well-formed JSON object embedded in *text*.

    Every ``{`` is tried in turn as a possible start of an object, so braces
    in surrounding prose, Markdown fences, or trailing text after the object
    do not break parsing.

    Raises:
        ValueError: if no position in *text* starts a decodable JSON object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses exactly one JSON value beginning at `start`
            # and ignores whatever follows it.
            parsed, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return parsed
    raise ValueError("No JSON found in LLM output")


def call_llm_json(prompt: str) -> dict:
    """Send *prompt* to the chat model and return the JSON object it replies with.

    Args:
        prompt: Full text sent as a single ``user`` message.

    Returns:
        The first JSON object found in the model's reply, as a dict.

    Raises:
        HTTPException: status 500, with the underlying error text as
            ``detail``, when the request fails, the reply has no content,
            or no JSON object can be decoded from it.
    """
    try:
        completion = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            top_p=0.95,
            max_tokens=4096,
            extra_body={"chat_template_kwargs": {"thinking": True}},
            stream=False,
        )

        content = completion.choices[0].message.content
        if content is None:
            # Give a readable error instead of a TypeError from the parser.
            raise ValueError("LLM returned no content")

        # Keep only the JSON payload; any reasoning text around it is ignored.
        return _extract_json_object(content)

    except Exception as e:
        # Endpoint boundary: every failure becomes a 500, with the cause chained.
        raise HTTPException(status_code=500, detail=str(e)) from e
48
+
49
+
50
+ # ======================================================
51
+ # 1️⃣ GENERATE STEPS (HIGH-LEVEL PLAN)
52
+ # ======================================================
53
class GenerateStepsRequest(BaseModel):
    # Request body for POST /generate_steps.
    # user_request: the user's free-text request; it is inserted verbatim
    # into the planning prompt.
    user_request: str
55
+
56
+
57
class GenerateStepsResponse(BaseModel):
    """Response body for POST /generate_steps.

    ``restaurant`` and ``food_item`` default to ``None`` so that a model
    reply which omits them (rather than sending an explicit null) still
    passes response validation.
    """

    # Intent label; the prompt asks for order_food, book_ride or unknown.
    intent: str
    # Restaurant name extracted from the request, if any.
    restaurant: Optional[str] = None
    # Food item extracted from the request, if any.
    food_item: Optional[str] = None
    # Ordered high-level steps produced by the model.
    steps: List[str]
62
+
63
+
64
@app.post("/generate_steps", response_model=GenerateStepsResponse)
def generate_steps(req: GenerateStepsRequest):
    """Ask the LLM for the intent and a high-level plan for the user's request.

    Args:
        req: Request body carrying the free-text ``user_request``.

    Returns:
        The JSON object returned by ``call_llm_json``; FastAPI validates it
        against ``GenerateStepsResponse``.
    """
    # The arrows in INTENT RULES are real "→" characters; they were previously
    # garbled by an encoding error and reached the model as noise.
    prompt = f"""
You are an intent and planning engine.

USER REQUEST:
{req.user_request}

TASKS:
1. Detect intent
2. Extract restaurant name (if food)
3. Extract food item (if food)
4. Generate HIGH-LEVEL steps ONLY

INTENT RULES:
- food, restaurant, dish, eat, Swiggy, Zomato → order_food
- ride, cab, bike, auto, Uber, Ola, Rapido → book_ride
- otherwise → unknown

STEP RULES:
- App is ALREADY open
- DO NOT include "open app"
- Steps must be GENERIC (no UI clicks)
- Max 10 steps
- Order must be logical

OUTPUT JSON FORMAT (ONLY JSON):
{{
"intent": "order_food | book_ride | unknown",
"restaurant": "string | null",
"food_item": "string | null",
"steps": [
"step 1",
"step 2"
]
}}
"""

    return call_llm_json(prompt)
104
+
105
+
106
+ # ======================================================
107
+ # 2️⃣ NEXT UI STEP (SCREEN → ACTION)
108
+ # ======================================================
109
class NextUiStepRequest(BaseModel):
    """Request body for POST /next_ui_step.

    ``restaurant`` and ``food_item`` default to ``None`` so callers with
    nothing to send for them may omit the keys instead of sending explicit
    nulls.
    """

    # The user's original free-text request.
    user_request: str
    # Intent label; the prompt tells the model to use this value only.
    intent: str
    # Restaurant name, if known.
    restaurant: Optional[str] = None
    # Food item, if known.
    food_item: Optional[str] = None
    # The high-level step currently being executed.
    current_step: str
    # Text of the current app screen, inserted under "SCREEN TEXT" in the prompt.
    screen: str
116
+
117
+
118
class UiStep(BaseModel):
    # One UI action returned by the model.
    # type: action name, e.g. one of the FOOD STEPS listed in the prompt.
    type: str
    # value: optional string payload for the action.
    value: Optional[str] = None
    # ms: optional integer; the prompt asks for 1500-3000 ms on a wait.
    ms: Optional[int] = None
122
+
123
+
124
class NextUiStepResponse(BaseModel):
    # Response body for POST /next_ui_step.
    # done: completion flag; the prompt asks for true once the cart is opened.
    done: bool
    # intent: the prompt's response template fills this with the request's intent.
    intent: str
    # steps: UI actions to perform on the current screen.
    steps: List[UiStep]
    # next: short hint string, shown as "short hint" in the prompt's template.
    next: str
129
+
130
+
131
@app.post("/next_ui_step", response_model=NextUiStepResponse)
def next_ui_step(req: NextUiStepRequest):
    """Ask the LLM for the next UI action given the current screen text.

    Args:
        req: Request body with the user's request, the known context
            (intent, restaurant, food item), the current high-level step
            and the current screen text.

    Returns:
        The JSON object returned by ``call_llm_json``; FastAPI validates it
        against ``NextUiStepResponse``.
    """
    # The dashes and arrows below are real "–" / "→" characters; they were
    # previously garbled by an encoding error and reached the model as noise.
    prompt = f"""
You are an Android UI automation agent.

INPUTS:
1) user_request – what the user wants
2) screenshot – current app screen text
3) current_step – current high-level step

USER REQUEST:
{req.user_request}

CURRENT STEP:
{req.current_step}

SCREEN TEXT:
{req.screen}

KNOWN CONTEXT:
- intent: {req.intent}
- restaurant: {req.restaurant}
- food_item: {req.food_item}

GOAL:
Return the NEXT UI step as JSON.

================ INTENT =================
Use provided intent ONLY.

================ GENERAL RULES =================
- Return ONLY valid JSON
- NO explanation
- Think ONE screen only

================ FOOD RULES =================
1) If restaurant exists → search & open
2) After typing → ALWAYS click first result
3) If item not visible → scroll_down
4) Add item → click ADD / Add / + Add
5) If cart visible → open cart → done=true

FOOD STEPS:
food_open_search
food_type
food_click_first_result
food_open_restaurant
food_find_item
food_add_first
food_open_cart

================ WAIT =================
- If loading → wait (1500–3000 ms)

================ RESPONSE FORMAT =================
{{
"done": false,
"intent": "{req.intent}",
"steps": [
{{ "type": "food_open_search" }}
],
"next": "short hint"
}}
"""

    return call_llm_json(prompt)