igiuseppe commited on
Commit
e79e607
·
1 Parent(s): 633edb2

added ux_testing api

Browse files
Files changed (6) hide show
  1. .gitignore +3 -1
  2. app.py +33 -1
  3. core.py +110 -6
  4. prompts.py +89 -22
  5. schemas.py +65 -4
  6. utils.py +14 -4
.gitignore CHANGED
@@ -7,4 +7,6 @@ eval/__pycache__/
7
  eval/synthetic/
8
  test_lite_llm.py
9
  test_gemini.py
10
- test/
 
 
 
7
  eval/synthetic/
8
  test_lite_llm.py
9
  test_gemini.py
10
+ test/
11
+ ux_test.py
12
+ images.json
app.py CHANGED
@@ -10,7 +10,8 @@ from schemas import (
10
  GenerateParametersRequest, GenerateParametersResponse,
11
  GenerateTwinRequest, GenerateTwinResponse,
12
  ChatWithTwinRequest, ChatWithTwinResponse,
13
- GenerateUsersAnswersRequest
 
14
  )
15
  import uvicorn
16
  import logging
@@ -229,4 +230,35 @@ def chat_with_twin_endpoint(request: ChatWithTwinRequest, x_api_key: str = Heade
229
  # Return the results in the format defined by ChatWithTwinResponse
230
  return ChatWithTwinResponse(answer=answer, new_memory=new_memory)
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  if __name__ == "__main__": uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=False)
 
10
  GenerateParametersRequest, GenerateParametersResponse,
11
  GenerateTwinRequest, GenerateTwinResponse,
12
  ChatWithTwinRequest, ChatWithTwinResponse,
13
+ GenerateUsersAnswersRequest,
14
+ UXTestingRequest, UXTestingResponse
15
  )
16
  import uvicorn
17
  import logging
 
230
  # Return the results in the format defined by ChatWithTwinResponse
231
  return ChatWithTwinResponse(answer=answer, new_memory=new_memory)
232
 
233
# Endpoint for UX Testing with Images
@app.post("/ux-testing",
response_model=UXTestingResponse,
summary="Conduct UX Testing with Finite State Machine",
description="Conduct interactive UX testing with a persona using images. The persona will choose an action to take, provide reasoning, and indicate if the task is complete."
)
def ux_testing_endpoint(request: UXTestingRequest, x_api_key: str = Header(...)):
    """Run one FSM step of a UX test: the persona inspects the screen image and picks an action."""
    # Reject callers that do not present the shared API key.
    if x_api_key != API_KEY:
        logger.warning("Unauthorized access attempt to /ux-testing.")
        raise HTTPException(status_code=403, detail="Invalid API Key")

    # Imported lazily so app start-up does not pay for core's dependencies.
    from core import ux_testing_fsm

    logger.info(f"Starting UX FSM testing with persona: {request.user.get('Name', 'Unknown')}")
    logger.info(f"Task: {request.task}")
    logger.info(f"Available actions: {request.available_actions}")

    # Delegate the persona simulation to the core FSM step function.
    fsm_result = ux_testing_fsm(
        persona=request.user,
        task=request.task,
        image=request.image,
        available_actions=request.available_actions,
        session_history=request.session_history,
    )

    logger.info("UX FSM testing completed successfully.")
    # Shape the core result into the documented response model.
    return UXTestingResponse(**fsm_result)
264
  if __name__ == "__main__": uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=False)
core.py CHANGED
@@ -12,6 +12,7 @@ from prompts import (
12
  GENERATE_REPORT_PROMPT,
13
  CHAT_WITH_REPORT_PROMPT,
14
  GENERATE_AUDIENCE_NAME_PROMPT,
 
15
  persona_schema,
16
  answers_schema
17
  )
@@ -318,17 +319,120 @@ def chat_with_report(users: List[dict], question: str, questions: List[str]) ->
318
 
319
  def generate_audience_name(audience: str, scope: str) -> str:
320
  """
321
- Generate a concise, descriptive name for the audience based on the research scope.
322
 
323
  Args:
324
- audience: The target audience description
325
- scope: The research scope
326
-
327
  Returns:
328
- A concise, descriptive name for the audience
329
  """
330
  prompt = GENERATE_AUDIENCE_NAME_PROMPT.format(
331
  audience=audience,
332
  scope=scope
333
  )
334
- return call_llm(prompt=prompt,model_type="low",temperature=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  GENERATE_REPORT_PROMPT,
13
  CHAT_WITH_REPORT_PROMPT,
14
  GENERATE_AUDIENCE_NAME_PROMPT,
15
+ UX_FSM_SIMPLE_PROMPT,
16
  persona_schema,
17
  answers_schema
18
  )
 
319
 
320
def generate_audience_name(audience: str, scope: str) -> str:
    """
    Generate a concise audience name based on the provided audience description and scope.

    Args:
        audience: Detailed audience description
        scope: Research scope

    Returns:
        String containing a concise audience name
    """
    # Build the naming prompt from the shared template, then ask the cheap
    # model deterministically (temperature 0) for a short name.
    name_prompt = GENERATE_AUDIENCE_NAME_PROMPT.format(audience=audience, scope=scope)
    return call_llm(prompt=name_prompt, temperature=0, model_type="low").strip()
338
+
339
def ux_testing_fsm(persona: dict, task: str, image: str, available_actions: list, session_history: list = None) -> dict:
    """
    Conduct simple FSM-based UX testing with a persona.

    Args:
        persona: User persona to conduct testing with
        task: The task the persona needs to accomplish
        image: URL of the current interface image
        available_actions: List of available actions in current state
        session_history: List of previous steps in this session

    Returns:
        Dictionary with action_taken, thought, task_finished, and task_difficulty
    """

    # Format available actions for inclusion in the prompt.
    actions_text = ", ".join(available_actions)

    # Summarize prior steps so the persona can avoid repeating actions.
    if session_history:
        history_text = "Previous steps in this session:\n"
        for i, step in enumerate(session_history, 1):
            history_text += f"Step {i}: Action '{step.get('action_taken', 'unknown')}' - {step.get('thought', 'No thought recorded')}\n"
    else:
        history_text = "This is the first step of the session."

    prompt = UX_FSM_SIMPLE_PROMPT.format(
        persona=persona,
        task=task,
        available_actions=actions_text,
        session_history=history_text
    )

    # Structured-output schema: the model returns the four fields directly at
    # the TOP level of the JSON object ("name" below is metadata, not a key).
    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "ux_testing_response",
            "schema": {
                "type": "object",
                "properties": {
                    "action_taken": {
                        "type": "string",
                        "description": "The action chosen from available actions",
                        "enum": available_actions
                    },
                    "thought": {
                        "type": "string",
                        "description": "Reasoning for the action"
                    },
                    "task_finished": {
                        "type": "boolean",
                        "description": "Whether the task is complete"
                    },
                    "task_difficulty": {
                        "type": ["number", "null"],
                        "minimum": 1.0,
                        "maximum": 5.0,
                        "description": "Difficulty rating if task is finished"
                    }
                },
                "required": ["action_taken", "thought", "task_finished", "task_difficulty"],
                "additionalProperties": False
            },
            "strict": True
        }
    }

    try:
        # Call LLM with the image and structured response format
        response = call_llm(
            prompt=prompt,
            temperature=0.7,
            model_type="mid",
            images=[image],
            response_format=response_format
        )

        # BUG FIX: the schema above places the fields at the top level, so the
        # old json.loads(response)["ux_testing_response"] always raised
        # KeyError and every call fell through to the error fallback. The
        # schema *name* is not a wrapper key. Accept both shapes defensively
        # in case a provider wraps the payload under the schema name anyway.
        parsed = json.loads(response)
        parsed_response = parsed.get("ux_testing_response", parsed) if isinstance(parsed, dict) else parsed

        # Guard against the model choosing an action outside the allowed set.
        if parsed_response.get("action_taken") not in available_actions:
            logger.warning(f"Persona chose invalid action: {parsed_response.get('action_taken')}. Using first available action.")
            parsed_response["action_taken"] = available_actions[0] if available_actions else "unknown"

        logger.info(f"UX FSM testing completed for persona: {persona.get('Name', 'Unknown')}")
        logger.info(f"Action taken: {parsed_response.get('action_taken')}")
        logger.info(f"Task finished: {parsed_response.get('task_finished')}")

        return parsed_response

    except Exception as e:
        # Best-effort fallback keeps the FSM session alive on any LLM or
        # parsing failure instead of crashing the endpoint.
        logger.error(f"Error during UX FSM testing for persona {persona.get('Name', 'Unknown')}: {e}")
        return {
            "action_taken": available_actions[0] if available_actions else "unknown",
            "thought": f"Error occurred during testing: {str(e)}",
            "task_finished": False,
            "task_difficulty": None
        }
prompts.py CHANGED
@@ -528,30 +528,97 @@ def persona_schema(n):
528
  return persona_schema
529
 
530
  def answers_schema(n):
531
- answers_schema={
532
- "type": "json_schema",
533
- "json_schema": {
534
- "name": "answers_list",
535
- "schema": {
536
- "type": "object",
537
- "properties": {
538
- "answers": {
539
- "type": "array",
540
- "description": f"A list of answers to questions, with exactly {n} elements.",
541
- "items": {
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  "type": "string",
543
- "description": "Each answer corresponding to a question."
544
- },
545
- "minItems": n,
546
- "maxItems": n
547
- }
 
 
 
 
 
548
  },
549
- "required": [
550
- "answers"
551
- ],
552
- "additionalProperties": False
 
553
  },
554
- "strict": True
555
  }
 
 
 
556
  }
557
- return answers_schema
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  return persona_schema
529
 
530
def answers_schema(n):
    """Build a JSON schema for an object holding exactly *n* string answers."""
    # Describe the single "answers" array field first, then wrap it in the
    # enclosing strict object schema.
    answers_field = {
        "type": "array",
        "items": {"type": "string"},
        "minItems": n,
        "maxItems": n,
        "description": f"Array of exactly {n} answers to the questions"
    }
    return {
        "type": "object",
        "properties": {"answers": answers_field},
        "required": ["answers"],
        "additionalProperties": False
    }
545
+
546
def ux_fsm_schema(actions):
    """
    Build the JSON schema for a UX-testing FSM response.

    Args:
        actions: List of action names the persona may choose from; used as the
            enum for "action_taken".

    Returns:
        Dict JSON schema with a single required "ux_response" object.
    """
    return {
        "type": "object",
        "properties": {
            "ux_response": {
                "type": "object",
                "description": "The response from the UX testing",
                "properties": {
                    "action_taken": {
                        "type": "string",
                        "description": "The action the persona decided to take",
                        "enum": actions
                    },
                    "thought": {
                        "type": "string",
                        "description": "The persona's reasoning for taking this action"
                    },
                    "task_finished": {
                        "type": "boolean",
                        "description": "Whether the persona believes they have completed the task"
                    },
                    "task_difficulty": {
                        # BUG FIX: null must be admitted through the type union —
                        # a bare "number" type rejects null regardless of the enum,
                        # and JSON has no "None" (the description now says "null").
                        "type": ["number", "null"],
                        "description": "The difficulty of the task, from 1 (very easy) to 5 (very difficult). If not finished, set to null.",
                        "enum": [1, 2, 3, 4, 5, None]
                    }
                },
                # Consistency with the sibling strict schemas in this module:
                # every field is required and extras are rejected.
                "required": ["action_taken", "thought", "task_finished", "task_difficulty"],
                "additionalProperties": False
            }
        },
        "required": ["ux_response"],
        "additionalProperties": False
    }
578
+
579
+ # Simple FSM UX Testing Prompt
580
+ UX_FSM_SIMPLE_PROMPT = """
581
+ You are conducting a UX testing session as the detailed user persona provided below. You are looking at a specific interface screen and need to decide what action to take to accomplish your given task.
582
+
583
+ **Persona Profile:**
584
+ {persona}
585
+
586
+ **Task to Accomplish:**
587
+ {task}
588
+
589
+ **Current Screen:**
590
+ You are viewing the interface shown in the provided image.
591
+
592
+ **Available Actions:**
593
+ Choose ONE of these actions: {available_actions}
594
+
595
+ **Session History:**
596
+ {session_history}
597
+
598
+ **Your Response:**
599
+
600
+ Provide your response as a JSON object with exactly these fields:
601
+
602
+ {{
603
+ "action_taken": "your_chosen_action",
604
+ "thought": "your reasoning for this action",
605
+ "task_finished": true/false,
606
+ "task_difficulty": 1.0-5.0 (only if task_finished is true, otherwise null)
607
+ }}
608
+
609
+ **Guidelines:**
610
+ * **action_taken**: Select exactly ONE action from the available actions list
611
+ * **thought**: Explain your reasoning for choosing this action. Why does this action make sense for accomplishing your task? What do you expect to happen? Consider your previous actions from the session history.
612
+ * **task_finished**: Answer true ONLY if you have completely finished the task (e.g., item successfully added to cart and you're at checkout), false if you need to continue
613
+ * **task_difficulty**: If task_finished is true, rate the overall difficulty from 1.0 (very easy) to 5.0 (very difficult). If task is not finished, set to null.
614
+
615
+ **Important Instructions:**
616
+ * Respond as the persona - use their knowledge, experience, and perspective
617
+ * Consider your session history when making decisions - don't repeat unnecessary actions
618
+ * Choose the action that best helps accomplish the given task
619
+ * Be specific and clear in your reasoning
620
+ * Consider your persona's technical comfort level and preferences
621
+ * Only choose actions from the provided available actions list
622
+ * **Task Completion**: Only mark task_finished as true when you have FULLY completed the entire task (not just found a product, but actually added it to cart and reached checkout)
623
+ * Respond with valid JSON only - no additional text
624
+ """
schemas.py CHANGED
@@ -287,10 +287,71 @@ class ChatWithTwinResponse(BaseModel):
287
  example="Based on my knowledge, my favorite color isn't specified, but I appreciate aesthetics in design."
288
  )
289
  new_memory: List[Dict[str, Any]] = Field(...,
290
- description="The updated memory of the digital twin, including the latest interaction.",
291
  example=[
292
- # ... previous memory items ...
293
- {'role': 'user', 'content': {'stimuli': [{'type': 'SPEECH', 'content': 'What is your favorite color?'}]}},
294
- {'role': 'assistant', 'content': {'actions': [{'type': 'TALK', 'content': 'Based on my knowledge... aesthetics in design.'}]}}
 
 
 
295
  ]
296
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  example="Based on my knowledge, my favorite color isn't specified, but I appreciate aesthetics in design."
288
  )
289
  new_memory: List[Dict[str, Any]] = Field(...,
290
+ description="The updated memory list of the digital twin after the conversation.",
291
  example=[
292
+ {
293
+ "id": 1,
294
+ "kind": "chat_message",
295
+ "content": "The user asked about my favorite color, and I responded that it's not specified but I appreciate aesthetics.",
296
+ "importance": 2
297
+ }
298
  ]
299
  )
300
+
301
class UXTestingRequest(BaseModel):
    """Request payload for one step of an image-based UX testing session."""
    user: Dict[str, Any] = Field(...,
        description="User persona to conduct UX testing with",
        example={
            "Name": "Sarah Chen",
            "Age": "32",
            "Profession": "UX Designer",
            "Location": "San Francisco"
        }
    )
    task: str = Field(...,
        description="The UX task the persona needs to accomplish",
        example="Find and add a red t-shirt to your shopping cart"
    )
    image: str = Field(...,
        description="URL of the current screen/interface image",
        example="https://example.com/current-screen.png"
    )
    available_actions: List[str] = Field(...,
        description="List of actions the persona can take in the current state",
        example=["scroll_down", "click_search", "click_menu", "click_product"]
    )
    # default_factory=list avoids declaring a shared mutable [] default
    # (best practice for container defaults); value type tightened to
    # Dict[str, Any] to match the documented step shape below.
    session_history: List[Dict[str, Any]] = Field(default_factory=list,
        description="Previous steps in this UX testing session",
        example=[
            {
                "image": "https://example.com/homepage.png",
                "action_taken": "click_search",
                "thought": "I clicked search to find the product"
            },
            {
                "image": "https://example.com/search-page.png",
                "action_taken": "type_search_term",
                "thought": "I typed 'red t-shirt' to find what I need"
            }
        ]
    )
338
+
339
class UXTestingResponse(BaseModel):
    """Response payload describing the persona's decision for one UX testing step."""

    action_taken: str = Field(
        ...,
        description="The action the persona decided to take",
        example="click_search",
    )
    thought: str = Field(
        ...,
        description="The persona's reasoning and thought process for taking this action",
        example="I need to find a red t-shirt, so clicking the search button seems like the most direct way to locate what I'm looking for.",
    )
    task_finished: bool = Field(
        ...,
        description="Whether the persona believes they have completed the task",
        example=False,
    )
    # Only populated when the persona reports the task as finished.
    task_difficulty: Optional[float] = Field(
        default=None,
        description="If task is finished, rate the difficulty from 1.0 (very easy) to 5.0 (very difficult)",
        ge=1.0,
        le=5.0,
        example=2.5,
    )
utils.py CHANGED
@@ -14,7 +14,7 @@ model_low="openai/gpt-4.1-nano"
14
  model_mid="openai/gpt-4.1-mini"
15
  model_high="openai/gpt-4.1"
16
 
17
- def call_llm(prompt: str, temperature: float,model_type: str,response_format=None,tools=None,shuffle=False,return_tokens=False) -> str:
18
  if shuffle:
19
  if model_type=="low":
20
  model = random.choice(models_low)
@@ -31,9 +31,19 @@ def call_llm(prompt: str, temperature: float,model_type: str,response_format=Non
31
  elif model_type=="high":
32
  model = model_high
33
 
34
- messages=[
35
- {"role": "user", "content": prompt},
36
- ]
 
 
 
 
 
 
 
 
 
 
37
 
38
  completion_args = {
39
  "model": model,
 
14
  model_mid="openai/gpt-4.1-mini"
15
  model_high="openai/gpt-4.1"
16
 
17
+ def call_llm(prompt: str, temperature: float, model_type: str, response_format=None, tools=None, shuffle=False, return_tokens=False, images=None) -> str:
18
  if shuffle:
19
  if model_type=="low":
20
  model = random.choice(models_low)
 
31
  elif model_type=="high":
32
  model = model_high
33
 
34
+ # Create message content - support both text-only and multimodal
35
+ if images:
36
+ # Multimodal message with images
37
+ content = [{"type": "text", "text": prompt}]
38
+ for image_url in images:
39
+ content.append({
40
+ "type": "image_url",
41
+ "image_url": {"url": image_url}
42
+ })
43
+ messages = [{"role": "user", "content": content}]
44
+ else:
45
+ # Text-only message
46
+ messages = [{"role": "user", "content": prompt}]
47
 
48
  completion_args = {
49
  "model": model,