Spaces:
Sleeping
Sleeping
added ux_testing api
Browse files- .gitignore +3 -1
- app.py +33 -1
- core.py +110 -6
- prompts.py +89 -22
- schemas.py +65 -4
- utils.py +14 -4
.gitignore
CHANGED
|
@@ -7,4 +7,6 @@ eval/__pycache__/
|
|
| 7 |
eval/synthetic/
|
| 8 |
test_lite_llm.py
|
| 9 |
test_gemini.py
|
| 10 |
-
test/
|
|
|
|
|
|
|
|
|
| 7 |
eval/synthetic/
|
| 8 |
test_lite_llm.py
|
| 9 |
test_gemini.py
|
| 10 |
+
test/
|
| 11 |
+
ux_test.py
|
| 12 |
+
images.json
|
app.py
CHANGED
|
@@ -10,7 +10,8 @@ from schemas import (
|
|
| 10 |
GenerateParametersRequest, GenerateParametersResponse,
|
| 11 |
GenerateTwinRequest, GenerateTwinResponse,
|
| 12 |
ChatWithTwinRequest, ChatWithTwinResponse,
|
| 13 |
-
GenerateUsersAnswersRequest
|
|
|
|
| 14 |
)
|
| 15 |
import uvicorn
|
| 16 |
import logging
|
|
@@ -229,4 +230,35 @@ def chat_with_twin_endpoint(request: ChatWithTwinRequest, x_api_key: str = Heade
|
|
| 229 |
# Return the results in the format defined by ChatWithTwinResponse
|
| 230 |
return ChatWithTwinResponse(answer=answer, new_memory=new_memory)
|
| 231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
if __name__ == "__main__": uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=False)
|
|
|
|
| 10 |
GenerateParametersRequest, GenerateParametersResponse,
|
| 11 |
GenerateTwinRequest, GenerateTwinResponse,
|
| 12 |
ChatWithTwinRequest, ChatWithTwinResponse,
|
| 13 |
+
GenerateUsersAnswersRequest,
|
| 14 |
+
UXTestingRequest, UXTestingResponse
|
| 15 |
)
|
| 16 |
import uvicorn
|
| 17 |
import logging
|
|
|
|
| 230 |
# Return the results in the format defined by ChatWithTwinResponse
|
| 231 |
return ChatWithTwinResponse(answer=answer, new_memory=new_memory)
|
| 232 |
|
| 233 |
+
# Endpoint for UX Testing with Images
|
| 234 |
+
@app.post("/ux-testing",
    response_model=UXTestingResponse,
    summary="Conduct UX Testing with Finite State Machine",
    description="Conduct interactive UX testing with a persona using images. The persona will choose an action to take, provide reasoning, and indicate if the task is complete."
)
def ux_testing_endpoint(request: UXTestingRequest, x_api_key: str = Header(...)):
    """
    Run one UX-testing step for a persona and return the action it chose.

    Args:
        request: Persona, task, screen image, available actions and the
            history of earlier steps in the session.
        x_api_key: API key supplied as a request header.

    Returns:
        UXTestingResponse built from the dictionary returned by
        ``ux_testing_fsm``.

    Raises:
        HTTPException: 403 when the supplied key does not match ``API_KEY``.
    """
    # Function-scope imports: the core import was local in the original code,
    # and compare_digest is only needed by this handler.
    from secrets import compare_digest
    from core import ux_testing_fsm

    # Compare in constant time so the response latency does not leak how many
    # leading characters of a guessed key were correct. Both sides are encoded
    # to bytes because compare_digest rejects non-ASCII str arguments. A
    # missing or non-string API_KEY can never match, exactly as with `!=`.
    key_matches = isinstance(API_KEY, str) and compare_digest(
        x_api_key.encode("utf-8"), API_KEY.encode("utf-8")
    )
    if not key_matches:
        logger.warning("Unauthorized access attempt to /ux-testing.")
        raise HTTPException(status_code=403, detail="Invalid API Key")

    logger.info(f"Starting UX FSM testing with persona: {request.user.get('Name', 'Unknown')}")
    logger.info(f"Task: {request.task}")
    logger.info(f"Available actions: {request.available_actions}")

    # Call the core function
    result = ux_testing_fsm(
        persona=request.user,
        task=request.task,
        image=request.image,
        available_actions=request.available_actions,
        session_history=request.session_history
    )

    logger.info("UX FSM testing completed successfully.")
    # Return the results in the format defined by UXTestingResponse
    return UXTestingResponse(**result)
|
| 263 |
+
|
| 264 |
if __name__ == "__main__": uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=False)
|
core.py
CHANGED
|
@@ -12,6 +12,7 @@ from prompts import (
|
|
| 12 |
GENERATE_REPORT_PROMPT,
|
| 13 |
CHAT_WITH_REPORT_PROMPT,
|
| 14 |
GENERATE_AUDIENCE_NAME_PROMPT,
|
|
|
|
| 15 |
persona_schema,
|
| 16 |
answers_schema
|
| 17 |
)
|
|
@@ -318,17 +319,120 @@ def chat_with_report(users: List[dict], question: str, questions: List[str]) ->
|
|
| 318 |
|
| 319 |
def generate_audience_name(audience: str, scope: str) -> str:
|
| 320 |
"""
|
| 321 |
-
Generate a concise
|
| 322 |
|
| 323 |
Args:
|
| 324 |
-
audience:
|
| 325 |
-
scope:
|
| 326 |
-
|
| 327 |
Returns:
|
| 328 |
-
|
| 329 |
"""
|
| 330 |
prompt = GENERATE_AUDIENCE_NAME_PROMPT.format(
|
| 331 |
audience=audience,
|
| 332 |
scope=scope
|
| 333 |
)
|
| 334 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
GENERATE_REPORT_PROMPT,
|
| 13 |
CHAT_WITH_REPORT_PROMPT,
|
| 14 |
GENERATE_AUDIENCE_NAME_PROMPT,
|
| 15 |
+
UX_FSM_SIMPLE_PROMPT,
|
| 16 |
persona_schema,
|
| 17 |
answers_schema
|
| 18 |
)
|
|
|
|
| 319 |
|
| 320 |
def generate_audience_name(audience: str, scope: str) -> str:
    """
    Produce a concise audience name from an audience description and a research scope.

    Args:
        audience: Detailed audience description
        scope: Research scope

    Returns:
        The text returned by the LLM call, with surrounding whitespace removed
    """
    filled_prompt = GENERATE_AUDIENCE_NAME_PROMPT.format(audience=audience, scope=scope)

    # Temperature 0 with model_type "low", as in every other detail of the call.
    raw_name = call_llm(prompt=filled_prompt, temperature=0, model_type="low")
    return raw_name.strip()
|
| 338 |
+
|
| 339 |
+
def ux_testing_fsm(persona: dict, task: str, image: str, available_actions: list, session_history: list = None) -> dict:
    """
    Conduct simple FSM-based UX testing with a persona.

    Builds the prompt from the persona, task, available actions and session
    history, asks the LLM (with the screen image attached) for a structured
    JSON decision, and returns that decision as a dictionary.

    Args:
        persona: User persona to conduct testing with
        task: The task the persona needs to accomplish
        image: URL of the current interface image
        available_actions: List of available actions in current state
        session_history: List of previous steps in this session

    Returns:
        Dictionary with action_taken, thought, task_finished, and task_difficulty.
        If the LLM call or the parsing of its reply fails, a fallback dictionary
        is returned instead of raising: the first available action (or "unknown"),
        a thought describing the error, task_finished False and task_difficulty None.
    """

    # Format available actions
    actions_text = ", ".join(available_actions)

    # Format session history (one line per step, each terminated by a newline)
    if session_history:
        history_lines = ["Previous steps in this session:"]
        history_lines.extend(
            f"Step {i}: Action '{step.get('action_taken', 'unknown')}' - {step.get('thought', 'No thought recorded')}"
            for i, step in enumerate(session_history, 1)
        )
        history_text = "\n".join(history_lines) + "\n"
    else:
        history_text = "This is the first step of the session."

    prompt = UX_FSM_SIMPLE_PROMPT.format(
        persona=persona,
        task=task,
        available_actions=actions_text,
        session_history=history_text
    )

    # Define response format for structured JSON
    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "ux_testing_response",
            "schema": {
                "type": "object",
                "properties": {
                    "action_taken": {
                        "type": "string",
                        "description": "The action chosen from available actions",
                        # Copy so the schema never aliases the caller's list.
                        "enum": list(available_actions)
                    },
                    "thought": {
                        "type": "string",
                        "description": "Reasoning for the action"
                    },
                    "task_finished": {
                        "type": "boolean",
                        "description": "Whether the task is complete"
                    },
                    "task_difficulty": {
                        "type": ["number", "null"],
                        "minimum": 1.0,
                        "maximum": 5.0,
                        "description": "Difficulty rating if task is finished"
                    }
                },
                "required": ["action_taken", "thought", "task_finished", "task_difficulty"],
                "additionalProperties": False
            },
            "strict": True
        }
    }

    try:
        # Call LLM with the image and structured response format
        response = call_llm(
            prompt=prompt,
            temperature=0.7,
            model_type="mid",
            images=[image],
            response_format=response_format
        )

        # Parse JSON response. The schema above describes a flat object and
        # "ux_testing_response" is only the schema's name, so the reply is
        # normally not wrapped under that key. Accept both shapes so a flat
        # reply no longer fails with KeyError and lands in the error fallback.
        parsed_response = json.loads(response)
        if isinstance(parsed_response, dict) and isinstance(parsed_response.get("ux_testing_response"), dict):
            parsed_response = parsed_response["ux_testing_response"]
        if not isinstance(parsed_response, dict):
            raise ValueError("LLM response is not a JSON object")

        # A reply without these fields cannot be turned into a valid result;
        # fail here so the fallback below is used instead of a partial dict.
        missing = [key for key in ("thought", "task_finished") if key not in parsed_response]
        if missing:
            raise ValueError(f"LLM response is missing required fields: {missing}")
        parsed_response.setdefault("task_difficulty", None)

        # Validate action is in available actions
        if parsed_response.get("action_taken") not in available_actions:
            logger.warning(f"Persona chose invalid action: {parsed_response.get('action_taken')}. Using first available action.")
            parsed_response["action_taken"] = available_actions[0] if available_actions else "unknown"

        logger.info(f"UX FSM testing completed for persona: {persona.get('Name', 'Unknown')}")
        logger.info(f"Action taken: {parsed_response.get('action_taken')}")
        logger.info(f"Task finished: {parsed_response.get('task_finished')}")

        return parsed_response

    except Exception as e:
        # Deliberate best-effort boundary: any failure yields a well-formed
        # fallback result. logger.exception keeps the traceback in the logs.
        logger.exception(f"Error during UX FSM testing for persona {persona.get('Name', 'Unknown')}: {e}")
        return {
            "action_taken": available_actions[0] if available_actions else "unknown",
            "thought": f"Error occurred during testing: {str(e)}",
            "task_finished": False,
            "task_difficulty": None
        }
|
prompts.py
CHANGED
|
@@ -528,30 +528,97 @@ def persona_schema(n):
|
|
| 528 |
return persona_schema
|
| 529 |
|
| 530 |
def answers_schema(n):
|
| 531 |
-
|
| 532 |
-
"type": "
|
| 533 |
-
"
|
| 534 |
-
|
| 535 |
-
"
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
"type": "string",
|
| 543 |
-
"description": "
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
},
|
| 549 |
-
"
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
|
|
|
| 553 |
},
|
| 554 |
-
"strict": True
|
| 555 |
}
|
|
|
|
|
|
|
|
|
|
| 556 |
}
|
| 557 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
return persona_schema
|
| 529 |
|
| 530 |
def answers_schema(n: int) -> dict:
    """
    Build the JSON schema for an object holding exactly ``n`` string answers.

    Args:
        n: Number of answers the ``answers`` array must contain.

    Returns:
        A JSON-schema dictionary for an object with a single required
        ``answers`` array of strings whose length is fixed to ``n``.

    Raises:
        ValueError: If ``n`` is negative, since ``minItems``/``maxItems``
            must be non-negative in JSON Schema.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    return {
        "type": "object",
        "properties": {
            "answers": {
                "type": "array",
                "items": {"type": "string"},
                # Same lower and upper bound pins the array length to n.
                "minItems": n,
                "maxItems": n,
                "description": f"Array of exactly {n} answers to the questions"
            }
        },
        "required": ["answers"],
        "additionalProperties": False
    }
|
| 545 |
+
|
| 546 |
+
def ux_fsm_schema(actions):
    """
    Build the JSON schema for a UX-testing reply wrapped under ``ux_response``.

    Args:
        actions: Iterable of action names the persona may choose from; it is
            copied into the ``action_taken`` enum.

    Returns:
        A JSON-schema dictionary for an object with one required
        ``ux_response`` object containing ``action_taken``, ``thought``,
        ``task_finished`` and ``task_difficulty``.
    """
    return {
        "type": "object",
        "properties": {
            "ux_response": {
                "type": "object",
                "description": "The response from the UX testing",
                "properties": {
                    "action_taken": {
                        "type": "string",
                        "description": "The action the persona decided to take",
                        # Copy so the schema does not alias the caller's list.
                        "enum": list(actions)
                    },
                    "thought": {
                        "type": "string",
                        "description": "The persona's reasoning for taking this action"
                    },
                    "task_finished": {
                        "type": "boolean",
                        "description": "Whether the persona believes they have completed the task"
                    },
                    "task_difficulty": {
                        # "null" must be an allowed type, otherwise the None
                        # entry in the enum below could never validate.
                        "type": ["number", "null"],
                        "description": "The difficulty of the task, from 1 (very easy) to 5 (very difficult). If not finished, set to null.",
                        "enum": [1, 2, 3, 4, 5, None]
                    }
                },
                # Same strictness as the outer object: every field is
                # required and no extra fields are accepted.
                "required": ["action_taken", "thought", "task_finished", "task_difficulty"],
                "additionalProperties": False
            }
        },
        "required": ["ux_response"],
        "additionalProperties": False
    }
|
| 578 |
+
|
| 579 |
+
# Simple FSM UX Testing Prompt
|
| 580 |
+
UX_FSM_SIMPLE_PROMPT = """
|
| 581 |
+
You are conducting a UX testing session as the detailed user persona provided below. You are looking at a specific interface screen and need to decide what action to take to accomplish your given task.
|
| 582 |
+
|
| 583 |
+
**Persona Profile:**
|
| 584 |
+
{persona}
|
| 585 |
+
|
| 586 |
+
**Task to Accomplish:**
|
| 587 |
+
{task}
|
| 588 |
+
|
| 589 |
+
**Current Screen:**
|
| 590 |
+
You are viewing the interface shown in the provided image.
|
| 591 |
+
|
| 592 |
+
**Available Actions:**
|
| 593 |
+
Choose ONE of these actions: {available_actions}
|
| 594 |
+
|
| 595 |
+
**Session History:**
|
| 596 |
+
{session_history}
|
| 597 |
+
|
| 598 |
+
**Your Response:**
|
| 599 |
+
|
| 600 |
+
Provide your response as a JSON object with exactly these fields:
|
| 601 |
+
|
| 602 |
+
{{
|
| 603 |
+
"action_taken": "your_chosen_action",
|
| 604 |
+
"thought": "your reasoning for this action",
|
| 605 |
+
"task_finished": true/false,
|
| 606 |
+
"task_difficulty": 1.0-5.0 (only if task_finished is true, otherwise null)
|
| 607 |
+
}}
|
| 608 |
+
|
| 609 |
+
**Guidelines:**
|
| 610 |
+
* **action_taken**: Select exactly ONE action from the available actions list
|
| 611 |
+
* **thought**: Explain your reasoning for choosing this action. Why does this action make sense for accomplishing your task? What do you expect to happen? Consider your previous actions from the session history.
|
| 612 |
+
* **task_finished**: Answer true ONLY if you have completely finished the task (e.g., item successfully added to cart and you're at checkout), false if you need to continue
|
| 613 |
+
* **task_difficulty**: If task_finished is true, rate the overall difficulty from 1.0 (very easy) to 5.0 (very difficult). If task is not finished, set to null.
|
| 614 |
+
|
| 615 |
+
**Important Instructions:**
|
| 616 |
+
* Respond as the persona - use their knowledge, experience, and perspective
|
| 617 |
+
* Consider your session history when making decisions - don't repeat unnecessary actions
|
| 618 |
+
* Choose the action that best helps accomplish the given task
|
| 619 |
+
* Be specific and clear in your reasoning
|
| 620 |
+
* Consider your persona's technical comfort level and preferences
|
| 621 |
+
* Only choose actions from the provided available actions list
|
| 622 |
+
* **Task Completion**: Only mark task_finished as true when you have FULLY completed the entire task (not just found a product, but actually added it to cart and reached checkout)
|
| 623 |
+
* Respond with valid JSON only - no additional text
|
| 624 |
+
"""
|
schemas.py
CHANGED
|
@@ -287,10 +287,71 @@ class ChatWithTwinResponse(BaseModel):
|
|
| 287 |
example="Based on my knowledge, my favorite color isn't specified, but I appreciate aesthetics in design."
|
| 288 |
)
|
| 289 |
new_memory: List[Dict[str, Any]] = Field(...,
|
| 290 |
-
description="The updated memory of the digital twin
|
| 291 |
example=[
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
|
|
|
|
|
|
|
|
|
| 295 |
]
|
| 296 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
example="Based on my knowledge, my favorite color isn't specified, but I appreciate aesthetics in design."
|
| 288 |
)
|
| 289 |
new_memory: List[Dict[str, Any]] = Field(...,
|
| 290 |
+
description="The updated memory list of the digital twin after the conversation.",
|
| 291 |
example=[
|
| 292 |
+
{
|
| 293 |
+
"id": 1,
|
| 294 |
+
"kind": "chat_message",
|
| 295 |
+
"content": "The user asked about my favorite color, and I responded that it's not specified but I appreciate aesthetics.",
|
| 296 |
+
"importance": 2
|
| 297 |
+
}
|
| 298 |
]
|
| 299 |
)
|
| 300 |
+
|
| 301 |
+
class UXTestingRequest(BaseModel):
    # Request body for one UX-testing step. Documented with comments rather
    # than a docstring so the generated schema description stays unchanged.

    # Persona taking part in the test, as a free-form dictionary.
    user: Dict[str, Any] = Field(
        ...,
        description="User persona to conduct UX testing with",
        example={
            "Name": "Sarah Chen",
            "Age": "32",
            "Profession": "UX Designer",
            "Location": "San Francisco",
        },
    )

    # Goal the persona is trying to reach.
    task: str = Field(
        ...,
        description="The UX task the persona needs to accomplish",
        example="Find and add a red t-shirt to your shopping cart",
    )

    # Screen the persona is currently looking at.
    image: str = Field(
        ...,
        description="URL of the current screen/interface image",
        example="https://example.com/current-screen.png",
    )

    # Actions that may be chosen on this screen.
    available_actions: List[str] = Field(
        ...,
        description="List of actions the persona can take in the current state",
        example=["scroll_down", "click_search", "click_menu", "click_product"],
    )

    # Earlier steps of the same session; optional, empty by default.
    session_history: List[Dict] = Field(
        default=[],
        description="Previous steps in this UX testing session",
        example=[
            {
                "image": "https://example.com/homepage.png",
                "action_taken": "click_search",
                "thought": "I clicked search to find the product",
            },
            {
                "image": "https://example.com/search-page.png",
                "action_taken": "type_search_term",
                "thought": "I typed 'red t-shirt' to find what I need",
            },
        ],
    )
|
| 338 |
+
|
| 339 |
+
class UXTestingResponse(BaseModel):
    # Response body for one UX-testing step. Documented with comments rather
    # than a docstring so the generated schema description stays unchanged.

    # Action selected for this step.
    action_taken: str = Field(
        ...,
        description="The action the persona decided to take",
        example="click_search",
    )

    # Explanation given for the selected action.
    thought: str = Field(
        ...,
        description="The persona's reasoning and thought process for taking this action",
        example="I need to find a red t-shirt, so clicking the search button seems like the most direct way to locate what I'm looking for.",
    )

    # Completion flag reported for the task.
    task_finished: bool = Field(
        ...,
        description="Whether the persona believes they have completed the task",
        example=False,
    )

    # Optional rating, constrained to the closed range 1.0-5.0 when present.
    task_difficulty: Optional[float] = Field(
        default=None,
        description="If task is finished, rate the difficulty from 1.0 (very easy) to 5.0 (very difficult)",
        ge=1.0,
        le=5.0,
        example=2.5,
    )
|
utils.py
CHANGED
|
@@ -14,7 +14,7 @@ model_low="openai/gpt-4.1-nano"
|
|
| 14 |
model_mid="openai/gpt-4.1-mini"
|
| 15 |
model_high="openai/gpt-4.1"
|
| 16 |
|
| 17 |
-
def call_llm(prompt: str, temperature: float,model_type: str,response_format=None,tools=None,shuffle=False,return_tokens=False) -> str:
|
| 18 |
if shuffle:
|
| 19 |
if model_type=="low":
|
| 20 |
model = random.choice(models_low)
|
|
@@ -31,9 +31,19 @@ def call_llm(prompt: str, temperature: float,model_type: str,response_format=Non
|
|
| 31 |
elif model_type=="high":
|
| 32 |
model = model_high
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
completion_args = {
|
| 39 |
"model": model,
|
|
|
|
| 14 |
model_mid="openai/gpt-4.1-mini"
|
| 15 |
model_high="openai/gpt-4.1"
|
| 16 |
|
| 17 |
+
def call_llm(prompt: str, temperature: float, model_type: str, response_format=None, tools=None, shuffle=False, return_tokens=False, images=None) -> str:
|
| 18 |
if shuffle:
|
| 19 |
if model_type=="low":
|
| 20 |
model = random.choice(models_low)
|
|
|
|
| 31 |
elif model_type=="high":
|
| 32 |
model = model_high
|
| 33 |
|
| 34 |
+
# Create message content - support both text-only and multimodal
|
| 35 |
+
if images:
|
| 36 |
+
# Multimodal message with images
|
| 37 |
+
content = [{"type": "text", "text": prompt}]
|
| 38 |
+
for image_url in images:
|
| 39 |
+
content.append({
|
| 40 |
+
"type": "image_url",
|
| 41 |
+
"image_url": {"url": image_url}
|
| 42 |
+
})
|
| 43 |
+
messages = [{"role": "user", "content": content}]
|
| 44 |
+
else:
|
| 45 |
+
# Text-only message
|
| 46 |
+
messages = [{"role": "user", "content": prompt}]
|
| 47 |
|
| 48 |
completion_args = {
|
| 49 |
"model": model,
|