gsavin committed on
Commit
2999669
·
2 Parent(s): 4310b90ccccaf7

Merge branch 'main' of https://github.com/DeltaZN/gradio-mcp-hackaton into feat/improve-image-generation

Browse files
src/agent/game_generator.py DELETED
File without changes
src/agent/image_agent.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Literal, Optional
3
+ from agent.llm import create_light_llm
4
+ from langchain_core.messages import SystemMessage, HumanMessage
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ IMAGE_GENERATION_SYSTEM_PROMPT = """You are an AI agent for a visual novel game. Your role is to process an incoming scene description and determine if the visual scene needs to change. If it does, you will generate a new `scene_description`. This `scene_description` MUST BE a highly detailed image prompt, specifically engineered for an AI image generation model, and it MUST adhere to the strict first-person perspective detailed below.
11
+
12
+ **Your Core Tasks & Output Structure:**
13
+ Your output MUST be a `ChangeScene` object. You need to:
14
+ 1. **Determine Change Type:** Decide if the scene requires a "change_completely", "modify", or "no_change" and set this in the `change_scene` field of the output object.
15
+ 2. **Generate FPS Image Prompt:** If your decision is "change_completely" or "modify", you MUST then generate the image prompt and place it in the `scene_description` field of the output object. If "no_change", this field can be null or empty.
16
+
17
+ **Mandatory: First-Person Perspective (FPS) for Image Prompts**
18
+ The image prompt you generate for the `scene_description` field MUST strictly describe the scene from a first-person perspective (FPS), as if the player is looking directly through the character's eyes.
19
+ * **Viewpoint:** All descriptions must be from the character's eye level, looking forward or as indicated by the scene.
20
+ * **Character Visibility:** The scene must be depicted strictly as if looking through the character's eyes. NO part of the character's own body (e.g., hands, arms, feet, clothing on them) should be visible or described in the prompt. The view is purely what is external to the character.
21
+ * **Immersion:** Focus on what the character directly sees and perceives in their immediate environment. Use phrasing that reflects this, for example: "I see...", "Before me lies...", "Looking through the grimy window...", "The corridor stretches out in front of me."
22
+
23
+ **Guidelines for Crafting the FPS Image Prompt (for `scene_description` field):**
24
+ When generating the image prompt, ensure it's detailed and considers the following aspects, all from the character's first-person viewpoint:
25
+
26
+ 1. **Subject & Focus (as seen by the character):**
27
+ * What is the primary subject or point of interest directly in the character's view?
28
+ * Describe any other characters visible to the POV character: their appearance (from the character's perspective), clothing, expressions, posture, and actions.
29
+ * Detail key objects, items, or environmental elements the character is interacting with or observing.
30
+
31
+ 2. **Setting & Environment (from the character's perspective):**
32
+ * Describe the immediate surroundings as the character would see them.
33
+ * Time of day and weather conditions as perceived by the character.
34
+ * Specific architectural or natural features visible in the character's field of view.
35
+
36
+ 3. **Art Style & Medium:**
37
+ * Specify the desired visual style (e.g., photorealistic, anime, manga, watercolor, oil painting, pixel art, 3D render, concept art, comic book).
38
+ * Mention any specific artist influences if relevant (e.g., "in the style of Studio Ghibli").
39
+
40
+ 4. **Composition & Framing (from the character's viewpoint):**
41
+ * How is the scene framed from the character's eyes? (e.g., "looking straight ahead at a door," "view through a sniper scope," "gazing up at a tall tower").
42
+ * Describe the arrangement of elements as perceived by the character. Avoid terms like "medium shot" or "wide shot" unless they can be rephrased from an FPS view (e.g., "a wide vista opens up before me").
43
+
44
+ 5. **Lighting & Atmosphere (as perceived by the character):**
45
+ * Describe lighting conditions (e.g., "bright sunlight streams through the window in front of me," "only the dim glow of my flashlight illuminates the passage ahead," "neon signs reflect off the wet street I'm looking at").
46
+ * What is the overall mood or atmosphere from the character's perspective? (e.g., "a tense silence hangs in the air as I look down the dark hallway," "a sense of peace as I gaze at the sunset over the mountains").
47
+
48
+ 6. **Color Palette:**
49
+ * Specify dominant colors or a color scheme relevant to what the character sees.
50
+
51
+ 7. **Details & Keywords:**
52
+ * Include crucial details from the input scene description that the character would notice.
53
+ * Use descriptive adjectives and strong keywords.
54
+
55
+ **Example for the `scene_description` field (the FPS image prompt):**
56
+ "FPS view. Through the cockpit window of a futuristic hovercar, a sprawling neon-lit cyberpunk city stretches out under a stormy, rain-lashed sky. Rain streaks across the glass. The hum of the engine is palpable. Photorealistic, Blade Runner style. Cool blue and vibrant pink neon palette."
57
+ """
58
+
59
+
60
class ChangeScene(BaseModel):
    # Structured decision returned by the image agent: whether the visuals
    # must change and, if so, the first-person image prompt to render.
    # (No class docstring on purpose: pydantic feeds docstrings into the
    # structured-output schema sent to the LLM.)
    change_scene: Literal["change_completely", "modify", "no_change"] = Field(
        description="Whether the scene should be completely changed, just modified or not changed at all"
    )
    # FPS-style image prompt; per the system prompt it may be None/empty
    # when change_scene == "no_change".
    scene_description: Optional[str] = None
65
+
66
+
67
image_prompt_generator_llm = create_light_llm(0.1).with_structured_output(ChangeScene)


async def generate_image_prompt(scene_description: str, request_id: str) -> ChangeScene:
    """Ask the light LLM whether the scene's visuals should change.

    Returns a ChangeScene decision whose scene_description, when present,
    is a first-person image prompt for the image generation model.
    """
    logger.info(f"Generating image prompt for the current scene: {request_id}")
    conversation = [
        SystemMessage(content=IMAGE_GENERATION_SYSTEM_PROMPT),
        HumanMessage(content=scene_description),
    ]
    decision = await image_prompt_generator_llm.ainvoke(conversation)
    logger.info(f"Image prompt generated: {request_id}")
    return decision
src/agent/llm.py CHANGED
@@ -1,74 +1,59 @@
1
- from langchain_google_genai import ChatGoogleGenerativeAI
 
2
  import logging
 
 
3
  from config import settings
4
 
5
  logger = logging.getLogger(__name__)
6
 
7
- _google_api_keys_list = []
8
- _current_google_key_idx = 0
 
9
 
10
 
11
- def create_llm(temperature: float = settings.temperature, top_p: float = settings.top_p):
12
- global _google_api_keys_list, _current_google_key_idx
 
13
 
14
- if not _google_api_keys_list:
15
- api_keys_str = settings.gemini_api_keys.get_secret_value()
16
- if api_keys_str:
17
- _google_api_keys_list = [key.strip() for key in api_keys_str.split(',') if key.strip()]
18
-
19
- if not _google_api_keys_list:
20
- logger.error("Google API keys are not configured or are empty in settings.")
21
- raise ValueError("Google API keys are not configured or are invalid for round-robin.")
22
 
23
- if not _google_api_keys_list: # Safeguard, though previous block should handle it.
24
- logger.error("No Google API keys available for round-robin.")
25
- raise ValueError("No Google API keys available for round-robin.")
 
26
 
27
- key_index_to_use = _current_google_key_idx
28
- selected_api_key = _google_api_keys_list[key_index_to_use]
29
-
30
- _current_google_key_idx = (key_index_to_use + 1) % len(_google_api_keys_list)
31
-
32
- logger.debug(f"Using Google API key at index {key_index_to_use} (ending with ...{selected_api_key[-4:] if len(selected_api_key) > 4 else selected_api_key}) for round-robin.")
33
 
 
 
 
 
 
34
  return ChatGoogleGenerativeAI(
35
- model="gemini-2.5-flash-preview-05-20",
36
- google_api_key=selected_api_key,
37
  temperature=temperature,
38
  top_p=top_p,
39
- thinking_budget=1024
40
  )
41
 
42
 
43
  def create_light_llm(temperature: float = settings.temperature, top_p: float = settings.top_p):
44
- global _google_api_keys_list, _current_google_key_idx
45
-
46
- if not _google_api_keys_list:
47
- api_keys_str = settings.gemini_api_keys.get_secret_value()
48
- if api_keys_str:
49
- _google_api_keys_list = [key.strip() for key in api_keys_str.split(',') if key.strip()]
50
-
51
- if not _google_api_keys_list:
52
- logger.error("Google API keys are not configured or are empty in settings.")
53
- raise ValueError("Google API keys are not configured or are invalid for round-robin.")
54
-
55
- if not _google_api_keys_list: # Safeguard, though previous block should handle it.
56
- logger.error("No Google API keys available for round-robin.")
57
- raise ValueError("No Google API keys available for round-robin.")
58
-
59
- key_index_to_use = _current_google_key_idx
60
- selected_api_key = _google_api_keys_list[key_index_to_use]
61
-
62
- _current_google_key_idx = (key_index_to_use + 1) % len(_google_api_keys_list)
63
-
64
- logger.debug(f"Using Google API key at index {key_index_to_use} (ending with ...{selected_api_key[-4:] if len(selected_api_key) > 4 else selected_api_key}) for round-robin.")
65
-
66
  return ChatGoogleGenerativeAI(
67
  model="gemini-2.0-flash",
68
- google_api_key=selected_api_key,
69
  temperature=temperature,
70
  top_p=top_p
71
  )
72
 
73
- def create_precise_llm():
 
 
74
  return create_llm(temperature=0, top_p=1)
 
1
+ """Utility functions for working with the language model."""
2
+
3
  import logging
4
+ from langchain_google_genai import ChatGoogleGenerativeAI
5
+
6
  from config import settings
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
+ _API_KEYS: list[str] = []
11
+ _current_key_idx = 0
12
+ MODEL_NAME = "gemini-2.5-flash-preview-05-20"
13
 
14
 
15
def _get_api_key() -> str:
    """Return an API key using round-robin selection.

    Lazily parses the comma-separated key list from settings on first use.

    Raises:
        ValueError: if no usable API keys are configured.
    """
    global _API_KEYS, _current_key_idx

    if not _API_KEYS:
        # NOTE(review): the pre-refactor code read settings.gemini_api_keys
        # (plural) — confirm the settings field was actually renamed.
        keys_str = settings.gemini_api_key.get_secret_value()
        if keys_str:
            _API_KEYS = [k.strip() for k in keys_str.split(",") if k.strip()]
        if not _API_KEYS:
            msg = "Google API keys are not configured or invalid"
            logger.error(msg)
            raise ValueError(msg)

    # Capture the index before advancing so the log names the key actually
    # returned (previously the post-increment index was logged, which
    # pointed at the *next* key).
    idx = _current_key_idx
    key = _API_KEYS[idx]
    _current_key_idx = (idx + 1) % len(_API_KEYS)
    logger.debug("Using Google API key index %s", idx)
    return key
32
 
 
 
 
 
 
 
33
 
34
def create_llm(
    temperature: float = settings.temperature,
    top_p: float = settings.top_p,
) -> ChatGoogleGenerativeAI:
    """Create a standard LLM instance."""
    # thinking_budget caps the model's internal reasoning-token spend.
    llm = ChatGoogleGenerativeAI(
        model=MODEL_NAME,
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
        thinking_budget=1024,
    )
    return llm
46
 
47
 
48
def create_light_llm(temperature: float = settings.temperature, top_p: float = settings.top_p) -> ChatGoogleGenerativeAI:
    """Create a lightweight LLM instance for cheap auxiliary calls.

    Uses the small "gemini-2.0-flash" model instead of MODEL_NAME; rotates
    API keys the same way as create_llm. Docstring and return annotation
    added for consistency with the sibling factory functions.
    """
    return ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
    )
55
 
56
+
57
def create_precise_llm() -> ChatGoogleGenerativeAI:
    """Return an LLM tuned for deterministic output."""
    # Zero temperature with full nucleus keeps sampling as greedy as possible.
    return create_llm(temperature=0, top_p=1)
src/agent/llm_agent.py CHANGED
@@ -5,7 +5,7 @@ import logging
5
  from agent.image_agent import ChangeScene
6
  import asyncio
7
  from agent.music_agent import generate_music_prompt
8
- from agent.image_agent import generate_scene_image
9
  import uuid
10
 
11
  logger = logging.getLogger(__name__)
@@ -57,7 +57,7 @@ async def process_user_input(input: str) -> MultiAgentResponse:
57
 
58
  music_prompt_task = generate_music_prompt(current_state, request_id)
59
 
60
- change_scene_task = generate_scene_image(current_state, request_id)
61
 
62
  music_prompt, change_scene = await asyncio.gather(music_prompt_task, change_scene_task)
63
 
 
5
  from agent.image_agent import ChangeScene
6
  import asyncio
7
  from agent.music_agent import generate_music_prompt
8
+ from agent.image_agent import generate_image_prompt
9
  import uuid
10
 
11
  logger = logging.getLogger(__name__)
 
57
 
58
  music_prompt_task = generate_music_prompt(current_state, request_id)
59
 
60
+ change_scene_task = generate_image_prompt(current_state, request_id)
61
 
62
  music_prompt, change_scene = await asyncio.gather(music_prompt_task, change_scene_task)
63
 
src/agent/llm_graph.py CHANGED
@@ -1,14 +1,144 @@
1
- from agent.tools import available_tools
2
- from agent.llm import create_llm
3
- from langgraph.graph import MessagesState
4
- class CustomState(MessagesState):
5
- """Расширенное состояние графа."""
6
 
 
 
 
 
 
 
7
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- llm = create_llm().bind_tools(available_tools)
10
 
 
 
 
11
 
 
 
 
 
 
 
 
 
12
 
13
 
 
 
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangGraph setup for the interactive fiction agent."""
 
 
 
 
2
 
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from typing import Any, Dict, Optional
6
+ import asyncio
7
+ from langgraph.graph import END, StateGraph
8
+ from agent.image_agent import generate_image_prompt
9
 
10
+ from agent.tools import (
11
+ check_ending,
12
+ generate_scene,
13
+ generate_scene_image,
14
+ generate_story_frame,
15
+ update_state_with_choice,
16
+ )
17
+ from agent.state import get_user_state
18
+ from audio.audio_generator import change_music_tone
19
+ logger = logging.getLogger(__name__)
20
 
 
21
 
22
@dataclass
class GraphState:
    """Mutable state passed between graph nodes."""

    user_hash: Optional[str] = None  # session id; also passed as request id to sub-agents
    step: Optional[str] = None  # "start" or "choose" — selects the graph route
    setting: Optional[str] = None  # world setting (start step only)
    character: Optional[Dict[str, Any]] = None  # character info (start step only)
    genre: Optional[str] = None  # genre (start step only)
    choice_text: Optional[str] = None  # player's last choice (choose step only)
    scene: Optional[Dict[str, Any]] = None  # scene dict produced by the node
    ending: Optional[Dict[str, Any]] = None  # result of the ending check
34
 
35
 
36
async def node_entry(state: GraphState) -> GraphState:
    """Pass-through entry node; only logs the incoming state."""
    logger.debug("[Graph] entry state: %s", state)
    return state
39
 
40
+
41
def route_step(state: GraphState) -> str:
    """Map the incoming step to the graph node that should handle it."""
    routes = {"start": "init_game", "choose": "player_step"}
    target = routes.get(state.step)
    if target is None:
        # Unknown steps fall back to starting a fresh game.
        logger.warning("route_step received unknown step '%s'", state.step)
        return "init_game"
    return target
48
+
49
+
50
async def node_init_game(state: GraphState) -> GraphState:
    """Initialize a new game: story frame, first scene, and its image.

    Stores the first scene on the state and returns it for the caller.
    """
    logger.debug("[Graph] node_init_game state: %s", state)
    await generate_story_frame.ainvoke(
        {
            "user_hash": state.user_hash,
            "setting": state.setting,
            "character": state.character,
            "genre": state.genre,
        }
    )
    first_scene = await generate_scene.ainvoke(
        {"user_hash": state.user_hash, "last_choice": "start"}
    )
    # user_hash doubles as the request id for the image agent.
    change_scene = await generate_image_prompt(first_scene["description"], state.user_hash)
    # Lazy %-style args for consistency with the rest of this module's logging.
    logger.info("Change scene: %s", change_scene)
    await generate_scene_image.ainvoke(
        {
            "user_hash": state.user_hash,
            "scene_id": first_scene["scene_id"],
            "change_scene": change_scene,
        }
    )
    state.scene = first_scene
    return state
74
+
75
+
76
async def node_player_step(state: GraphState) -> GraphState:
    """Apply the player's choice, check for an ending, and build the next scene."""
    logger.debug("[Graph] node_player_step state: %s", state)
    user_state = get_user_state(state.user_hash)
    scene_id = user_state.current_scene_id

    if state.choice_text:
        await update_state_with_choice.ainvoke(
            {
                "user_hash": state.user_hash,
                "scene_id": scene_id,
                "choice_text": state.choice_text,
            }
        )

    ending = await check_ending.ainvoke({"user_hash": state.user_hash})
    state.ending = ending
    if ending.get("ending_reached", False):
        # The story is over; no further scene is generated.
        return state

    next_scene = await generate_scene.ainvoke(
        {
            "user_hash": state.user_hash,
            "last_choice": state.choice_text,
        }
    )
    change_scene = await generate_image_prompt(next_scene["description"], state.user_hash)
    # NOTE(review): user_state.assets[scene_id] raises KeyError if no image was
    # stored for the previous scene — confirm that cannot happen here.
    image_task = generate_scene_image.ainvoke(
        {
            "user_hash": state.user_hash,
            "scene_id": next_scene["scene_id"],
            "current_image": user_state.assets[scene_id],
            "change_scene": change_scene,
        }
    )
    music_task = change_music_tone(state.user_hash, next_scene["music"])
    # Image and music generation are independent, so run them concurrently.
    await asyncio.gather(image_task, music_task)
    state.scene = next_scene
    return state
110
+
111
+
112
def route_ending(state: GraphState) -> str:
    """Route to game_over when an ending was reached, otherwise finish the turn."""
    if state.ending.get("ending_reached"):
        return "game_over"
    return "continue"
114
+
115
+
116
async def node_game_over(state: GraphState) -> GraphState:
    """Terminal node: the game has ended; state passes through unchanged."""
    logger.info("[Graph] Game over for user %s", state.user_hash)
    return state
119
+
120
+
121
def build_llm_game_graph() -> StateGraph:
    """Assemble and compile the game's state graph.

    entry -> (init_game | player_step); player_step may route to game_over.
    """
    g = StateGraph(GraphState)
    for name, node in (
        ("entry", node_entry),
        ("init_game", node_init_game),
        ("player_step", node_player_step),
        ("game_over", node_game_over),
    ):
        g.add_node(name, node)

    g.set_entry_point("entry")
    g.add_conditional_edges(
        "entry",
        route_step,
        {"init_game": "init_game", "player_step": "player_step"},
    )
    g.add_edge("init_game", END)
    g.add_conditional_edges(
        "player_step",
        route_ending,
        {"game_over": "game_over", "continue": END},
    )
    g.add_edge("game_over", END)
    return g.compile()


# Module-level compiled graph used by the runner.
llm_game_graph = build_llm_game_graph()
src/agent/models.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models representing game state and LLM outputs."""
2
+
3
+ from typing import Dict, List, Optional, Set
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class Milestone(BaseModel):
    """Milestone that can be achieved during the story."""

    id: str           # stable identifier referenced in prompts/history
    description: str  # human-readable milestone text


class Ending(BaseModel):
    """Possible game ending."""

    id: str
    type: str  # "good" or "bad"
    condition: str  # prose condition; evaluated by the LLM ending check
    description: Optional[str] = None  # may be filled in later from the story frame


class StoryFrame(BaseModel):
    """Overall plot information generated by the LLM."""

    lore: str
    goal: str
    milestones: List[Milestone]
    endings: List[Ending]
    setting: str
    character: Dict[str, str]
    genre: str


class StoryFrameLLM(BaseModel):
    """Structure returned by the LLM for story frame generation."""

    # StoryFrame minus the player-provided fields (setting/character/genre),
    # which the caller merges in afterwards.
    lore: str
    goal: str
    milestones: List[Milestone]
    endings: List[Ending]


class SceneChoice(BaseModel):
    """User choice leading to another scene."""

    text: str
    next_scene_short_desc: str


class PlayerOption(BaseModel):
    """Option presented to the player in a scene."""

    option_description: str = Field(
        description=(
            "Description of the option, e.g. '[Say] Hello!' or "
            "'Go to the forest'"
        )
    )


class Scene(BaseModel):
    """Game scene with choices and optional assets."""

    scene_id: str
    description: str
    choices: List[SceneChoice]
    image: Optional[str] = None  # presumably a path/URL of the generated image — confirm
    music: Optional[str] = None  # presumably a music prompt/asset reference — confirm


class SceneLLM(BaseModel):
    """Structure expected from the LLM when generating a scene."""

    description: str
    choices: List[SceneChoice]


class EndingCheckResult(BaseModel):
    """Result returned from the LLM when checking for an ending."""

    ending_reached: bool = Field(default=False)
    ending: Optional[Ending] = None  # populated only when ending_reached is True


class UserChoice(BaseModel):
    """Single player choice recorded in the history."""

    scene_id: str
    choice_text: str
    timestamp: Optional[str] = None  # optional free-form timestamp; format not enforced here


class UserState(BaseModel):
    """State stored for each user."""

    story_frame: Optional[StoryFrame] = None
    current_scene_id: Optional[str] = None
    scenes: Dict[str, Scene] = Field(default_factory=dict)
    milestones_achieved: Set[str] = Field(default_factory=set)
    user_choices: List[UserChoice] = Field(default_factory=list)
    ending: Optional[Ending] = None
    assets: Dict[str, str] = Field(default_factory=dict)  # presumably scene_id -> image — verify against image tools
src/agent/music_agent.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from agent.llm import create_light_llm
3
+ from langchain_core.messages import SystemMessage, HumanMessage
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ music_options = """Instruments: 303 Acid Bass, 808 Hip Hop Beat, Accordion, Alto Saxophone, Bagpipes, Balalaika Ensemble, Banjo, Bass Clarinet, Bongos, Boomy Bass, Bouzouki, Buchla Synths, Cello, Charango, Clavichord, Conga Drums, Didgeridoo, Dirty Synths, Djembe, Drumline, Dulcimer, Fiddle, Flamenco Guitar, Funk Drums, Glockenspiel, Guitar, Hang Drum, Harmonica, Harp, Harpsichord, Hurdy-gurdy, Kalimba, Koto, Lyre, Mandolin, Maracas, Marimba, Mbira, Mellotron, Metallic Twang, Moog Oscillations, Ocarina, Persian Tar, Pipa, Precision Bass, Ragtime Piano, Rhodes Piano, Shamisen, Shredding Guitar, Sitar, Slide Guitar, Smooth Pianos, Spacey Synths, Steel Drum, Synth Pads, Tabla, TR-909 Drum Machine, Trumpet, Tuba, Vibraphone, Viola Ensemble, Warm Acoustic Guitar, Woodwinds, ...
9
+ Music Genre: Acid Jazz, Afrobeat, Alternative Country, Baroque, Bengal Baul, Bhangra, Bluegrass, Blues Rock, Bossa Nova, Breakbeat, Celtic Folk, Chillout, Chiptune, Classic Rock, Contemporary R&B, Cumbia, Deep House, Disco Funk, Drum & Bass, Dubstep, EDM, Electro Swing, Funk Metal, G-funk, Garage Rock, Glitch Hop, Grime, Hyperpop, Indian Classical, Indie Electronic, Indie Folk, Indie Pop, Irish Folk, Jam Band, Jamaican Dub, Jazz Fusion, Latin Jazz, Lo-Fi Hip Hop, Marching Band, Merengue, New Jack Swing, Minimal Techno, Moombahton, Neo-Soul, Orchestral Score, Piano Ballad, Polka, Post-Punk, 60s Psychedelic Rock, Psytrance, R&B, Reggae, Reggaeton, Renaissance Music, Salsa, Shoegaze, Ska, Surf Rock, Synthpop, Techno, Trance, Trap Beat, Trip Hop, Vaporwave, Witch house, ...
10
+ Mood/Description: Acoustic Instruments, Ambient, Bright Tones, Chill, Crunchy Distortion, Danceable, Dreamy, Echo, Emotional, Ethereal Ambience, Experimental, Fat Beats, Funky, Glitchy Effects, Huge Drop, Live Performance, Lo-fi, Ominous Drone, Psychedelic, Rich Orchestration, Saturated Tones, Subdued Melody, Sustained Chords, Swirling Phasers, Tight Groove, Unsettling, Upbeat, Virtuoso, Weird Noises, ...
11
+ """
12
+ system_prompt = f"""
13
+ You are a music agent responsible for generating appropriate music tones for scenes in a visual novel game.
14
+
15
+ Your task is to analyze the current scene description and generate a detailed music prompt that captures:
16
+ 1. The emotional atmosphere
17
+ 2. The intensity level
18
+ 3. The genre/style that best fits the scene
19
+ 4. Specific instruments that would enhance the mood
20
+
21
+ You have access to a wide range of musical elements including:
22
+ {music_options}
23
+
24
+ When generating a music prompt:
25
+ - Consider the scene's context, mood, and any suspense elements
26
+ - Choose instruments that complement the scene's atmosphere
27
+ - Select a genre that matches the story's setting and tone
28
+ - Include specific mood descriptors to guide the music generation
29
+
30
+ Your output should be a concise but detailed prompt that the music generation model can use to create an appropriate soundtrack for the scene.
31
+ """
32
+
33
+
34
class MusicPrompt(BaseModel):
    # Free-text prompt for the music generation model.
    prompt: str


llm = create_light_llm(0.1).with_structured_output(MusicPrompt)


async def generate_music_prompt(scene_description: str, request_id: str) -> str:
    """Produce a music-generation prompt matching the given scene description."""
    logger.info(f"Generating music prompt for the current scene: {request_id}")
    conversation = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=scene_description),
    ]
    result = await llm.ainvoke(conversation)
    logger.info(f"Music prompt generated: {request_id}")
    return result.prompt
src/agent/prompts.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt templates for the story-frame / scene / ending-check LLM calls.
# Fixed garbled translation instructions ("a langueage of setting language",
# "a language of lore language") so the model receives coherent English.

STORY_FRAME_PROMPT = """
You are a narrative game designer. Use the player data below to
create a story frame for an interactive adventure.
Setting: {setting}
Character: {character}
Genre: {genre}
Return ONLY a JSON object with:
- lore: brief world description
- goal: main player objective
- milestones: 2-4 key events (id, description)
- endings: good/bad endings (id, type, condition, description)
Translate the lore, goal, milestones and endings into
the language of the setting.
"""

SCENE_PROMPT = """
Using the provided lore and history, generate the next scene.
Lore: {lore}
Goal: {goal}
Milestones: {milestones}
Endings: {endings}
History: {history}
Last choice: {last_choice}
Respond ONLY with JSON containing:
- description: short summary of the scene
- choices: exactly two dicts {{"text": ..., "next_scene_short_desc": ...}}
Translate the scene description and choices into the language of the lore.
"""

ENDING_CHECK_PROMPT = """
History: {history}
Endings: {endings}
Check if any ending conditions are met.
If none are met return ending_reached: false.
If an ending is reached return ending_reached: true and provide the
ending object (id, type, description).
Respond ONLY with JSON.
"""
src/agent/runner.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entry point for executing a graph step."""
2
+
3
+ import logging
4
+ from dataclasses import asdict
5
+ from typing import Dict, Optional
6
+
7
+ from agent.llm_graph import GraphState, llm_game_graph
8
+ from agent.models import UserState
9
+ from agent.state import get_user_state
10
+
11
logger = logging.getLogger(__name__)


async def process_step(
    user_hash: str,
    step: str,
    setting: Optional[str] = None,
    character: Optional[dict] = None,
    genre: Optional[str] = None,
    choice_text: Optional[str] = None,
) -> Dict:
    """Run one interaction step through the graph.

    Args:
        user_hash: Session identifier for the player.
        step: "start" (new game; requires setting/character/genre) or
            "choose" (requires choice_text).

    Returns:
        A dict with either an "ending" (game over) or the current "scene",
        plus a "game_over" flag.

    Raises:
        ValueError: if required parameters for the given step are missing.
    """
    logger.info("[Runner] Step %s for user %s", step, user_hash)

    # Validate before building any graph state. Explicit raises instead of
    # assert so the checks survive `python -O`.
    if step == "start" and not (setting and character and genre):
        raise ValueError("Missing start parameters")
    if step == "choose" and not choice_text:
        raise ValueError("choice_text is required")

    graph_state = GraphState(user_hash=user_hash, step=step)
    if step == "start":
        graph_state.setting = setting
        graph_state.character = character
        graph_state.genre = genre
    elif step == "choose":
        graph_state.choice_text = choice_text

    final_state = await llm_game_graph.ainvoke(asdict(graph_state))

    user_state: UserState = get_user_state(user_hash)
    response: Dict = {}

    ending = final_state.get("ending")
    if ending and ending.get("ending_reached"):
        ending_info = ending["ending"]
        # Backfill the ending description from the story frame when the
        # LLM's ending-check result omitted it.
        if not ending_info.get("description") and user_state.story_frame:
            for e in user_state.story_frame.endings:
                if e.id == ending_info.get("id"):
                    ending_info["description"] = e.description
                    break
        response["ending"] = ending_info
        response["game_over"] = True
    else:
        if (
            user_state.current_scene_id
            and user_state.current_scene_id in user_state.scenes
        ):
            # NOTE(review): .dict() is deprecated in pydantic v2; the file
            # mixes .dict() and model_dump() — confirm the pydantic version.
            current_scene = user_state.scenes[user_state.current_scene_id].dict()
        else:
            current_scene = final_state.get("scene")
        response["scene"] = current_scene
        response["game_over"] = False

    return response
src/agent/state.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Simple in-memory user state storage."""
2
+
3
+ from typing import Dict
4
+
5
+ from agent.models import UserState
6
+
7
_USER_STATE: Dict[str, UserState] = {}


def get_user_state(user_hash: str) -> UserState:
    """Fetch the state for *user_hash*, creating a fresh one on first access."""
    state = _USER_STATE.get(user_hash)
    if state is None:
        state = UserState()
        _USER_STATE[user_hash] = state
    return state


def set_user_state(user_hash: str, state: UserState) -> None:
    """Store *state* under *user_hash*."""
    _USER_STATE[user_hash] = state


def reset_user_state(user_hash: str) -> None:
    """Discard any stored progress for *user_hash* and start clean."""
    _USER_STATE[user_hash] = UserState()
src/agent/tools.py CHANGED
@@ -1,40 +1,172 @@
1
- from langchain_core.tools import tool
2
- from typing import Annotated, Any, Dict, List
3
- from images.image_generator import generate_image
4
- from langgraph.prebuilt import InjectedState
5
  import logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  logger = logging.getLogger(__name__)
8
 
 
9
  def _err(msg: str) -> str:
10
  logger.error(msg)
11
- return f"{{ 'error': '{msg}' }}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- def _success(msg: str) -> str:
14
- logger.info(msg)
15
- return f"{{ 'success': '{msg}' }}"
16
 
17
  @tool
18
  async def generate_scene_image(
19
- prompt: Annotated[
20
- str,
21
- "The prompt to generate an image from"
22
- ],
23
- state: InjectedState,
24
- ) -> Annotated[
25
- str,
26
- "The path to the generated image"
27
- ]:
28
- """
29
- Generate an image from a prompt and set current scene image.
30
- """
31
  try:
32
- image_path, img_description = generate_image(prompt)
33
- state["current_scene"]["image"] = image_path
34
- state["current_scene"]["image_description"] = img_description
35
- return _success(f"Image generated and set as current scene image: {img_description}")
36
- except Exception as e:
37
- return _err(str(e))
38
-
39
-
40
- available_tools = [generate_scene_image]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM tools used by the game graph."""
2
+
 
 
3
  import logging
4
+ import uuid
5
+ from typing import Annotated, Dict
6
+
7
+ from langchain_core.tools import tool
8
+
9
+ from agent.llm import create_llm
10
+ from agent.models import (
11
+ EndingCheckResult,
12
+ Scene,
13
+ SceneChoice,
14
+ SceneLLM,
15
+ StoryFrame,
16
+ StoryFrameLLM,
17
+ UserChoice,
18
+ )
19
+ from agent.prompts import ENDING_CHECK_PROMPT, SCENE_PROMPT, STORY_FRAME_PROMPT
20
+ from agent.state import get_user_state, set_user_state
21
+ from images.image_generator import modify_image, generate_image
22
+ from agent.image_agent import ChangeScene
23
 
24
  logger = logging.getLogger(__name__)
25
 
26
+
27
def _err(msg: str) -> str:
    """Log *msg* as an error and return it wrapped in an error payload string."""
    # NOTE(review): single quotes make this invalid strict JSON — confirm
    # consumers treat the return value as opaque text only.
    logger.error(msg)
    return f"{{'error': '{msg}'}}"
30
+
31
+
32
@tool
async def generate_story_frame(
    user_hash: Annotated[str, "User session ID"],
    setting: Annotated[str, "Game world setting"],
    character: Annotated[Dict[str, str], "Character info"],
    genre: Annotated[str, "Genre"],
) -> Annotated[Dict, "Generated story frame"]:
    """Create the initial story frame and store it in user state."""
    llm = create_llm().with_structured_output(StoryFrameLLM)
    rendered_prompt = STORY_FRAME_PROMPT.format(
        setting=setting,
        character=character,
        genre=genre,
    )
    frame_llm: StoryFrameLLM = await llm.ainvoke(rendered_prompt)
    # Merge the LLM output with the player-provided fields.
    story_frame = StoryFrame(
        lore=frame_llm.lore,
        goal=frame_llm.goal,
        milestones=frame_llm.milestones,
        endings=frame_llm.endings,
        setting=setting,
        character=character,
        genre=genre,
    )
    user_state = get_user_state(user_hash)
    user_state.story_frame = story_frame
    set_user_state(user_hash, user_state)
    # NOTE(review): .dict() is deprecated under pydantic v2 — confirm version.
    return story_frame.dict()
60
+
61
+
62
@tool
async def generate_scene(
    user_hash: Annotated[str, "User session ID"],
    last_choice: Annotated[str, "Last user choice"],
) -> Annotated[Dict, "Generated scene"]:
    """Generate a new scene based on the current user state.

    Builds the scene prompt from the story frame plus the full choice
    history, asks the LLM for a structured ``SceneLLM``, and stores the
    resulting ``Scene`` as the current one. Retries once when the model
    returns fewer than two choices, and fails loudly if the retry also
    comes up short (the UI needs exactly two options).
    """
    state = get_user_state(user_hash)
    if not state.story_frame:
        return _err("Story frame not initialized")
    llm = create_llm().with_structured_output(SceneLLM)
    prompt = SCENE_PROMPT.format(
        lore=state.story_frame.lore,
        goal=state.story_frame.goal,
        milestones=",".join(m.id for m in state.story_frame.milestones),
        endings=",".join(e.id for e in state.story_frame.endings),
        history="; ".join(
            f"{c.scene_id}:{c.choice_text}" for c in state.user_choices
        ),
        last_choice=last_choice,
    )
    resp: SceneLLM = await llm.ainvoke(prompt)
    if len(resp.choices) < 2:
        # One retry with an explicit instruction; the model occasionally
        # returns a single choice.
        resp = await llm.ainvoke(
            prompt + "\nThe scene must contain exactly two choices."
        )
    if len(resp.choices) < 2:
        # Still short after the retry: report an error instead of
        # silently producing an unplayable scene with < 2 options.
        return _err("LLM did not return two scene choices")
    scene_id = str(uuid.uuid4())
    choices = [
        SceneChoice(**ch.model_dump())
        if hasattr(ch, "model_dump")
        else SceneChoice(**ch)
        for ch in resp.choices[:2]
    ]
    scene = Scene(
        scene_id=scene_id,
        description=resp.description,
        choices=choices,
        image=None,
        music=None,
    )
    state.current_scene_id = scene_id
    state.scenes[scene_id] = scene
    set_user_state(user_hash, state)
    # Pydantic v2 deprecates .dict(); prefer model_dump() when available.
    return scene.model_dump() if hasattr(scene, "model_dump") else scene.dict()
103
 
 
 
 
104
 
105
@tool
async def generate_scene_image(
    user_hash: Annotated[str, "User session ID"],
    scene_id: Annotated[str, "Scene ID"],
    change_scene: Annotated[ChangeScene, "Prompt for image generation"],
    current_image: Annotated[str, "Current image"] | None = None,
) -> Annotated[str, "Path to generated image"]:
    """Generate an image for a scene and save the path in the state.

    When the decision is "change_completely" or "modify", a new image is
    produced; otherwise the current image path is kept as-is. Either way
    the resulting path is written back onto the scene and the asset map.
    """
    try:
        new_path = current_image
        if change_scene.change_scene in ("change_completely", "modify"):
            if current_image is None:
                pending = generate_image(change_scene.scene_description)
            else:
                # Modify the existing image rather than regenerating from
                # scratch, so the update keeps the same visual style.
                pending = modify_image(
                    current_image, change_scene.scene_description
                )
            new_path, _ = await pending
        state = get_user_state(user_hash)
        if scene_id in state.scenes:
            state.scenes[scene_id].image = new_path
        state.assets[scene_id] = new_path
        set_user_state(user_hash, state)
        return new_path
    except Exception as exc:  # noqa: BLE001
        return _err(str(exc))
130
+
131
+
132
@tool
async def update_state_with_choice(
    user_hash: Annotated[str, "User session ID"],
    scene_id: Annotated[str, "Scene ID"],
    choice_text: Annotated[str, "Chosen option"],
) -> Annotated[Dict, "Updated state"]:
    """Record the player's choice (with a UTC timestamp) in the state."""
    from datetime import datetime, timezone

    state = get_user_state(user_hash)
    state.user_choices.append(
        UserChoice(
            scene_id=scene_id,
            choice_text=choice_text,
            # datetime.utcnow() is deprecated and returns a naive value;
            # use an explicitly timezone-aware UTC timestamp instead.
            timestamp=datetime.now(timezone.utc).isoformat(),
        )
    )
    set_user_state(user_hash, state)
    # Pydantic v2 deprecates .dict(); prefer model_dump() when available.
    return state.model_dump() if hasattr(state, "model_dump") else state.dict()
151
+
152
+
153
@tool
async def check_ending(
    user_hash: Annotated[str, "User session ID"],
) -> Annotated[Dict, "Ending check result"]:
    """Check whether the choice history satisfies any ending condition.

    Asks the LLM to compare the user's accumulated choices against the
    story frame's ending conditions; persists the ending on the state
    when one is reached and reports the outcome to the caller.
    """
    state = get_user_state(user_hash)
    if not state.story_frame:
        return _err("No story frame")
    llm = create_llm().with_structured_output(EndingCheckResult)
    history = "; ".join(f"{c.scene_id}:{c.choice_text}" for c in state.user_choices)
    prompt = ENDING_CHECK_PROMPT.format(
        history=history,
        endings=",".join(f"{e.id}:{e.condition}" for e in state.story_frame.endings),
    )
    resp: EndingCheckResult = await llm.ainvoke(prompt)
    if resp.ending_reached and resp.ending:
        state.ending = resp.ending
        set_user_state(user_hash, state)
        # Pydantic v2 deprecates .dict(); prefer model_dump() when available.
        ending_payload = (
            resp.ending.model_dump()
            if hasattr(resp.ending, "model_dump")
            else resp.ending.dict()
        )
        return {"ending_reached": True, "ending": ending_payload}
    return {"ending_reached": False}
src/config.py CHANGED
@@ -1,16 +1,18 @@
1
  from dotenv import load_dotenv
2
-
3
- load_dotenv()
4
  from pydantic_settings import BaseSettings
5
  import logging
6
  from pydantic import SecretStr
7
 
 
 
 
8
  logging.basicConfig(
9
  level=logging.INFO,
10
  format="%(levelname)s:\t%(asctime)s [%(name)s] %(message)s",
11
  datefmt="%Y-%m-%d %H:%M:%S %z",
12
  )
13
 
 
14
  class BaseAppSettings(BaseSettings):
15
  """Base settings class with common configuration."""
16
 
@@ -18,7 +20,8 @@ class BaseAppSettings(BaseSettings):
18
  env_file = ".env"
19
  env_file_encoding = "utf-8"
20
  extra = "ignore"
21
-
 
22
  class AppSettings(BaseAppSettings):
23
  gemini_api_key: SecretStr
24
  gemini_api_keys: SecretStr
 
1
  from dotenv import load_dotenv
 
 
2
  from pydantic_settings import BaseSettings
3
  import logging
4
  from pydantic import SecretStr
5
 
6
+ load_dotenv()
7
+
8
+
9
  logging.basicConfig(
10
  level=logging.INFO,
11
  format="%(levelname)s:\t%(asctime)s [%(name)s] %(message)s",
12
  datefmt="%Y-%m-%d %H:%M:%S %z",
13
  )
14
 
15
+
16
  class BaseAppSettings(BaseSettings):
17
  """Base settings class with common configuration."""
18
 
 
20
  env_file = ".env"
21
  env_file_encoding = "utf-8"
22
  extra = "ignore"
23
+
24
+
25
  class AppSettings(BaseAppSettings):
26
  gemini_api_key: SecretStr
27
  gemini_api_keys: SecretStr
src/css.py CHANGED
@@ -118,6 +118,14 @@ img {
118
  display: none !important;
119
  }
120
 
 
 
 
 
 
 
 
 
121
  /* Make form element transparent */
122
  .overlay-content .form {
123
  background: transparent !important;
@@ -144,4 +152,4 @@ loading_css_styles = """
144
  font-size: 2em;
145
  text-align: center;
146
  }
147
- """
 
118
  display: none !important;
119
  }
120
 
121
+ /* Position the back button in the top-right corner */
122
+ #back-btn {
123
+ position: fixed !important;
124
+ top: 10px !important;
125
+ right: 10px !important;
126
+ z-index: 20 !important;
127
+ }
128
+
129
  /* Make form element transparent */
130
  .overlay-content .form {
131
  background: transparent !important;
 
152
  font-size: 2em;
153
  text-align: center;
154
  }
155
+ """
src/game_constructor.py CHANGED
@@ -5,6 +5,8 @@ from game_setting import Character, GameSetting, get_user_story
5
  from game_state import story, state, get_current_scene
6
  from agent.llm_agent import process_user_input
7
  from images.image_generator import generate_image
 
 
8
  from audio.audio_generator import start_music_generation
9
  import asyncio
10
  from config import settings
@@ -144,60 +146,22 @@ async def start_game_with_settings(
144
  )
145
 
146
  game_setting = GameSetting(character=character, setting=setting_desc, genre=genre)
147
-
148
- # Initialize the game story with the custom settings
149
- initial_story = f"""Welcome to your story, {game_setting.character.name}!
150
-
151
- Setting: {game_setting.setting}
152
-
153
- You are {game_setting.character.name}, a {game_setting.character.age}-year-old character. {game_setting.character.background}
154
-
155
- Your personality: {game_setting.character.personality}
156
-
157
- Genre: {game_setting.genre}
158
-
159
- You find yourself at the beginning of your adventure. The world around you feels alive with possibilities. What do you choose to do first?
160
-
161
- NOTE FOR THE ASSISTANT: YOU HAVE TO GENERATE A NEW IMAGE FOR THE START SCENE.
162
- """
163
-
164
- response = await process_user_input(initial_story)
165
-
166
- music_tone = response.music_prompt
167
-
168
- asyncio.create_task(start_music_generation(user_hash, music_tone))
169
-
170
- img = "forest.jpg"
171
- img_description = ""
172
-
173
- img_path, img_description = await generate_image(
174
- response.change_scene.scene_description
175
  )
176
- if img_path:
177
- img = img_path
178
-
179
- story["start"] = {
180
- "text": response.game_message,
181
- "image": img,
182
- "choices": {
183
- option.option_description: asyncio.create_task(
184
- process_user_input(
185
- get_user_story(
186
- response.game_message,
187
- response.change_scene.scene_description,
188
- option.option_description,
189
- )
190
- )
191
- ) if settings.pregenerate_next_scene else None
192
- for option in response.player_options
193
- },
194
- "music_tone": response.music_prompt,
195
- "img_description": img_description,
196
- }
197
- state["scene"] = "start"
198
 
199
- # Get the current scene data
200
- scene_text, scene_image, scene_choices = get_current_scene()
 
 
201
 
202
  return (
203
  gr.update(visible=False), # loading indicator
 
5
  from game_state import story, state, get_current_scene
6
  from agent.llm_agent import process_user_input
7
  from images.image_generator import generate_image
8
+ from game_setting import Character, GameSetting
9
+ from agent.runner import process_step
10
  from audio.audio_generator import start_music_generation
11
  import asyncio
12
  from config import settings
 
146
  )
147
 
148
  game_setting = GameSetting(character=character, setting=setting_desc, genre=genre)
149
+
150
+ asyncio.create_task(start_music_generation(user_hash, "neutral"))
151
+
152
+ # Run the LLM graph to initialize the story
153
+ result = await process_step(
154
+ user_hash=user_hash,
155
+ step="start",
156
+ setting=game_setting.setting,
157
+ character=game_setting.character.model_dump(),
158
+ genre=game_setting.genre,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
+ scene = result["scene"]
162
+ scene_text = scene["description"]
163
+ scene_image = scene.get("image", "")
164
+ scene_choices = [ch["text"] for ch in scene.get("choices", [])]
165
 
166
  return (
167
  gr.update(visible=False), # loading indicator
src/main.py CHANGED
@@ -2,14 +2,13 @@ import gradio as gr
2
  from css import custom_css, loading_css_styles
3
  from audio.audio_generator import (
4
  update_audio,
5
- change_music_tone,
6
  cleanup_music_session,
7
  )
8
  import logging
9
  from agent.llm_agent import process_user_input
10
  from images.image_generator import modify_image
 
11
  import uuid
12
- from game_state import story, state
13
  from game_constructor import (
14
  SETTING_SUGGESTIONS,
15
  CHARACTER_SUGGESTIONS,
@@ -25,80 +24,49 @@ from config import settings
25
  logger = logging.getLogger(__name__)
26
 
27
 
28
- def return_to_constructor():
29
- """Return to the game constructor interface, ensure loading is hidden."""
 
 
 
 
 
 
30
  return (
31
  gr.update(visible=False), # loading_indicator
32
  gr.update(visible=True), # constructor_interface
33
  gr.update(visible=False), # game_interface
34
  gr.update(visible=False), # error_message
 
35
  )
36
 
37
 
38
  async def update_scene(user_hash: str, choice):
39
  logger.info(f"Updating scene with choice: {choice}")
40
- if isinstance(choice, str):
41
- old_scene = state["scene"]
42
- new_scene = str(uuid.uuid4())
43
- story[new_scene] = {
44
- **story[old_scene],
45
- }
46
- state["scene"] = new_scene
47
-
48
- user_story = get_user_story(
49
- story[old_scene]["text"], story[old_scene]["img_description"], choice
50
- )
51
-
52
- response = await (
53
- story[old_scene]["choices"][choice] or process_user_input(user_story)
54
- )
55
 
56
- story[new_scene]["text"] = response.game_message
57
-
58
- story[new_scene]["choices"] = {
59
- option.option_description: asyncio.create_task(
60
- process_user_input(
61
- get_user_story(
62
- response.game_message,
63
- response.change_scene.scene_description,
64
- option.option_description,
65
- )
66
- )
67
- )
68
- if settings.pregenerate_next_scene
69
- else None
70
- for option in response.player_options
71
- }
72
-
73
- img_task = None
74
- # always modify the image to avoid hallucinations in which image is being generated in entirely different style
75
- if (
76
- response.change_scene.change_scene == "change_completely"
77
- or response.change_scene.change_scene == "modify"
78
- ):
79
- img_task = modify_image(
80
- story[old_scene]["image"], response.change_scene.scene_description
81
- )
82
- else:
83
- img_task = asyncio.sleep(0)
84
 
85
- # run both tasks in parallel
86
- img_res, _ = await asyncio.gather(
87
- img_task, change_music_tone(user_hash, response.music_prompt)
 
 
 
 
88
  )
89
 
90
- if img_res and response.change_scene.change_scene:
91
- img_path, img_description = img_res
92
- if img_path:
93
- story[new_scene]["image"] = img_path
94
- story[new_scene]["img_description"] = img_description
95
-
96
- scene = story[state["scene"]]
97
  return (
98
- scene["text"],
99
- scene["image"],
100
  gr.Radio(
101
- choices=scene["choices"],
102
  label="What do you choose?",
103
  value=None,
104
  elem_classes=["choice-buttons"],
@@ -261,8 +229,12 @@ with gr.Blocks(
261
  gr.Markdown("# 🎮 Your Interactive Story")
262
 
263
  with gr.Row():
264
- back_btn = gr.Button("⬅️ Back to Constructor", variant="secondary")
265
  gr.Markdown("### Playing your custom game!")
 
 
 
 
 
266
 
267
  # Audio component for background music
268
  audio_out = gr.Audio(
@@ -349,12 +321,13 @@ with gr.Blocks(
349
 
350
  back_btn.click(
351
  fn=return_to_constructor,
352
- inputs=[],
353
  outputs=[
354
  loading_indicator,
355
  constructor_interface,
356
  game_interface,
357
  error_message,
 
358
  ],
359
  )
360
 
 
2
  from css import custom_css, loading_css_styles
3
  from audio.audio_generator import (
4
  update_audio,
 
5
  cleanup_music_session,
6
  )
7
  import logging
8
  from agent.llm_agent import process_user_input
9
  from images.image_generator import modify_image
10
+ from agent.runner import process_step
11
  import uuid
 
12
  from game_constructor import (
13
  SETTING_SUGGESTIONS,
14
  CHARACTER_SUGGESTIONS,
 
24
  logger = logging.getLogger(__name__)
25
 
26
 
27
async def return_to_constructor(user_hash: str):
    """Leave the game view and return to the constructor screen.

    Wipes the server-side session state, stops the music session, and
    issues a fresh session hash so the next game cannot pick up stale
    state from the previous run.
    """
    from agent.state import reset_user_state

    reset_user_state(user_hash)
    await cleanup_music_session(user_hash)
    fresh_hash = str(uuid.uuid4())
    return (
        gr.update(visible=False),     # loading_indicator
        gr.update(visible=True),      # constructor_interface
        gr.update(visible=False),     # game_interface
        gr.update(visible=False),     # error_message
        gr.update(value=fresh_hash),  # local_storage
    )
42
 
43
 
44
  async def update_scene(user_hash: str, choice):
45
  logger.info(f"Updating scene with choice: {choice}")
46
+ if not isinstance(choice, str):
47
+ return gr.update(), gr.update(), gr.update()
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ result = await process_step(
50
+ user_hash=user_hash,
51
+ step="choose",
52
+ choice_text=choice,
53
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ if result.get("game_over"):
56
+ ending = result["ending"]
57
+ ending_text = ending.get("description") or ending.get("condition", "")
58
+ return (
59
+ gr.update(value=ending_text),
60
+ gr.update(value=None),
61
+ gr.Radio(choices=[], label="", value=None),
62
  )
63
 
64
+ scene = result["scene"]
 
 
 
 
 
 
65
  return (
66
+ scene["description"],
67
+ scene.get("image", ""),
68
  gr.Radio(
69
+ choices=[ch["text"] for ch in scene.get("choices", [])],
70
  label="What do you choose?",
71
  value=None,
72
  elem_classes=["choice-buttons"],
 
229
  gr.Markdown("# 🎮 Your Interactive Story")
230
 
231
  with gr.Row():
 
232
  gr.Markdown("### Playing your custom game!")
233
+ back_btn = gr.Button(
234
+ "⬅️ Back to Constructor",
235
+ variant="secondary",
236
+ elem_id="back-btn",
237
+ )
238
 
239
  # Audio component for background music
240
  audio_out = gr.Audio(
 
321
 
322
  back_btn.click(
323
  fn=return_to_constructor,
324
+ inputs=[local_storage],
325
  outputs=[
326
  loading_indicator,
327
  constructor_interface,
328
  game_interface,
329
  error_message,
330
+ local_storage,
331
  ],
332
  )
333