Sandra Sanchez committed on
Commit
bcade95
·
1 Parent(s): 8736894

Add TTS functionality, adapt files accordingly

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. app.py +37 -2
  3. mcp_server/server.py +30 -7
.gitignore CHANGED
@@ -14,7 +14,8 @@ wheels/
14
  .env
15
  venv/
16
 
17
- # Generated images
18
  generated_images/
19
  images/
20
  *.png
 
 
14
  .env
15
  venv/
16
 
17
+ # Generated data
18
  generated_images/
19
  images/
20
  *.png
21
+ temp/
app.py CHANGED
@@ -11,6 +11,7 @@ import io
11
  from PIL import Image
12
  import asyncio
13
  from mcp_server.server import create_mcp_server
 
14
 
15
 
16
 
@@ -26,6 +27,8 @@ models = client.models.list()
26
  TEMPLATES_DIR = Path(__file__).resolve().parent / "mcp_server" / "templates"
27
  GENERATED_IMAGES_DIR = Path(__file__).resolve().parent / "generated_images"
28
  GENERATED_IMAGES_DIR.mkdir(exist_ok=True)
 
 
29
 
30
 
31
  def load_scenarios():
@@ -81,6 +84,12 @@ async def call_translate_and_adapt_tool(story, language, culture):
81
  result = await server.request_handlers["translate_and_adapt"](context, story, language, culture)
82
  return result["adapted_story"]
83
 
 
 
 
 
 
 
84
  def show_translation(story, language, culture):
85
  return asyncio.run(call_translate_and_adapt_tool(story, language, culture))
86
 
@@ -95,6 +104,7 @@ def main():
95
 
96
  with gr.Blocks() as demo:
97
  gr.Markdown("# 🧸 Comfortool\n### Social stories with comforting illustrations to support autistic kids with daily challenges")
 
98
 
99
  # Inputs arriba: escenario, idioma, cultura, edad, vibe, personaje
100
  with gr.Row():
@@ -121,18 +131,43 @@ def main():
121
  image_out = gr.Image(label="Illustration")
122
 
123
  def on_generate(scenario_name, language, culture, age, gender, vibe, comfort_character):
 
124
  print("Generating story for:", scenario_name, language, culture, age, vibe, comfort_character)# 1. Genera la historia en inglés
125
  story = generate_story(scenario_name, language, culture, age, gender, vibe, comfort_character)
126
  print("Story generated:", story)
127
  image = generate_image(scenario_name, culture, age, gender, vibe, comfort_character)
128
- return format_story(story), image
 
129
 
130
  generate_btn.click(
131
  fn=on_generate,
132
  inputs=[dropdown, language_input, culture_input, age_input, vibe_input, comfort_character_input, gender_input],
133
- outputs=[story_out, image_out]
134
  )
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  print("Gradio app initialized.")
137
  return demo
138
 
 
11
  from PIL import Image
12
  import asyncio
13
  from mcp_server.server import create_mcp_server
14
+ import tempfile
15
 
16
 
17
 
 
27
  TEMPLATES_DIR = Path(__file__).resolve().parent / "mcp_server" / "templates"
28
  GENERATED_IMAGES_DIR = Path(__file__).resolve().parent / "generated_images"
29
  GENERATED_IMAGES_DIR.mkdir(exist_ok=True)
30
+ TEMP_DIR = Path(__file__).resolve().parent / "temp"
31
+ TEMP_DIR.mkdir(exist_ok=True)
32
 
33
 
34
  def load_scenarios():
 
84
  result = await server.request_handlers["translate_and_adapt"](context, story, language, culture)
85
  return result["adapted_story"]
86
 
87
+ async def call_voice_tool(story, language):
88
+ server = await create_mcp_server() # Solo para pruebas locales
89
+ context = {}
90
+ result = await server.request_handlers["generate_voice"](context, story, language)
91
+ return result.get("audio", None)
92
+
93
  def show_translation(story, language, culture):
94
  return asyncio.run(call_translate_and_adapt_tool(story, language, culture))
95
 
 
104
 
105
  with gr.Blocks() as demo:
106
  gr.Markdown("# 🧸 Comfortool\n### Social stories with comforting illustrations to support autistic kids with daily challenges")
107
+ story_state = gr.State("")
108
 
109
  # Inputs arriba: escenario, idioma, cultura, edad, vibe, personaje
110
  with gr.Row():
 
131
  image_out = gr.Image(label="Illustration")
132
 
133
  def on_generate(scenario_name, language, culture, age, gender, vibe, comfort_character):
134
+ global generated_story
135
  print("Generating story for:", scenario_name, language, culture, age, vibe, comfort_character)# 1. Genera la historia en inglés
136
  story = generate_story(scenario_name, language, culture, age, gender, vibe, comfort_character)
137
  print("Story generated:", story)
138
  image = generate_image(scenario_name, culture, age, gender, vibe, comfort_character)
139
+ formated_story = format_story(story)
140
+ return formated_story, image, story
141
 
142
  generate_btn.click(
143
  fn=on_generate,
144
  inputs=[dropdown, language_input, culture_input, age_input, vibe_input, comfort_character_input, gender_input],
145
+ outputs=[story_out, image_out, story_state]
146
  )
147
 
148
+
149
+ def on_voice(story, language):
150
+ if not story or story.strip() == "":
151
+ return None
152
+ audio_bytes = asyncio.run(call_voice_tool(story, language))
153
+ if audio_bytes:
154
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3", dir=str(TEMP_DIR)) as tmp_file:
155
+ tmp_file.write(audio_bytes)
156
+ audio_path = tmp_file.name
157
+ return audio_path
158
+ else:
159
+ return None
160
+
161
+ voice_btn = gr.Button("Listen to the story")
162
+ voice_out = gr.Audio(label="Story audio", type="filepath")
163
+
164
+ voice_btn.click(
165
+ fn=on_voice,
166
+ inputs=[story_state, language_input],
167
+ outputs=voice_out
168
+ )
169
+
170
+
171
  print("Gradio app initialized.")
172
  return demo
173
 
mcp_server/server.py CHANGED
@@ -3,6 +3,9 @@ from mcp.server import Server
3
  from mcp.server.stdio import stdio_server
4
  import os
5
  from openai import OpenAI
 
 
 
6
 
7
  async def create_mcp_server():
8
  server = Server(
@@ -16,18 +19,12 @@ async def create_mcp_server():
16
  if hasattr(server, "set_version"):
17
  server.set_version("0.1.0")
18
 
19
- # Tool única: echo
20
- async def echo(context, text):
21
- return {"result": f"Echo: {text}"}
22
-
23
- server.request_handlers["echo"] = echo
24
-
25
  # Tool: Translate and culturally adapt a story
26
  async def translate_and_adapt(context, story, language="en", culture="default", age="7", gender="female", vibe="Cartoon", comfort_character="Koala"):
27
  prompt = (
28
  f"Translate the following social story to {language} and adapt names, places, race, gender identity, customs, and style to {culture} culture. "
29
  f"Make it suitable for a child of age {age}. Do separate sentences with new lines. Use the illustration style '{vibe}' and include the comfort character '{comfort_character}' in the story if possible but without infringing copyright. "
30
- "Return only the adapted story text.\n\n"
31
  f"Story:\n{story}"
32
  )
33
  openai_api_key = os.environ.get("OPENAI_API_KEY")
@@ -38,9 +35,35 @@ async def create_mcp_server():
38
  )
39
  adapted_story = response.choices[0].message.content.strip()
40
  return {"adapted_story": adapted_story}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  server.request_handlers["translate_and_adapt"] = translate_and_adapt
43
 
 
 
44
 
45
  return server
46
 
 
3
  from mcp.server.stdio import stdio_server
4
  import os
5
  from openai import OpenAI
6
+ import requests
7
+ import asyncio
8
+ import time
9
 
10
  async def create_mcp_server():
11
  server = Server(
 
19
  if hasattr(server, "set_version"):
20
  server.set_version("0.1.0")
21
 
 
 
 
 
 
 
22
  # Tool: Translate and culturally adapt a story
23
  async def translate_and_adapt(context, story, language="en", culture="default", age="7", gender="female", vibe="Cartoon", comfort_character="Koala"):
24
  prompt = (
25
  f"Translate the following social story to {language} and adapt names, places, race, gender identity, customs, and style to {culture} culture. "
26
  f"Make it suitable for a child of age {age}. Do separate sentences with new lines. Use the illustration style '{vibe}' and include the comfort character '{comfort_character}' in the story if possible but without infringing copyright. "
27
+ "Return only the adapted story text without excessive emotion (no exclamation marks).\n\n"
28
  f"Story:\n{story}"
29
  )
30
  openai_api_key = os.environ.get("OPENAI_API_KEY")
 
35
  )
36
  adapted_story = response.choices[0].message.content.strip()
37
  return {"adapted_story": adapted_story}
38
+
39
+ # Tool: TTS
40
+ async def generate_voice(context, story, language="en"):
41
+ openai_api_key = os.environ.get("OPENAI_API_KEY")
42
+ client = OpenAI(api_key=openai_api_key)
43
+ # Selección simple de voz según idioma
44
+ voice_map = {
45
+ "en": "nova",
46
+ "es": "onyx",
47
+ "fr": "fable",
48
+ "de": "echo",
49
+ "it": "alloy"
50
+ # Añade más según tus pruebas y preferencias
51
+ }
52
+ voice = voice_map.get(language, "nova") # Por defecto "nova"
53
+ response = client.audio.speech.create(
54
+ model="tts-1",
55
+ voice=voice,
56
+ input=story,
57
+ response_format="mp3"
58
+ )
59
+ audio_bytes = response.content
60
+ return {"audio": audio_bytes}
61
+
62
 
63
  server.request_handlers["translate_and_adapt"] = translate_and_adapt
64
 
65
+ server.request_handlers["generate_voice"] = generate_voice
66
+
67
 
68
  return server
69