Pepguy committed on
Commit
f7336ac
·
verified ·
1 Parent(s): d181a50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -92
app.py CHANGED
@@ -1,11 +1,10 @@
1
- # To run this code you need to install the following dependencies:
2
  # pip install flask google-genai
3
 
4
  import os
5
- import io
6
  from flask import Flask, request, render_template_string, jsonify, send_file
7
  from google import genai
8
  from google.genai import types
 
9
 
10
  app = Flask(__name__)
11
 
@@ -14,27 +13,23 @@ HTML = """
14
  <html lang="en">
15
  <head>
16
  <meta charset="UTF-8" />
17
- <title>Gemini Audio Test</title>
18
  </head>
19
  <body style="font-family:sans-serif;padding:2rem;">
20
- <h1>Gemini-2.0-Flash Audio Generation</h1>
21
  <form id="genai-form">
22
- <textarea id="prompt" rows="6" cols="60" placeholder="Enter text to convert to speech"></textarea><br/><br/>
23
- <button type="submit">Generate Audio</button>
24
  </form>
25
- <div id="status" style="margin-top:1rem;color:#666;"></div>
26
- <div id="audio-container" style="margin-top:1rem;"></div>
27
 
28
  <script>
29
  const form = document.getElementById('genai-form');
30
  form.addEventListener('submit', async e => {
31
  e.preventDefault();
32
  const prompt = document.getElementById('prompt').value.trim();
33
- const status = document.getElementById('status');
34
- const audioContainer = document.getElementById('audio-container');
35
-
36
- status.textContent = 'Generating audio…';
37
- audioContainer.innerHTML = '';
38
 
39
  try {
40
  const resp = await fetch('/generate', {
@@ -48,34 +43,17 @@ HTML = """
48
  throw new Error(`Server returned ${resp.status}: ${errText}`);
49
  }
50
 
51
- // Get the audio file directly
52
- const blob = await resp.blob();
53
- const audioUrl = URL.createObjectURL(blob);
54
-
55
- status.textContent = 'Audio generated successfully!';
56
-
57
- // Create audio element
58
- const audio = document.createElement('audio');
59
- audio.controls = true;
60
- audio.style.width = '100%';
61
- audio.src = audioUrl;
62
- audioContainer.appendChild(audio);
63
-
64
- // Create download link
65
- const downloadLink = document.createElement('a');
66
- downloadLink.href = audioUrl;
67
- downloadLink.download = 'gemini_audio.wav';
68
- downloadLink.textContent = 'Download Audio';
69
- downloadLink.style.display = 'block';
70
- downloadLink.style.marginTop = '1rem';
71
- audioContainer.appendChild(downloadLink);
72
-
73
- // Auto-play
74
- audio.play().catch(err => console.log('Autoplay prevented:', err));
75
-
76
  } catch (err) {
77
  console.error(err);
78
- status.textContent = 'Fetch error: ' + err.message;
79
  }
80
  });
81
  </script>
@@ -83,54 +61,38 @@ HTML = """
83
  </html>
84
  """
85
 
86
- def generate_audio_from_gemini(prompt: str) -> tuple[bytes, str]:
87
- client = genai.Client(
88
- api_key="AIzaSyDolbPUZBPUPvQUu-RGktJmvnUpkcEKIYo",
89
- )
90
-
91
- #model = "gemini-2.0-flash-exp"
92
- model = "gemini-2.5-flash-preview-tts"
93
  contents = [
94
  types.Content(
95
  role="user",
96
  parts=[types.Part.from_text(text=prompt)],
97
  )
98
  ]
99
-
100
- # Configure for audio output
101
  config = types.GenerateContentConfig(
102
- response_modalities=["AUDIO"],
103
- speech_config=types.SpeechConfig(
104
- voice_config=types.VoiceConfig(
105
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
106
- voice_name="Puck"
107
- )
108
- )
109
- )
110
  )
111
 
112
- # Generate content with audio
113
  response = client.models.generate_content(
114
- model=model,
115
  contents=contents,
116
  config=config,
117
  )
118
-
119
- # Log response structure for debugging
120
- app.logger.info(f"Response candidates: {len(response.candidates)}")
121
- if response.candidates:
122
- app.logger.info(f"Parts in response: {len(response.candidates[0].content.parts)}")
123
-
124
- # Extract audio data from response
125
- for part in response.candidates[0].content.parts:
126
- if hasattr(part, 'inline_data') and part.inline_data:
127
- mime_type = part.inline_data.mime_type
128
- audio_data = part.inline_data.data
129
- app.logger.info(f"Found audio: mime_type={mime_type}, size={len(audio_data)} bytes")
130
- return audio_data, mime_type
131
-
132
- raise ValueError("No audio data found in response")
133
-
134
  @app.route('/')
135
  def index():
136
  return render_template_string(HTML)
@@ -138,29 +100,26 @@ def index():
138
  @app.route('/generate', methods=['POST'])
139
  def gen():
140
  data = request.get_json(silent=True) or {}
141
- prompt = data.get("prompt", "")
142
- app.logger.info(f"Received prompt: {prompt!r}")
143
  if not prompt:
144
  return jsonify({"error": "No prompt provided"}), 400
 
145
  try:
146
- audio_bytes, mime_type = generate_audio_from_gemini(prompt)
147
- app.logger.info("Audio generation succeeded, size=%d bytes, mime=%s", len(audio_bytes), mime_type)
148
-
149
- # Send the audio file directly
150
- audio_io = io.BytesIO(audio_bytes)
151
- audio_io.seek(0)
152
-
153
- return send_file(
154
- audio_io,
155
- mimetype=mime_type,
156
- as_attachment=False,
157
- download_name='gemini_audio.wav'
158
- )
159
  except Exception as e:
160
- app.logger.exception("Audio generation failed")
161
  return jsonify({"error": str(e)}), 500
162
 
163
-
 
 
 
 
 
 
 
164
  if __name__ == "__main__":
165
  port = int(os.environ.get("PORT", 7860))
166
- app.run(host="0.0.0.0", port=port, debug=True)
 
 
1
  # pip install flask google-genai
2
 
3
  import os
 
4
  from flask import Flask, request, render_template_string, jsonify, send_file
5
  from google import genai
6
  from google.genai import types
7
+ import tempfile
8
 
9
  app = Flask(__name__)
10
 
 
13
  <html lang="en">
14
  <head>
15
  <meta charset="UTF-8" />
16
+ <title>Gemini TTS Test</title>
17
  </head>
18
  <body style="font-family:sans-serif;padding:2rem;">
19
+ <h1>Gemini-2.5-Flash-Preview-TTS Test</h1>
20
  <form id="genai-form">
21
+ <textarea id="prompt" rows="6" cols="60" placeholder="Enter text to synthesize"></textarea><br/><br/>
22
+ <button type="submit">Generate</button>
23
  </form>
24
+ <div id="output" style="margin-top:1rem;"></div>
 
25
 
26
  <script>
27
  const form = document.getElementById('genai-form');
28
  form.addEventListener('submit', async e => {
29
  e.preventDefault();
30
  const prompt = document.getElementById('prompt').value.trim();
31
+ const out = document.getElementById('output');
32
+ out.textContent = 'Generating…';
 
 
 
33
 
34
  try {
35
  const resp = await fetch('/generate', {
 
43
  throw new Error(`Server returned ${resp.status}: ${errText}`);
44
  }
45
 
46
+ const data = await resp.json();
47
+ if (data.error) {
48
+ out.textContent = 'Error: ' + data.error;
49
+ } else if (data.file) {
50
+ out.innerHTML = '<audio controls src="' + data.file + '"></audio>';
51
+ } else {
52
+ out.textContent = 'Unexpected response payload';
53
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  } catch (err) {
55
  console.error(err);
56
+ out.textContent = 'Fetch error: ' + err.message;
57
  }
58
  });
59
  </script>
 
61
  </html>
62
  """
63
 
64
# Shared Gemini client for the whole app.
# The API key is taken from the GEMINI_API_KEY environment variable instead of
# being hard-coded, so the secret is never committed with the source.
# NOTE(review): any key that was previously committed in this file should be
# treated as compromised and rotated.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
65
+
66
def generate_audio_from_gemini(prompt: str) -> str:
    """Synthesize speech for *prompt* with Gemini TTS and save it as a WAV file.

    Args:
        prompt: Text to convert to speech.

    Returns:
        Path of a newly created temporary ``tts_*.wav`` file. The file is not
        deleted by this function; the caller owns its lifetime.

    Raises:
        RuntimeError: If the response contains no inline audio data.
    """
    import wave  # local import: only this function needs it

    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=prompt)],
        )
    ]

    # Audio output is requested via response_modalities + speech_config.
    config = types.GenerateContentConfig(
        response_modalities=["AUDIO"],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(
                    voice_name="Puck"
                )
            )
        ),
    )

    response = client.models.generate_content(
        model="gemini-2.5-flash-preview-tts",
        contents=contents,
        config=config,
    )

    # The audio arrives as inline data on one of the response parts; take the
    # first part that actually carries bytes.
    parts = (
        part
        for candidate in (response.candidates or [])
        if candidate.content and candidate.content.parts
        for part in candidate.content.parts
    )
    inline = next(
        (p.inline_data for p in parts if p.inline_data and p.inline_data.data),
        None,
    )
    if inline is None:
        raise RuntimeError("No audio returned from Gemini")

    audio_bytes = inline.data
    mime_type = (inline.mime_type or "").lower()

    # The documented TTS output is headerless 16-bit mono PCM at 24 kHz; honour
    # an explicit "rate=<n>" parameter in the MIME type when one is present.
    sample_rate = 24000
    for param in mime_type.split(";"):
        key, _, value = param.strip().partition("=")
        if key == "rate" and value.isdigit():
            sample_rate = int(value)

    fd, path = tempfile.mkstemp(suffix=".wav", prefix="tts_")
    try:
        with os.fdopen(fd, "wb") as f:
            if "wav" in mime_type:
                # Already a WAV container: store the bytes unchanged.
                f.write(audio_bytes)
            else:
                # Raw PCM: add a WAV header so browsers can play the file.
                with wave.open(f, "wb") as wav_out:
                    wav_out.setnchannels(1)
                    wav_out.setsampwidth(2)
                    wav_out.setframerate(sample_rate)
                    wav_out.writeframes(audio_bytes)
    except BaseException:
        # Do not leave a partial file behind if writing fails.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise

    return path
95
+
 
 
 
96
  @app.route('/')
97
  def index():
98
  return render_template_string(HTML)
 
100
  @app.route('/generate', methods=['POST'])
101
  def gen():
102
  data = request.get_json(silent=True) or {}
103
+ prompt = data.get("prompt", "").strip()
 
104
  if not prompt:
105
  return jsonify({"error": "No prompt provided"}), 400
106
+
107
  try:
108
+ file_path = generate_audio_from_gemini(prompt)
109
+ # expose the file as a static endpoint
110
+ return jsonify({"file": f"/audio/{os.path.basename(file_path)}"})
 
 
 
 
 
 
 
 
 
 
111
  except Exception as e:
112
+ app.logger.exception("Generation failed")
113
  return jsonify({"error": str(e)}), 500
114
 
115
@app.route('/audio/<filename>')
def serve_audio(filename):
    """Serve a previously generated ``tts_*.wav`` file from the temp directory.

    Args:
        filename: Bare file name taken from the URL.

    Returns:
        The WAV file, or a plain-text 404 when the name is not a generated
        audio file or the file does not exist.
    """
    # The system temp directory is shared with other programs, so only names
    # matching the generated pattern (bare name, "tts_" prefix, ".wav" suffix)
    # are served; anything else is reported as missing.
    is_generated_name = (
        filename == os.path.basename(filename)
        and filename.startswith("tts_")
        and filename.endswith(".wav")
    )
    file_path = os.path.join(tempfile.gettempdir(), filename)
    # isfile (not exists) so a directory entry is never passed to send_file.
    if not is_generated_name or not os.path.isfile(file_path):
        return "File not found", 404
    return send_file(file_path, mimetype="audio/wav")
122
+
123
  if __name__ == "__main__":
124
  port = int(os.environ.get("PORT", 7860))
125
+ app.run(host="0.0.0.0", port=port)