ai_voice

Sleeping

App Files Files Community

Pepguy committed on Oct 1, 2025

Commit

f7336ac

verified ·

1 Parent(s): d181a50

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -92

app.py CHANGED Viewed

@@ -1,11 +1,10 @@
-# To run this code you need to install the following dependencies:
 # pip install flask google-genai
 import os
-import io
 from flask import Flask, request, render_template_string, jsonify, send_file
 from google import genai
 from google.genai import types
 app = Flask(__name__)
@@ -14,27 +13,23 @@ HTML = """
 <html lang="en">
 <head>
   <meta charset="UTF-8" />
-  <title>Gemini Audio Test</title>
 </head>
 <body style="font-family:sans-serif;padding:2rem;">
-  <h1>Gemini-2.0-Flash Audio Generation</h1>
   <form id="genai-form">
-    <textarea id="prompt" rows="6" cols="60" placeholder="Enter text to convert to speech"></textarea><br/><br/>
-    <button type="submit">Generate Audio</button>
   </form>
-  <div id="status" style="margin-top:1rem;color:#666;"></div>
-  <div id="audio-container" style="margin-top:1rem;"></div>
   <script>
   const form = document.getElementById('genai-form');
   form.addEventListener('submit', async e => {
     e.preventDefault();
     const prompt = document.getElementById('prompt').value.trim();
-    const status = document.getElementById('status');
-    const audioContainer = document.getElementById('audio-container');
-    status.textContent = 'Generating audio…';
-    audioContainer.innerHTML = '';
     try {
       const resp = await fetch('/generate', {
@@ -48,34 +43,17 @@ HTML = """
         throw new Error(`Server returned ${resp.status}: ${errText}`);
       }
-      // Get the audio file directly
-      const blob = await resp.blob();
-      const audioUrl = URL.createObjectURL(blob);
-      status.textContent = 'Audio generated successfully!';
-      // Create audio element
-      const audio = document.createElement('audio');
-      audio.controls = true;
-      audio.style.width = '100%';
-      audio.src = audioUrl;
-      audioContainer.appendChild(audio);
-      // Create download link
-      const downloadLink = document.createElement('a');
-      downloadLink.href = audioUrl;
-      downloadLink.download = 'gemini_audio.wav';
-      downloadLink.textContent = 'Download Audio';
-      downloadLink.style.display = 'block';
-      downloadLink.style.marginTop = '1rem';
-      audioContainer.appendChild(downloadLink);
-      // Auto-play
-      audio.play().catch(err => console.log('Autoplay prevented:', err));
     } catch (err) {
       console.error(err);
-      status.textContent = 'Fetch error: ' + err.message;
     }
   });
 </script>
@@ -83,54 +61,38 @@ HTML = """
 </html>
 """
-def generate_audio_from_gemini(prompt: str) -> tuple[bytes, str]:
-    client = genai.Client(
-        api_key="AIzaSyDolbPUZBPUPvQUu-RGktJmvnUpkcEKIYo",
-    )
-    #model = "gemini-2.0-flash-exp"
-    model = "gemini-2.5-flash-preview-tts"
     contents = [
         types.Content(
             role="user",
             parts=[types.Part.from_text(text=prompt)],
         )
     ]
-    # Configure for audio output
     config = types.GenerateContentConfig(
-        response_modalities=["AUDIO"],
-        speech_config=types.SpeechConfig(
-            voice_config=types.VoiceConfig(
-                prebuilt_voice_config=types.PrebuiltVoiceConfig(
-                    voice_name="Puck"
-                )
-            )
-        )
     )
-    # Generate content with audio
     response = client.models.generate_content(
-        model=model,
         contents=contents,
         config=config,
     )
-    # Log response structure for debugging
-    app.logger.info(f"Response candidates: {len(response.candidates)}")
-    if response.candidates:
-        app.logger.info(f"Parts in response: {len(response.candidates[0].content.parts)}")
-    # Extract audio data from response
-    for part in response.candidates[0].content.parts:
-        if hasattr(part, 'inline_data') and part.inline_data:
-            mime_type = part.inline_data.mime_type
-            audio_data = part.inline_data.data
-            app.logger.info(f"Found audio: mime_type={mime_type}, size={len(audio_data)} bytes")
-            return audio_data, mime_type
-    raise ValueError("No audio data found in response")
 @app.route('/')
 def index():
     return render_template_string(HTML)
@@ -138,29 +100,26 @@ def index():
 @app.route('/generate', methods=['POST'])
 def gen():
     data = request.get_json(silent=True) or {}
-    prompt = data.get("prompt", "")
-    app.logger.info(f"Received prompt: {prompt!r}")
     if not prompt:
         return jsonify({"error": "No prompt provided"}), 400
     try:
-        audio_bytes, mime_type = generate_audio_from_gemini(prompt)
-        app.logger.info("Audio generation succeeded, size=%d bytes, mime=%s", len(audio_bytes), mime_type)
-        # Send the audio file directly
-        audio_io = io.BytesIO(audio_bytes)
-        audio_io.seek(0)
-        return send_file(
-            audio_io,
-            mimetype=mime_type,
-            as_attachment=False,
-            download_name='gemini_audio.wav'
-        )
     except Exception as e:
-        app.logger.exception("Audio generation failed")
         return jsonify({"error": str(e)}), 500
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    app.run(host="0.0.0.0", port=port, debug=True)

 # pip install flask google-genai
 import os
 from flask import Flask, request, render_template_string, jsonify, send_file
 from google import genai
 from google.genai import types
+import tempfile
 app = Flask(__name__)
 <html lang="en">
 <head>
   <meta charset="UTF-8" />
+  <title>Gemini TTS Test</title>
 </head>
 <body style="font-family:sans-serif;padding:2rem;">
+  <h1>Gemini-2.5-Flash-Preview-TTS Test</h1>
   <form id="genai-form">
+    <textarea id="prompt" rows="6" cols="60" placeholder="Enter text to synthesize"></textarea><br/><br/>
+    <button type="submit">Generate</button>
   </form>
+  <div id="output" style="margin-top:1rem;"></div>
   <script>
   const form = document.getElementById('genai-form');
   form.addEventListener('submit', async e => {
     e.preventDefault();
     const prompt = document.getElementById('prompt').value.trim();
+    const out = document.getElementById('output');
+    out.textContent = 'Generating…';
     try {
       const resp = await fetch('/generate', {
         throw new Error(`Server returned ${resp.status}: ${errText}`);
       }
+      const data = await resp.json();
+      if (data.error) {
+        out.textContent = 'Error: ' + data.error;
+      } else if (data.file) {
+        out.innerHTML = '<audio controls src="' + data.file + '"></audio>';
+      } else {
+        out.textContent = 'Unexpected response payload';
+      }
     } catch (err) {
       console.error(err);
+      out.textContent = 'Fetch error: ' + err.message;
     }
   });
 </script>
 </html>
 """
+client = genai.Client(api_key="AIzaSyDolbPUZBPUPvQUu-RGktJmvnUpkcEKIYo",)
+def generate_audio_from_gemini(prompt: str) -> str:
     contents = [
         types.Content(
             role="user",
             parts=[types.Part.from_text(text=prompt)],
         )
     ]
     config = types.GenerateContentConfig(
+        response_mime_type="audio/wav"
     )
     response = client.models.generate_content(
+        model="gemini-2.5-flash-preview-tts",
         contents=contents,
         config=config,
     )
+    # response._raw_response is the actual binary audio
+    audio_bytes = response._raw_response
+    if not audio_bytes:
+        raise RuntimeError("No audio returned from Gemini")
+    # Write to a temporary file
+    fd, path = tempfile.mkstemp(suffix=".wav", prefix="tts_")
+    with os.fdopen(fd, "wb") as f:
+        f.write(audio_bytes)
+    return path
 @app.route('/')
 def index():
     return render_template_string(HTML)
 @app.route('/generate', methods=['POST'])
 def gen():
     data = request.get_json(silent=True) or {}
+    prompt = data.get("prompt", "").strip()
     if not prompt:
         return jsonify({"error": "No prompt provided"}), 400
     try:
+        file_path = generate_audio_from_gemini(prompt)
+        # expose the file as a static endpoint
+        return jsonify({"file": f"/audio/{os.path.basename(file_path)}"})
     except Exception as e:
+        app.logger.exception("Generation failed")
         return jsonify({"error": str(e)}), 500
+@app.route('/audio/<filename>')
+def serve_audio(filename):
+    tmpdir = tempfile.gettempdir()
+    file_path = os.path.join(tmpdir, filename)
+    if not os.path.exists(file_path):
+        return "File not found", 404
+    return send_file(file_path, mimetype="audio/wav")
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
+    app.run(host="0.0.0.0", port=port)