Spaces:

Subayyal
/

Audio-to-Audio

Sleeping

App Files Files Community

Subayyal committed on Aug 31, 2025

Commit

80c64db

verified ·

1 Parent(s): 54e6287

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -2

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from groq import Groq
 from pathlib import Path
 from pydub import AudioSegment
 # Fetch API key from Secrets
 api_key = os.environ.get("GrokAPI")
 if not api_key:
@@ -12,16 +13,27 @@ if not api_key:
 client = Groq(api_key=api_key)
 st.title("🎤 Audio → AI Text → Speech")
 audio_file = st.file_uploader("Upload audio", type=["wav", "m4a"])
 if audio_file:
     try:
         audio_path = Path("input.wav")
         audio_segment = AudioSegment.from_file(audio_file)
         audio_segment.export(audio_path, format="wav")
         transcription = client.audio.transcriptions.create(
             file=("input.wav", audio_path.read_bytes()),
             model="whisper-large-v3",
@@ -29,21 +41,29 @@ if audio_file:
         )
         st.text_area("📝 Question", transcription, height=150)
         completion = client.chat.completions.create(
             model="llama-3.1-8b-instant",
             messages=[{"role": "user", "content": transcription}],
             temperature=0.7,
-            max_completion_tokens=512,
         )
         answer_text = completion.choices[0].message.content
         st.text_area("💬 AI Answer", answer_text, height=200)
         speech_path = Path("answer.wav")
         response = client.audio.speech.create(
             model="playai-tts",
             voice="Aaliyah-PlayAI",
             response_format="wav",
-            input=answer_text
         )
         response.stream_to_file(speech_path)
         st.audio(str(speech_path), format="audio/wav")

 from pathlib import Path
 from pydub import AudioSegment
+# ------------------------------
 # Fetch API key from Secrets
 api_key = os.environ.get("GrokAPI")
 if not api_key:
 client = Groq(api_key=api_key)
+# ------------------------------
 st.title("🎤 Audio → AI Text → Speech")
 audio_file = st.file_uploader("Upload audio", type=["wav", "m4a"])
+def truncate_text(text, max_chars=1000):
+    """Limit text size for TTS.
+
+    Args:
+        text: The text to be sent to the text-to-speech call.
+        max_chars: Upper bound on the length of the returned string.
+
+    Returns:
+        ``text`` unchanged when it already fits within ``max_chars``.
+        Otherwise a prefix of ``text`` followed by a truncation notice,
+        with the total length never exceeding ``max_chars``. If
+        ``max_chars`` is too small to hold the notice, a plain prefix of
+        at most ``max_chars`` characters is returned.
+    """
+    notice = "\n\n⚠️ Answer truncated for TTS."
+    if len(text) <= max_chars:
+        return text
+    # Reserve room for the notice up front: appending it after slicing to
+    # max_chars would push the result past the limit it is meant to enforce.
+    keep = max_chars - len(notice)
+    if keep <= 0:
+        # The notice alone would exceed the limit, so fall back to a hard cut.
+        return text[:max(max_chars, 0)]
+    return text[:keep] + notice
 if audio_file:
     try:
+        # ------------------------------
+        # Convert to WAV
         audio_path = Path("input.wav")
         audio_segment = AudioSegment.from_file(audio_file)
         audio_segment.export(audio_path, format="wav")
+        # ------------------------------
+        # Transcribe audio
         transcription = client.audio.transcriptions.create(
             file=("input.wav", audio_path.read_bytes()),
             model="whisper-large-v3",
         )
         st.text_area("📝 Question", transcription, height=150)
+        # ------------------------------
+        # Generate AI answer (shorter)
         completion = client.chat.completions.create(
             model="llama-3.1-8b-instant",
             messages=[{"role": "user", "content": transcription}],
             temperature=0.7,
+            max_completion_tokens=150,  # short answer
         )
         answer_text = completion.choices[0].message.content
         st.text_area("💬 AI Answer", answer_text, height=200)
+        # ------------------------------
+        # Truncate answer to safe length for TTS
+        answer_text_limited = truncate_text(answer_text, max_chars=1000)
+        # ------------------------------
+        # Convert text → speech
         speech_path = Path("answer.wav")
         response = client.audio.speech.create(
             model="playai-tts",
             voice="Aaliyah-PlayAI",
             response_format="wav",
+            input=answer_text_limited
         )
         response.stream_to_file(speech_path)
         st.audio(str(speech_path), format="audio/wav")