Spaces:

yasserrmd
/

ggwave

Runtime error

App Files Files Community

yasserrmd committed on Feb 27, 2025

Commit

5f40ba9

verified ·

1 Parent(s): 4fb332f

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -68

app.py CHANGED Viewed

@@ -75,77 +75,95 @@ def text_to_speech(input_text: TextInput):
 @app.post("/chat/")
 async def chat_with_llm(file: UploadFile = File(...)):
     """Process input WAV, send text to LLM, and return generated response as WAV."""
-    # Initialize ggwave instance
-    instance = ggwave.init()
-    # Read the file content into memory without saving to disk
-    file_content = await file.read()
-    # Create a BytesIO object to use with wav.read
-    with io.BytesIO(file_content) as buffer:
-        try:
-            fs, recorded_waveform = wav.read(buffer)
-            recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
-            waveform_bytes = recorded_waveform.tobytes()
-            user_message = ggwave.decode(instance, waveform_bytes)
-            if user_message is None:
-                return Response(
-                    content="No message detected in audio",
-                    media_type="text/plain",
-                    status_code=400
-                )
-            print("user_message: " + user_message.decode("utf-8"))
-            # Send to LLM
-            chat_completion = client.chat.completions.create(
-                messages=[
-                    {"role": "system", "content": "you are a helpful assistant. answer always in one sentence"},
-                    {"role": "user", "content": user_message.decode("utf-8")}
-                ],
-                model="llama-3.3-70b-versatile",
-            )
-            llm_response = chat_completion.choices[0].message.content
-            print(llm_response)
-            # Convert response to audio
-            encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
-            # Convert byte data into float32 array
-            waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
-            # Normalize float32 data to the range of int16
-            waveform_int16 = np.int16(waveform_float32 * 32767)
-            # Save to buffer instead of a file
-            buffer = io.BytesIO()
-            with wave.open(buffer, "wb") as wf:
-                wf.setnchannels(1)  # Mono audio
-                wf.setsampwidth(2)  # 2 bytes per sample (16-bit PCM)
-                wf.setframerate(48000)  # Sample rate
-                wf.writeframes(waveform_int16.tobytes())  # Write waveform as bytes
-            buffer.seek(0)
-            ggwave.free(instance)
-            return Response(
-                content=buffer.getvalue(),
-                media_type="audio/wav",
-                headers={
-                    "X-User-Message": user_message.decode("utf-8"),
-                    "X-LLM-Response": llm_response
-                }
-            )
-        except Exception as e:
-            print(f"Error processing audio: {str(e)}")
-            ggwave.free(instance)
             return Response(
-                content=f"Error processing audio: {str(e)}",
                 media_type="text/plain",
-                status_code=500
             )
 @app.post("/continuous-chat/")
 async def continuous_chat(
     file: UploadFile = File(...),

 @app.post("/chat/")
 async def chat_with_llm(file: UploadFile = File(...)):
+    """Process input WAV, send text to LLM, and return generated response as WAV.
+
+    The uploaded bytes are parsed with ``wav.read``, scaled to float32 and
+    passed to ``ggwave.decode``. The decoded text is sent to the chat
+    completion client, and the reply is encoded with ``ggwave.encode`` and
+    written out as a mono, 16-bit, 48000 Hz WAV.
+
+    Args:
+        file: The uploaded audio file.
+
+    Returns:
+        A ``Response`` that is one of:
+        * ``audio/wav`` with ``X-User-Message`` and ``X-LLM-Response``
+          headers on success;
+        * ``text/plain`` with status 400 when the upload is empty or no
+          message is detected in the audio;
+        * ``text/plain`` with status 500 when any step raises.
+    """
+    try:
+        # Log file details
+        print(f"File received: {file.filename}, Content-Type: {file.content_type}")
+        # Read the file content into memory
+        file_content = await file.read()
+        if not file_content:
             return Response(
+                content="Empty file uploaded",
                 media_type="text/plain",
+                status_code=400
             )
+        # Initialize ggwave instance
+        instance = ggwave.init()
+        try:
+            # Create a BytesIO object to use with wav.read
+            with io.BytesIO(file_content) as wav_in:
+                # The sample rate returned by wav.read is not used here
+                _, recorded_waveform = wav.read(wav_in)
+                recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
+                user_message = ggwave.decode(instance, recorded_waveform.tobytes())
+                if user_message is None:
+                    # The finally clause below releases the instance on this
+                    # early return as well; it used to be leaked here.
+                    return Response(
+                        content="No message detected in audio",
+                        media_type="text/plain",
+                        status_code=400
+                    )
+                # Decode once and reuse for logging, the prompt and the header
+                user_text = user_message.decode("utf-8")
+                print("Decoded user message:", user_text)
+                # Send to LLM
+                # NOTE(review): this looks like a synchronous call inside an
+                # async handler - confirm whether it blocks the event loop.
+                chat_completion = client.chat.completions.create(
+                    messages=[
+                        {"role": "system", "content": "you are a helpful assistant. answer always in one sentence"},
+                        {"role": "user", "content": user_text}
+                    ],
+                    model="llama-3.3-70b-versatile",
+                )
+                llm_response = chat_completion.choices[0].message.content
+                print("LLM Response:", llm_response)
+                # Convert response to audio
+                encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
+                # Convert byte data into float32 array
+                waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
+                # Normalize float32 data to the range of int16
+                waveform_int16 = np.int16(waveform_float32 * 32767)
+                # Save to a separate in-memory buffer instead of a file
+                wav_out = io.BytesIO()
+                with wave.open(wav_out, "wb") as wf:
+                    wf.setnchannels(1)  # Mono audio
+                    wf.setsampwidth(2)  # 2 bytes per sample (16-bit PCM)
+                    wf.setframerate(48000)  # Sample rate
+                    wf.writeframes(waveform_int16.tobytes())  # Write waveform as bytes
+                # NOTE(review): header values carry raw text; confirm the
+                # framework accepts non-Latin-1 characters and newlines here.
+                return Response(
+                    content=wav_out.getvalue(),
+                    media_type="audio/wav",
+                    headers={
+                        "X-User-Message": user_text,
+                        "X-LLM-Response": llm_response
+                    }
+                )
+        except Exception as e:
+            print(f"Error processing audio: {str(e)}")
+            return Response(
+                content=f"Error processing audio: {str(e)}",
+                media_type="text/plain",
+                status_code=500
+            )
+        finally:
+            # Single release point: runs exactly once on success, on the
+            # 400 early return, and on errors.
+            ggwave.free(instance)
+    except Exception as e:
+        print(f"Unexpected error: {str(e)}")
+        return Response(
+            content=f"Unexpected error: {str(e)}",
+            media_type="text/plain",
+            status_code=500
+        )
 @app.post("/continuous-chat/")
 async def continuous_chat(
     file: UploadFile = File(...),