Spaces:

yasserrmd
/

ggwave

Sleeping

App Files Files Community

yasserrmd committed on Feb 26

Commit

39d90db

verified ·

1 Parent(s): dad5570

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -0

app.py CHANGED Viewed

@@ -28,6 +28,11 @@ async def serve_homepage():
     """Serve the chat interface HTML."""
     with open("static/index.html", "r") as f:
         return Response(content=f.read(), media_type="text/html")
 @app.post("/stt/")
 async def speech_to_text(file: UploadFile = File(...)):
@@ -132,6 +137,91 @@ async def chat_with_llm(file: UploadFile = File(...)):
                 }
             )
         except Exception as e:
             print(f"Error processing audio: {str(e)}")
             return Response(

     """Serve the chat interface HTML."""
     with open("static/index.html", "r") as f:
         return Response(content=f.read(), media_type="text/html")
+@app.get("/conv")
+async def serve_homepage():
+    """Serve the contents of static/conv.html as a text/html response."""
+    # NOTE(review): this handler reuses the name serve_homepage from the
+    # index.html handler visible just above in this diff; consider giving it
+    # a distinct name — confirm nothing refers to the function by name.
+    with open("static/conv.html", "r") as f:
+        return Response(content=f.read(), media_type="text/html")
 @app.post("/stt/")
 async def speech_to_text(file: UploadFile = File(...)):
                 }
             )
+        except Exception as e:
+            print(f"Error processing audio: {str(e)}")
+            return Response(
+                content=f"Error processing audio: {str(e)}",
+                media_type="text/plain",
+                status_code=500
+            )
+@app.post("/continuous-chat/")
+async def continuous_chat(
+    file: UploadFile = File(...),
+    chat_history: Optional[str] = Form(None)
+):
+    """Process input WAV with chat history, send text to LLM, and return response as WAV."""
+    # Initialize ggwave instance
+    instance = ggwave.init()
+    # Parse chat history if provided
+    messages = [{"role": "system", "content": "you are a helpful assistant. answer always in one sentence"}]
+    if chat_history:
+        try:
+            history = json.loads(chat_history)
+            for msg in history:
+                if msg["role"] in ["user", "assistant"]:
+                    messages.append(msg)
+        except Exception as e:
+            print(f"Error parsing chat history: {str(e)}")
+    # Read the file content into memory
+    file_content = await file.read()
+    # Process the audio file
+    with io.BytesIO(file_content) as buffer:
+        try:
+            fs, recorded_waveform = wav.read(buffer)
+            recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
+            waveform_bytes = recorded_waveform.tobytes()
+            user_message = ggwave.decode(instance, waveform_bytes)
+            if user_message is None:
+                return Response(
+                    content="No message detected in audio",
+                    media_type="text/plain",
+                    status_code=400
+                )
+            decoded_message = user_message.decode("utf-8")
+            print("user_message: " + decoded_message)
+            # Add user message to messages
+            messages.append({"role": "user", "content": decoded_message})
+            # Send to LLM with full chat history
+            chat_completion = client.chat.completions.create(
+                messages=messages,
+                model="llama-3.3-70b-versatile",
+            )
+            llm_response = chat_completion.choices[0].message.content
+            print(llm_response)
+            # Convert response to audio
+            encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
+            waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
+            waveform_int16 = np.int16(waveform_float32 * 32767)
+            # Save to buffer
+            buffer = io.BytesIO()
+            with wave.open(buffer, "wb") as wf:
+                wf.setnchannels(1)
+                wf.setsampwidth(2)
+                wf.setframerate(48000)
+                wf.writeframes(waveform_int16.tobytes())
+            buffer.seek(0)
+            return Response(
+                content=buffer.getvalue(),
+                media_type="audio/wav",
+                headers={
+                    "X-User-Message": decoded_message,
+                    "X-LLM-Response": llm_response
+                }
+            )
         except Exception as e:
             print(f"Error processing audio: {str(e)}")
             return Response(