talk-to-smolvox

Sleeping

App Files Files Community

Steveeeeeeen HF Staff committed on Feb 14

Commit

77dbc9a

verified ·

1 Parent(s): 1e991ac

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -21

app.py CHANGED Viewed

@@ -46,12 +46,15 @@ def transcribe(
     tf_input = [d for d in transformers_chat]
     output = pipe(
         {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
         max_new_tokens=512,
     )
     transcription = whisper({"array": audio_sr.squeeze(), "sampling_rate": target_sr})
     conversation.append({"role": "user", "content": transcription["text"]})
     conversation.append({"role": "assistant", "content": output})
     transformers_chat.append({"role": "user", "content": transcription["text"]})
@@ -60,32 +63,61 @@ def transcribe(
     yield AdditionalOutputs(transformers_chat, conversation)
 with gr.Blocks() as demo:
     gr.HTML(
         """
-    <h1 style='text-align: center'>
-    Talk to Smolvox Smollm2(Powered by WebRTC ⚡️)
-    </h1>
-    <p style='text-align: center'>
-    Once you grant access to your microphone, you can talk naturally to Ultravox.
-    When you stop talking, the audio will be sent for processing.
-    </p>
-    <p style='text-align: center'>
-    Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
-    </p>
-    """
     )
     with gr.Row():
-        transformers_chat = gr.State(
-            value=[
-                {
-                    "role": "system",
-                    "content": "You are a friendly and helpful character. You love to answer questions for people.",
-                }
-            ]
-        )
-        with gr.Group():
-            transcript = gr.Chatbot(label="transcript", type="messages")
             audio = WebRTC(
                 rtc_configuration=rtc_configuration,
                 label="Stream",
@@ -93,6 +125,7 @@ with gr.Blocks() as demo:
                 modality="audio",
             )
     audio.stream(
         ReplyOnPause(transcribe),
         inputs=[audio, transformers_chat, transcript],
@@ -106,5 +139,14 @@ with gr.Blocks() as demo:
         show_progress="hidden",
     )
 if __name__ == "__main__":
     demo.launch()

     tf_input = [d for d in transformers_chat]
+    # Generate response from the pipeline using the audio input
     output = pipe(
         {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
         max_new_tokens=512,
     )
+    # Transcribe the audio using Whisper
     transcription = whisper({"array": audio_sr.squeeze(), "sampling_rate": target_sr})
+    # Update both conversation histories
     conversation.append({"role": "user", "content": transcription["text"]})
     conversation.append({"role": "assistant", "content": output})
     transformers_chat.append({"role": "user", "content": transcription["text"]})
     yield AdditionalOutputs(transformers_chat, conversation)
+def respond_text(
+    user_text: str,
+    transformers_chat: list[dict],
+    conversation: list[dict],
+):
+    if not user_text.strip():
+        # Do nothing if the textbox is empty
+        return transformers_chat, conversation
+    # Append the user message from the textbox
+    conversation.append({"role": "user", "content": user_text})
+    transformers_chat.append({"role": "user", "content": user_text})
+    # Generate a response using the pipeline.
+    # Here we assume the pipeline can also process text input via the "text" key.
+    output = pipe({"text": user_text, "turns": transformers_chat}, max_new_tokens=512)
+    conversation.append({"role": "assistant", "content": output})
+    transformers_chat.append({"role": "assistant", "content": output})
+    return transformers_chat, conversation
 with gr.Blocks() as demo:
     gr.HTML(
         """
+        <h1 style='text-align: center'>
+            Talk to Smolvox Smollm2 (Powered by WebRTC ⚡️)
+        </h1>
+        <p style='text-align: center'>
+            Once you grant access to your microphone, you can talk naturally to Ultravox.
+            When you stop talking, the audio will be sent for processing.
+        </p>
+        <p style='text-align: center'>
+            Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
+        </p>
+        """
     )
+    # Shared conversation state
+    transformers_chat = gr.State(
+        value=[
+            {
+                "role": "system",
+                "content": "You are a friendly and helpful character. You love to answer questions for people.",
+            }
+        ]
+    )
     with gr.Row():
+        with gr.Column(scale=1):
+            transcript = gr.Chatbot(label="Transcript", type="messages")
+            text_input = gr.Textbox(
+                placeholder="Type your message here...", label="Your Message"
+            )
+            send_button = gr.Button("Send")
+        with gr.Column(scale=1):
             audio = WebRTC(
                 rtc_configuration=rtc_configuration,
                 label="Stream",
                 modality="audio",
             )
+    # Audio stream: when you stop speaking, process the audio input.
     audio.stream(
         ReplyOnPause(transcribe),
         inputs=[audio, transformers_chat, transcript],
         show_progress="hidden",
     )
+    # Text input: when you click "Send", process the typed message.
+    send_button.click(
+        respond_text,
+        inputs=[text_input, transformers_chat, transcript],
+        outputs=[transformers_chat, transcript],
+    )
+    # Optionally clear the text box after sending:
+    send_button.click(lambda: "", inputs=[], outputs=[text_input])
 if __name__ == "__main__":
     demo.launch()