Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| from dotenv import load_dotenv | |
| from fastrtc import ( | |
| AdditionalOutputs, | |
| ReplyOnPause, | |
| Stream, | |
| WebRTCError, | |
| get_current_context, | |
| get_hf_turn_credentials, | |
| get_hf_turn_credentials_async, | |
| get_stt_model, | |
| get_tts_model, | |
| ) | |
| from huggingface_hub import InferenceClient | |
# Pull environment variables (e.g. Hugging Face credentials) from a local .env.
load_dotenv()

# The speech-to-text and text-to-speech models are created once at import time
# and shared by every connection handled by this process.
stt_model = get_stt_model()
tts_model = get_tts_model()

# Per-connection chat histories, keyed by the WebRTC connection id.
conversations: dict[str, list[dict[str, str]]] = {}
def response(
    audio: tuple[int, np.ndarray],
    hf_token: str | None,
):
    """Handle one user utterance: transcribe it, query the LLM, speak the reply.

    Args:
        audio: ``(sample_rate, samples)`` tuple delivered by FastRTC.
        hf_token: Hugging Face API token supplied through the UI textbox.

    Yields:
        TTS audio chunks for the assistant's reply, followed by an
        ``AdditionalOutputs`` carrying the updated message history for the UI.

    Raises:
        WebRTCError: if no HF token was provided.
    """
    if not hf_token:
        raise WebRTCError("HF Token is required")
    llm_client = InferenceClient(provider="auto", token=hf_token)

    # Each WebRTC connection keeps its own conversation history, seeded with
    # the system prompt on first contact.
    context = get_current_context()
    if context.webrtc_id not in conversations:
        conversations[context.webrtc_id] = [
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that can have engaging conversations."
                    "Your responses must be very short and concise. No more than two sentences. "
                    "Reasoning: low"
                ),
            }
        ]
    messages = conversations[context.webrtc_id]

    transcription = stt_model.stt(audio)
    messages.append({"role": "user", "content": transcription})

    output = llm_client.chat.completions.create(  # type: ignore
        model="openai/gpt-oss-20b",
        messages=messages,  # type: ignore
        max_tokens=1024,
        stream=True,
    )
    output_text = ""
    for chunk in output:
        # Some providers emit keep-alive/final chunks with an empty ``choices``
        # list; guard so those don't raise IndexError mid-stream.
        if chunk.choices:
            output_text += chunk.choices[0].delta.content or ""

    # ``messages`` aliases the list stored in ``conversations``, so appending
    # here updates the shared history directly — no reassignment needed.
    messages.append({"role": "assistant", "content": output_text})

    yield from tts_model.stream_tts_sync(output_text)
    yield AdditionalOutputs(messages)
# Transcript display; receives the message history via AdditionalOutputs.
chatbot = gr.Chatbot(label="Chatbot", type="messages")

# Password-style field so the user's HF token is masked on screen.
token = gr.Textbox(
    label="HF Token",
    value="",
    type="password",
)
# Wire the pause-detection handler, TURN credentials, and UI into one stream.
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(response),
    # NOTE(review): ttl=600*10000 is 6,000,000 s (~69 days) — confirm intended.
    server_rtc_configuration=get_hf_turn_credentials(ttl=600 * 10000),
    rtc_configuration=get_hf_turn_credentials,
    additional_inputs=[token],
    additional_outputs=[chatbot],
    # Always show the freshest history the handler produced.
    additional_outputs_handler=lambda old, new: new,
    ui_args={"title": "Talk To OpenAI GPT-OSS 20B (Powered by FastRTC ⚡️)"},
    time_limit=90,
    concurrency_limit=5,
)

stream.ui.launch()