Spaces:

fastrtc
/

talk-to-oai-gpt-oss-20b

Runtime error

App Files Files Community

freddyaboulton HF Staff committed on Aug 6

Commit

bdb5512

verified ·

1 Parent(s): 872589f

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -35

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ from fastrtc import (
     ReplyOnPause,
     Stream,
     WebRTCError,
-    audio_to_float32,
     get_current_context,
     get_hf_turn_credentials,
     get_hf_turn_credentials_async,
@@ -15,33 +14,6 @@ from fastrtc import (
     get_tts_model,
 )
 from huggingface_hub import InferenceClient
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
-import spaces
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-model_id = "openai/whisper-large-v3-turbo"
-model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_id,
-    torch_dtype=torch_dtype,
-    low_cpu_mem_usage=True,
-    use_safetensors=True,
-)
-model.to(device)
-processor = AutoProcessor.from_pretrained(model_id)
-pipe = pipeline(
-    "automatic-speech-recognition",
-    model=model,
-    tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-    torch_dtype=torch_dtype,
-    device=device,
-)
 load_dotenv()
@@ -50,7 +22,7 @@ tts_model = get_tts_model()
 conversations: dict[str, list[dict[str, str]]] = {}
-@spaces.GPU
 def response(
     audio: tuple[int, np.ndarray],
     hf_token: str | None,
@@ -60,12 +32,6 @@ def response(
     llm_client = InferenceClient(provider="auto", token=hf_token)
-    result = pipe(
-        {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]},
-        generate_kwargs={"language": "en"},
-    )
-    transcription = result["text"]
     context = get_current_context()
     if context.webrtc_id not in conversations:
         conversations[context.webrtc_id] = [
@@ -81,6 +47,8 @@ def response(
     messages = conversations[context.webrtc_id]
     messages.append({"role": "user", "content": transcription})
     output = llm_client.chat.completions.create(  # type: ignore

     ReplyOnPause,
     Stream,
     WebRTCError,
     get_current_context,
     get_hf_turn_credentials,
     get_hf_turn_credentials_async,
     get_tts_model,
 )
 from huggingface_hub import InferenceClient
 load_dotenv()
 conversations: dict[str, list[dict[str, str]]] = {}
 def response(
     audio: tuple[int, np.ndarray],
     hf_token: str | None,
     llm_client = InferenceClient(provider="auto", token=hf_token)
     context = get_current_context()
     if context.webrtc_id not in conversations:
         conversations[context.webrtc_id] = [
     messages = conversations[context.webrtc_id]
+    transcription = stt_model.stt(audio)
     messages.append({"role": "user", "content": transcription})
     output = llm_client.chat.completions.create(  # type: ignore