Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGenera
|
|
| 3 |
from diffusers import StableDiffusionPipeline
|
| 4 |
import torch
|
| 5 |
import numpy as np
|
| 6 |
-
import soundfile as sf
|
| 7 |
|
| 8 |
# Step 1: Prompt-to-Prompt Generation using BART (or any LLM except GPT or DeepSeek)
|
| 9 |
prompt_generator = pipeline("text2text-generation", model="facebook/bart-large-cnn")
|
|
@@ -37,11 +36,8 @@ processor = WhisperProcessor.from_pretrained("openai/whisper-large")
|
|
| 37 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
|
| 38 |
|
| 39 |
def transcribe_audio(audio: np.ndarray, sampling_rate: int) -> str:
    """Transcribe raw audio to text with the Whisper model.

    Args:
        audio: Mono audio samples as a 1-D numpy array.
        sampling_rate: Sample rate (Hz) of ``audio``; Whisper's processor
            resamples/featurizes based on this value.

    Returns:
        The decoded transcription string.
    """
    # BUG FIX: the original passed the literal filename "temp_audio.wav"
    # to the processor, ignoring both parameters. WhisperProcessor expects
    # the raw sample array plus its sampling rate, not a file path.
    audio_input = processor(
        audio, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(audio_input)
    # skip_special_tokens drops Whisper's control tokens (<|startoftranscript|>, etc.)
    transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
    return transcription
|
|
|
|
| 3 |
from diffusers import StableDiffusionPipeline
|
| 4 |
import torch
|
| 5 |
import numpy as np
|
|
|
|
| 6 |
|
| 7 |
# Step 1: Prompt-to-Prompt Generation using BART (or any LLM except GPT or DeepSeek)
|
| 8 |
prompt_generator = pipeline("text2text-generation", model="facebook/bart-large-cnn")
|
|
|
|
| 36 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
|
| 37 |
|
| 38 |
def transcribe_audio(audio: np.ndarray, sampling_rate: int) -> str:
    """Run Whisper speech-to-text on an in-memory audio buffer.

    The numpy sample array is featurized directly (no temp-file round trip),
    then decoded with special tokens stripped.
    """
    # Featurize the raw samples at their native rate for the model.
    features = processor(
        audio, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features
    # Greedy generation of token ids, then decode the first (only) sequence.
    token_ids = model.generate(features)
    text = processor.decode(token_ids[0], skip_special_tokens=True)
    return text
|