Spaces:
Sleeping
Sleeping
Commit ·
30f82a6
1
Parent(s): 0962e25
Rm torchaudio, use librosa
Browse files
- app.py +3 -3
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -13,10 +13,11 @@ from fastapi.staticfiles import StaticFiles
|
|
| 13 |
# AI + LLM
|
| 14 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
| 15 |
import torch
|
| 16 |
-
import torchaudio
|
| 17 |
from google import genai
|
| 18 |
from google.genai import types
|
| 19 |
|
|
|
|
| 20 |
############################################
|
| 21 |
# ── Configuration ────────────────────────
|
| 22 |
############################################
|
|
@@ -130,8 +131,7 @@ async def voice_transcribe(file: UploadFile = File(...)): # noqa: B008
|
|
| 130 |
tmp_path = tmp.name
|
| 131 |
try:
|
| 132 |
# ── 1. Transcribe
|
| 133 |
-
waveform, sample_rate = torchaudio.load(tmp_path)
|
| 134 |
-
speech = waveform[0].numpy() # Convert to numpy for WhisperProcessor
|
| 135 |
inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
|
| 136 |
input_features = inputs["input_features"].to("cpu")
|
| 137 |
generated_ids = model.generate(input_features)
|
|
|
|
| 13 |
# AI + LLM
|
| 14 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
| 15 |
import torch
|
| 16 |
+
import librosa
|
| 17 |
from google import genai
|
| 18 |
from google.genai import types
|
| 19 |
|
| 20 |
+
|
| 21 |
############################################
|
| 22 |
# ── Configuration ────────────────────────
|
| 23 |
############################################
|
|
|
|
| 131 |
tmp_path = tmp.name
|
| 132 |
try:
|
| 133 |
# ── 1. Transcribe
|
| 134 |
+
speech, sample_rate = librosa.load(tmp_path, sr=16000)
|
|
|
|
| 135 |
inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
|
| 136 |
input_features = inputs["input_features"].to("cpu")
|
| 137 |
generated_ids = model.generate(input_features)
|
requirements.txt
CHANGED
|
@@ -7,8 +7,8 @@ python-multipart # File uploads
|
|
| 7 |
# Voice‑to‑text (Whisper via 🤗 Transformers)
|
| 8 |
transformers==4.38.2 # ensure recent enough
|
| 9 |
torch
|
| 10 |
-
torchaudio>=2.1.0
|
| 11 |
huggingface_hub
|
|
|
|
| 12 |
|
| 13 |
# Gemini Flash 2.5
|
| 14 |
google-genai
|
|
|
|
| 7 |
# Voice‑to‑text (Whisper via 🤗 Transformers)
|
| 8 |
transformers==4.38.2 # ensure recent enough
|
| 9 |
torch
|
|
|
|
| 10 |
huggingface_hub
|
| 11 |
+
librosa
|
| 12 |
|
| 13 |
# Gemini Flash 2.5
|
| 14 |
google-genai
|