File size: 1,556 Bytes
8864fd4 d7d90f3 b88c7ba 8864fd4 3eb3019 7833c3a 8864fd4 7833c3a 8864fd4 7833c3a 8864fd4 f8741c8 8864fd4 7833c3a 8864fd4 d7d90f3 7833c3a d7d90f3 7833c3a d7d90f3 7833c3a d7d90f3 7833c3a 8864fd4 7833c3a 8864fd4 7833c3a 8864fd4 7833c3a 8864fd4 7833c3a 8864fd4 7833c3a 8864fd4 7833c3a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import librosa
import numpy as np
import spaces
# Hugging Face Hub repo of the fine-tuned Turkish phonetic Whisper checkpoint.
MODEL_ID = "userdotcs/whisper-small-turkish-phono-finetuned"
# Prefer GPU when available; falls back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the processor and the model once at startup (downloads on first run).
processor = WhisperProcessor.from_pretrained(MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID).to(device)
@spaces.GPU
def transcribe(audio):
    """Transcribe a Gradio audio input with the fine-tuned Whisper model.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | None
        ``(sample_rate, samples)`` pair as produced by
        ``gr.Audio(type="numpy")``, or ``None`` when nothing was recorded.

    Returns
    -------
    str
        The decoded transcription, or a Turkish error message when no
        audio was provided.
    """
    if audio is None:
        return "Ses dosyası algılanamadı."
    sr, y = audio

    # 1. Convert to float32 and peak-normalize — the Whisper processor
    #    expects float samples in [-1, 1], while Gradio typically delivers
    #    integer PCM. Compute the peak once (the original evaluated
    #    np.max(np.abs(y)) twice) and guard the empty-recording case,
    #    where np.max would raise ValueError.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    # 2. Down-mix multi-channel audio to mono (model expects a 1-D waveform).
    if y.ndim > 1:
        y = y.mean(axis=1)

    # 3. Resample to Whisper's fixed 16 kHz input rate.
    if sr != 16000:
        y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        sr = 16000

    # 4. Build model inputs and move them to the model's device.
    input_features = processor(y, sampling_rate=sr, return_tensors="pt").input_features.to(device)

    # 5. Inference — no gradients needed.
    #    NOTE(review): forced_decoder_ids could be used here to pin the
    #    decoder language to Turkish (optional).
    with torch.no_grad():
        predicted_ids = model.generate(input_features, max_new_tokens=128)

    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
# UI: a single audio input mapped to a text output.
# Fixes: stray trailing "|" after demo.launch() (syntax error) and a typo
# in the user-facing title ("Trancription" -> "Transcription").
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="numpy"),  # numpy (sr, samples) tuples — Gradio 4+
    outputs="text",
    title="Whisper Small Turkish Phonetic Transcription",
)
demo.launch()