# Hugging Face Space — page header said "Running on Zero" (ZeroGPU hardware).
"""Cohere Transcribe standalone test — lightweight version."""
import os
import tempfile
import time

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import spaces
import torch
| _models = {} | |
| def _load_cohere(): | |
| if "cohere" not in _models: | |
| from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq | |
| print("Loading Cohere Transcribe 2B...") | |
| _models["proc"] = AutoProcessor.from_pretrained( | |
| "CohereLabs/cohere-transcribe-03-2026", trust_remote_code=True) | |
| _models["cohere"] = AutoModelForSpeechSeq2Seq.from_pretrained( | |
| "CohereLabs/cohere-transcribe-03-2026", | |
| trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto") | |
| print("Loaded.") | |
| return _models["cohere"], _models["proc"] | |
| def transcribe(audio_input): | |
| if audio_input is None: | |
| return "μ€λμ€ μμ" | |
| if isinstance(audio_input, str): | |
| audio_np, sr = librosa.load(audio_input, sr=16000, mono=True) | |
| else: | |
| sr, audio_np = audio_input | |
| if len(audio_np.shape) > 1: audio_np = audio_np.mean(axis=1) | |
| if audio_np.dtype != np.float32: | |
| audio_np = audio_np.astype(np.float32) | |
| if np.abs(audio_np).max() > 1.0: audio_np = audio_np / 32768.0 | |
| if sr != 16000: audio_np = librosa.resample(audio_np, orig_sr=sr, target_sr=16000); sr = 16000 | |
| model, proc = _load_cohere() | |
| t0 = time.time() | |
| inputs = proc(audio_np, sampling_rate=16000, return_tensors="pt", language="ko") | |
| inputs = inputs.to(model.device, dtype=model.dtype) | |
| with torch.no_grad(): | |
| outputs = model.generate(**inputs, max_new_tokens=512) | |
| text = proc.decode(outputs[0], skip_special_tokens=True) | |
| elapsed = time.time() - t0 | |
| return f"[Cohere Transcribe 2B β {elapsed:.1f}μ΄]\n\n{text}" | |
| SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples") | |
| SAMPLES = sorted([f for f in os.listdir(SAMPLE_DIR) if f.endswith(('.m4a','.wav'))]) if os.path.isdir(SAMPLE_DIR) else [] | |
| with gr.Blocks(title="Cohere Transcribe ν μ€νΈ") as demo: | |
| gr.Markdown("# Cohere Transcribe 2B β νκ΅μ΄ λ¨λ ν μ€νΈ") | |
| sample_dd = gr.Dropdown(SAMPLES, label="μν", value=SAMPLES[0] if SAMPLES else None) | |
| audio = gr.Audio(label="μ€λμ€", type="filepath") | |
| sample_dd.change(lambda n: os.path.join(SAMPLE_DIR, n) if n else None, [sample_dd], [audio]) | |
| btn = gr.Button("μ μ¬", variant="primary") | |
| out = gr.Textbox(label="κ²°κ³Ό", lines=10) | |
| btn.click(transcribe, [audio], [out]) | |
| demo.launch(show_error=True) | |