vagasions committed on
Commit
d34e167
·
verified ·
1 Parent(s): e964dd9

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +8 -8
  2. app.py +60 -0
  3. packages.txt +1 -0
  4. requirements.txt +11 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Cohere Ko Test
3
- emoji: ๐Ÿข
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.10.0
8
- python_version: '3.12'
9
  app_file: app.py
10
  pinned: false
 
 
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Cohere Transcribe Korean Test
3
+ emoji: "🎤"
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.23.0
8
+ python_version: "3.11"
9
  app_file: app.py
10
  pinned: false
11
+ license: apache-2.0
12
+ suggested_hardware: zero-a10g
13
  ---
 
 
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cohere Transcribe ๋‹จ๋… ํ…Œ์ŠคํŠธ โ€” ๊ฐ€๋ฒผ์šด ๋ฒ„์ „"""
2
+ import os, time, tempfile
3
+ import gradio as gr
4
+ import numpy as np
5
+ import spaces
6
+ import torch
7
+ import soundfile as sf
8
+ import librosa
9
+
10
+ _models = {}
11
+
12
+ def _load_cohere():
13
+ if "cohere" not in _models:
14
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
15
+ print("Loading Cohere Transcribe 2B...")
16
+ _models["proc"] = AutoProcessor.from_pretrained(
17
+ "CohereLabs/cohere-transcribe-03-2026", trust_remote_code=True)
18
+ _models["cohere"] = AutoModelForSpeechSeq2Seq.from_pretrained(
19
+ "CohereLabs/cohere-transcribe-03-2026",
20
+ trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto")
21
+ print("Loaded.")
22
+ return _models["cohere"], _models["proc"]
23
+
24
+ @spaces.GPU(duration=60)
25
+ def transcribe(audio_input):
26
+ if audio_input is None:
27
+ return "์˜ค๋””์˜ค ์—†์Œ"
28
+ if isinstance(audio_input, str):
29
+ audio_np, sr = librosa.load(audio_input, sr=16000, mono=True)
30
+ else:
31
+ sr, audio_np = audio_input
32
+ if len(audio_np.shape) > 1: audio_np = audio_np.mean(axis=1)
33
+ if audio_np.dtype != np.float32:
34
+ audio_np = audio_np.astype(np.float32)
35
+ if np.abs(audio_np).max() > 1.0: audio_np = audio_np / 32768.0
36
+ if sr != 16000: audio_np = librosa.resample(audio_np, orig_sr=sr, target_sr=16000); sr = 16000
37
+
38
+ model, proc = _load_cohere()
39
+ t0 = time.time()
40
+ inputs = proc(audio_np, sampling_rate=16000, return_tensors="pt", language="ko")
41
+ inputs = inputs.to(model.device, dtype=model.dtype)
42
+ with torch.no_grad():
43
+ outputs = model.generate(**inputs, max_new_tokens=512)
44
+ text = proc.decode(outputs[0], skip_special_tokens=True)
45
+ elapsed = time.time() - t0
46
+ return f"[Cohere Transcribe 2B โ€” {elapsed:.1f}์ดˆ]\n\n{text}"
47
+
48
+ SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
49
+ SAMPLES = sorted([f for f in os.listdir(SAMPLE_DIR) if f.endswith(('.m4a','.wav'))]) if os.path.isdir(SAMPLE_DIR) else []
50
+
51
+ with gr.Blocks(title="Cohere Transcribe ํ…Œ์ŠคํŠธ") as demo:
52
+ gr.Markdown("# Cohere Transcribe 2B โ€” ํ•œ๊ตญ์–ด ๋‹จ๋… ํ…Œ์ŠคํŠธ")
53
+ sample_dd = gr.Dropdown(SAMPLES, label="์ƒ˜ํ”Œ", value=SAMPLES[0] if SAMPLES else None)
54
+ audio = gr.Audio(label="์˜ค๋””์˜ค", type="filepath")
55
+ sample_dd.change(lambda n: os.path.join(SAMPLE_DIR, n) if n else None, [sample_dd], [audio])
56
+ btn = gr.Button("์ „์‚ฌ", variant="primary")
57
+ out = gr.Textbox(label="๊ฒฐ๊ณผ", lines=10)
58
+ btn.click(transcribe, [audio], [out])
59
+
60
+ demo.launch(show_error=True)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers>=4.52,!=5.0.*,!=5.1.*
2
+ accelerate>=1.12.0
3
+ huggingface_hub
4
+ torch
5
+ librosa
6
+ soundfile
7
+ spaces
8
+ numpy>=1.24.0
9
+ torchaudio
10
+ sentencepiece
11
+ protobuf