vagasions committed on
Commit
d34e167
·
verified ·
1 Parent(s): e964dd9

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +8 -8
  2. app.py +60 -0
  3. packages.txt +1 -0
  4. requirements.txt +11 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Cohere Ko Test
3
- emoji: ๐Ÿข
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.10.0
8
- python_version: '3.12'
9
  app_file: app.py
10
  pinned: false
 
 
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Cohere Transcribe Korean Test
3
+ emoji: "🎤"
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.23.0
8
+ python_version: "3.11"
9
  app_file: app.py
10
  pinned: false
11
+ license: apache-2.0
12
+ suggested_hardware: zero-a10g
13
  ---
 
 
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cohere Transcribe ๋‹จ๋… ํ…Œ์ŠคํŠธ โ€” ๊ฐ€๋ฒผ์šด ๋ฒ„์ „"""
2
+ import os, time, tempfile
3
+ import gradio as gr
4
+ import numpy as np
5
+ import spaces
6
+ import torch
7
+ import soundfile as sf
8
+ import librosa
9
+
10
+ _models = {}
11
+
12
+ def _load_cohere():
13
+ if "cohere" not in _models:
14
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
15
+ print("Loading Cohere Transcribe 2B...")
16
+ _models["proc"] = AutoProcessor.from_pretrained(
17
+ "CohereLabs/cohere-transcribe-03-2026", trust_remote_code=True)
18
+ _models["cohere"] = AutoModelForSpeechSeq2Seq.from_pretrained(
19
+ "CohereLabs/cohere-transcribe-03-2026",
20
+ trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto")
21
+ print("Loaded.")
22
+ return _models["cohere"], _models["proc"]
23
+
24
+ @spaces.GPU(duration=60)
25
+ def transcribe(audio_input):
26
+ if audio_input is None:
27
+ return "์˜ค๋””์˜ค ์—†์Œ"
28
+ if isinstance(audio_input, str):
29
+ audio_np, sr = librosa.load(audio_input, sr=16000, mono=True)
30
+ else:
31
+ sr, audio_np = audio_input
32
+ if len(audio_np.shape) > 1: audio_np = audio_np.mean(axis=1)
33
+ if audio_np.dtype != np.float32:
34
+ audio_np = audio_np.astype(np.float32)
35
+ if np.abs(audio_np).max() > 1.0: audio_np = audio_np / 32768.0
36
+ if sr != 16000: audio_np = librosa.resample(audio_np, orig_sr=sr, target_sr=16000); sr = 16000
37
+
38
+ model, proc = _load_cohere()
39
+ t0 = time.time()
40
+ inputs = proc(audio_np, sampling_rate=16000, return_tensors="pt", language="ko")
41
+ inputs = inputs.to(model.device, dtype=model.dtype)
42
+ with torch.no_grad():
43
+ outputs = model.generate(**inputs, max_new_tokens=512)
44
+ text = proc.decode(outputs[0], skip_special_tokens=True)
45
+ elapsed = time.time() - t0
46
+ return f"[Cohere Transcribe 2B โ€” {elapsed:.1f}์ดˆ]\n\n{text}"
47
+
48
+ SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
49
+ SAMPLES = sorted([f for f in os.listdir(SAMPLE_DIR) if f.endswith(('.m4a','.wav'))]) if os.path.isdir(SAMPLE_DIR) else []
50
+
51
+ with gr.Blocks(title="Cohere Transcribe ํ…Œ์ŠคํŠธ") as demo:
52
+ gr.Markdown("# Cohere Transcribe 2B โ€” ํ•œ๊ตญ์–ด ๋‹จ๋… ํ…Œ์ŠคํŠธ")
53
+ sample_dd = gr.Dropdown(SAMPLES, label="์ƒ˜ํ”Œ", value=SAMPLES[0] if SAMPLES else None)
54
+ audio = gr.Audio(label="์˜ค๋””์˜ค", type="filepath")
55
+ sample_dd.change(lambda n: os.path.join(SAMPLE_DIR, n) if n else None, [sample_dd], [audio])
56
+ btn = gr.Button("์ „์‚ฌ", variant="primary")
57
+ out = gr.Textbox(label="๊ฒฐ๊ณผ", lines=10)
58
+ btn.click(transcribe, [audio], [out])
59
+
60
+ demo.launch(show_error=True)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers>=4.52,!=5.0.*,!=5.1.*
2
+ accelerate>=1.12.0
3
+ huggingface_hub
4
+ torch
5
+ librosa
6
+ soundfile
7
+ spaces
8
+ numpy>=1.24.0
9
+ torchaudio
10
+ sentencepiece
11
+ protobuf