# pidgin-whisper / app.py
# Commit 345b60e (verified) by michaelodafe:
#   "Add punctuation + capitalization formatting layer"
"""
Pidgin Whisper — HuggingFace Space demo (dark mode).
Loads `openai/whisper-large-v3-turbo` + the LoRA adapter from
`michaelodafe/whisper-pidgin-v1`, merges in-memory, and transcribes
audio uploaded or recorded by the user.
Targets free-CPU Spaces; latency is ~5–15 s per clip on CPU.
"""
# --- workaround for gradio_client bug ---
# JSON Schema permits `additionalProperties: false/true` (a bool).
# Some gradio_client versions don't handle this, crashing `api_info`
# with `TypeError: argument of type 'bool' is not iterable`. That
# crash bubbles up as "No API found" in the browser. Patch before
# importing gradio so the routes module picks up the fixed helpers.
import gradio_client.utils as _gcu
# Keep a handle on the library implementation so the wrapper can delegate to it.
_orig_json_to_py = _gcu._json_schema_to_python_type


def _safe_json_to_py(schema, defs=None):
    """Bool-tolerant wrapper around ``_json_schema_to_python_type``.

    A boolean schema is answered directly (``True`` -> ``"Any"``,
    ``False`` -> ``"None"``); every other schema is passed through to the
    original helper unchanged.
    """
    if not isinstance(schema, bool):
        return _orig_json_to_py(schema, defs)
    if schema:
        return "Any"
    return "None"


# Install the wrapper on the module so later lookups resolve to it.
_gcu._json_schema_to_python_type = _safe_json_to_py
# Keep a handle on the library implementation so the wrapper can delegate to it.
_orig_get_type = _gcu.get_type


def _safe_get_type(schema):
    """Bool-tolerant wrapper around ``gradio_client.utils.get_type``.

    Returns ``"Any"`` for a ``True`` schema and ``"None"`` for a ``False``
    schema; any non-bool schema is handed to the original ``get_type``.
    """
    if isinstance(schema, bool):
        if schema:
            return "Any"
        return "None"
    return _orig_get_type(schema)


# Install the wrapper on the module so later lookups resolve to it.
_gcu.get_type = _safe_get_type
# --- end workaround ---
import os
import re
import gradio as gr
import librosa
import numpy as np
import torch
from peft import PeftModel
from transformers import WhisperForConditionalGeneration, WhisperProcessor
# Hub id of the base Whisper checkpoint (used for both processor and weights).
BASE = "openai/whisper-large-v3-turbo"
# Hub id of the LoRA adapter that is loaded on top of BASE.
ADAPTER = "michaelodafe/whisper-pidgin-v1"
# Longest clip, in seconds, that is transcribed; longer audio is truncated.
MAX_SECONDS = 30
# Shared decode helpers (hotword prompt + punctuation/casing formatter).
from decode import INITIAL_PROMPT, format_output
print("Loading processor + base model + adapter...")

# Processor for the base checkpoint, pinned to English transcription.
processor = WhisperProcessor.from_pretrained(
    BASE,
    language="english",
    task="transcribe",
)

# Load the base weights in float32, attach the LoRA adapter, then merge the
# adapter into the base model in memory and switch to eval mode.
base = WhisperForConditionalGeneration.from_pretrained(
    BASE,
    torch_dtype=torch.float32,
)
peft_model = PeftModel.from_pretrained(base, ADAPTER)
model = peft_model.merge_and_unload().eval()

# Generation defaults used by every transcription request.
# NOTE(review): forced_decoder_ids / suppress_tokens are presumably cleared so
# the language/task settings drive decoding — confirm.
_gen_config = model.generation_config
_gen_config.language = "english"
_gen_config.task = "transcribe"
_gen_config.forced_decoder_ids = None
_gen_config.suppress_tokens = []


def _load_prompt_ids():
    """Return token ids for INITIAL_PROMPT, or None if they cannot be built.

    A tensor with more than one dimension is squeezed. Any failure is
    reported on stdout and disables prompting instead of aborting start-up.
    """
    try:
        ids = processor.get_prompt_ids(INITIAL_PROMPT, return_tensors="pt")
        return ids.squeeze() if ids.dim() > 1 else ids
    except Exception as e:
        print(f"prompt_ids disabled: {e}")
        return None


prompt_ids = _load_prompt_ids()
print("Model ready.")
@torch.no_grad()
def transcribe(audio):
    """Transcribe one audio clip and return text for the output textbox.

    Args:
        audio: Path to the recorded or uploaded audio file, or ``None`` when
            the user has not provided a clip.

    Returns:
        The formatted transcript on success. When the clip is missing, too
        short, or processing fails, a user-facing message string is returned
        instead; no exception is propagated to the caller.
    """
    if audio is None:
        return "Please record or upload a Pidgin audio clip first."

    sample_rate = 16000  # rate the clip is resampled to and declared to the processor
    min_samples = 1600  # 0.1 s at 16 kHz: hard floor; the message below advises 0.5 s
    max_samples = MAX_SECONDS * sample_rate

    try:
        # Only decode the first MAX_SECONDS of the file. Without the cap a
        # long upload is fully decoded and resampled just to be cut afterwards.
        arr, _ = librosa.load(
            audio, sr=sample_rate, mono=True, duration=MAX_SECONDS
        )
        arr = arr.astype("float32")
        if len(arr) < min_samples:
            return "Audio is too short — speak for at least half a second."
        # Keep the hard cap regardless of what the loader returned; slicing is
        # a no-op when the clip is already short enough.
        arr = arr[:max_samples]

        inputs = processor(arr, sampling_rate=sample_rate, return_tensors="pt")
        gen_kwargs = {"max_length": 225}
        if prompt_ids is not None:
            gen_kwargs["prompt_ids"] = prompt_ids
        output_ids = model.generate(inputs.input_features, **gen_kwargs)
        text = processor.batch_decode(output_ids, skip_special_tokens=True)[0]
        return format_output(text)
    except Exception as e:
        # UI boundary: show a short message to the user, but keep the full
        # traceback in the server log so failures can be diagnosed.
        import traceback

        traceback.print_exc()
        return f"⚠️ Error: {type(e).__name__}: {e}"
# Dark-mode palette. These hex values are interpolated into THEME and CSS below.
# Surfaces and borders
PAGE_BG = "#0b1220"
CARD_BG = "#141c2e"
CARD_BORDER = "#293548"
INPUT_BG = "#0f172a"
# Text
TEXT_PRIMARY = "#f1f5f9"
TEXT_MUTED = "#94a3b8"
# Greens (flag, buttons, links)
NG_GREEN = "#008751"  # flag green (kept identical for visual identity)
UI_GREEN = "#22c55e"  # brighter for dark contrast
UI_GREEN_HOVER = "#16a34a"
LINK_GREEN = "#4ade80"
# Warning banner (#pw-warn)
WARN_BG = "#3f2a08"
WARN_TEXT = "#fde68a"
WARN_BORDER = "#854d0e"
# Pill badges (.pw-badge and .pw-gradio-badge)
BADGE_GREEN_BG = "#14532d"
BADGE_GREEN_TEXT = "#86efac"
GRADIO_BADGE_BG = "#7c2d12"
GRADIO_BADGE_TEXT = "#fdba74"
def _mirror_dark(**tokens):
    """Return *tokens* plus a ``<name>_dark`` twin of each with the same value."""
    mirrored = dict(tokens)
    mirrored.update({f"{name}_dark": value for name, value in tokens.items()})
    return mirrored


# Soft theme with every colour token pinned to the same value for the light
# and the dark variant, so the page looks identical in both modes.
THEME = gr.themes.Soft(
    primary_hue="green",
    secondary_hue="orange",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    **_mirror_dark(
        # backgrounds
        body_background_fill=PAGE_BG,
        background_fill_primary=CARD_BG,
        background_fill_secondary=CARD_BG,
        block_background_fill=CARD_BG,
        input_background_fill=INPUT_BG,
        # text
        body_text_color=TEXT_PRIMARY,
        body_text_color_subdued=TEXT_MUTED,
        block_label_text_color=TEXT_PRIMARY,
        block_title_text_color=TEXT_PRIMARY,
        # borders
        border_color_primary=CARD_BORDER,
        input_border_color=CARD_BORDER,
        block_border_color=CARD_BORDER,
        # primary button
        button_primary_background_fill=UI_GREEN,
        button_primary_background_fill_hover=UI_GREEN_HOVER,
        button_primary_text_color="white",
        button_primary_border_color=UI_GREEN,
        # secondary button
        button_secondary_background_fill="#1e293b",
        button_secondary_text_color=TEXT_PRIMARY,
        button_secondary_border_color=CARD_BORDER,
    ),
    # general layout (set once, no dark twin)
    block_radius="12px",
    block_border_width="1px",
)
# Custom stylesheet handed to gr.Blocks(css=...). It is an f-string so the
# palette constants are interpolated; literal CSS braces are therefore doubled
# ({{ and }}). Selectors target the ids/classes used by the gr.HTML snippets
# and the elem_classes set on components in the layout.
CSS = f"""
/* Force dark even if user OS prefers light */
:root, html, body {{ color-scheme: dark !important; }}
.gradio-container {{ max-width: 760px !important; margin: 0 auto !important;
padding: 24px 16px 32px !important;
background: {PAGE_BG} !important; }}
/* --- Hero --- */
#pw-hero {{ padding: 8px 4px 24px; }}
#pw-hero-row {{ display: flex; align-items: flex-start; gap: 16px; flex-wrap: wrap; }}
#pw-flag {{ width: 44px; height: 64px; border-radius: 6px; flex-shrink: 0;
background: linear-gradient(to right, {NG_GREEN} 33%, #ffffff 33%, #ffffff 66%, {NG_GREEN} 66%);
box-shadow: 0 2px 6px rgba(0,0,0,0.4); }}
#pw-title-block {{ flex: 1; min-width: 220px; }}
#pw-title-line {{ display: flex; align-items: center; gap: 12px; flex-wrap: wrap; }}
#pw-title {{ font-size: 2.0rem; font-weight: 800; color: {TEXT_PRIMARY}; margin: 0;
letter-spacing: -0.02em; }}
#pw-sub {{ color: {TEXT_MUTED}; font-size: 0.95rem; margin-top: 2px; }}
.pw-badge {{ display: inline-flex; align-items: center; gap: 6px;
padding: 3px 10px; border-radius: 999px;
font-size: 0.78rem; font-weight: 600;
background: {BADGE_GREEN_BG}; color: {BADGE_GREEN_TEXT}; }}
.pw-badge::before {{ content: "●"; font-size: 0.5rem; color: {LINK_GREEN}; }}
#pw-desc {{ color: {TEXT_PRIMARY}; margin-top: 16px; font-size: 0.95rem; line-height: 1.55; opacity: 0.92; }}
#pw-links {{ margin-top: 8px; font-size: 0.9rem; }}
#pw-links a {{ color: {LINK_GREEN}; text-decoration: none; font-weight: 600; }}
#pw-links a:hover {{ text-decoration: underline; }}
#pw-links .sep {{ color: {TEXT_MUTED}; margin: 0 6px; }}
/* --- Cards (wraps audio + textbox) --- */
.pw-card {{ background: {CARD_BG} !important; border: 1px solid {CARD_BORDER} !important;
border-radius: 12px !important;
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.3) !important;
padding: 16px !important; margin-bottom: 16px !important; }}
.pw-card-head {{ display: flex; justify-content: space-between; align-items: center;
margin-bottom: 12px; font-size: 0.95rem; font-weight: 600;
color: {TEXT_PRIMARY}; }}
.pw-card-head .icon {{ margin-right: 6px; }}
/* --- Big primary button --- */
.pw-btn button {{ width: 100% !important; padding: 14px 20px !important;
font-size: 1rem !important; font-weight: 700 !important;
border-radius: 10px !important;
background: {UI_GREEN} !important;
color: white !important;
border: none !important;
box-shadow: 0 1px 3px rgba(34, 197, 94, 0.3) !important; }}
.pw-btn button:hover {{ background: {UI_GREEN_HOVER} !important; }}
/* --- Warning banner --- */
#pw-warn {{ background: {WARN_BG}; color: {WARN_TEXT};
border: 1px solid {WARN_BORDER}; border-radius: 10px;
padding: 10px 14px; font-size: 0.88rem;
display: flex; align-items: center; gap: 10px;
margin-bottom: 20px; }}
/* --- Footer --- */
#pw-footer {{ display: flex; justify-content: space-between; align-items: center;
flex-wrap: wrap; gap: 12px;
margin-top: 18px; padding-top: 18px;
border-top: 1px solid {CARD_BORDER};
color: {TEXT_MUTED}; font-size: 0.85rem; }}
#pw-footer a {{ color: {LINK_GREEN}; text-decoration: none; font-weight: 600; }}
#pw-footer a:hover {{ text-decoration: underline; }}
.pw-gradio-badge {{ background: {GRADIO_BADGE_BG}; color: {GRADIO_BADGE_TEXT};
font-weight: 600;
padding: 3px 10px; border-radius: 999px; font-size: 0.78rem; }}
/* --- Force-dark Gradio internals --- */
.gradio-container, .gradio-container * {{ color-scheme: dark; }}
.gradio-container .gr-textbox textarea,
.gradio-container input[type="text"],
.gradio-container textarea {{ background: {INPUT_BG} !important;
color: {TEXT_PRIMARY} !important;
border-color: {CARD_BORDER} !important; }}
.gradio-container .placeholder,
.gradio-container ::placeholder {{ color: {TEXT_MUTED} !important; opacity: 0.7; }}
/* trim Gradio's default block paddings inside our cards */
.pw-card .gradio-block {{ padding: 0 !important; border: none !important;
background: transparent !important; box-shadow: none !important; }}
"""
# Force `?__theme=dark` so Gradio loads its dark internals (audio waveform
# colors, dropdowns, etc.). The query parameter is written into the URL with
# history.replaceState, so there is no reload; other query parameters and the
# #fragment are preserved. The `dark` class is also added to <html> and <body>
# so the page that is already loaded switches right away.
FORCE_DARK_JS = """
() => {
const params = new URLSearchParams(window.location.search);
if (params.get('__theme') !== 'dark') {
params.set('__theme', 'dark');
const newUrl = window.location.pathname + '?' + params.toString() + window.location.hash;
window.history.replaceState({}, '', newUrl);
}
document.documentElement.classList.add('dark');
document.body && document.body.classList.add('dark');
}
"""
# Page layout, top to bottom: hero header, audio-input card, transcribe
# button, transcript card, cold-start notice, footer.
# NOTE(review): the nesting below was reconstructed from statement order; the
# button and the notice are assumed to sit outside the two cards — confirm
# against the deployed layout. The HTML literals are kept flush-left so their
# contents stay unchanged.
with gr.Blocks(theme=THEME, css=CSS, title="Pidgin Whisper", fill_height=False,
               js=FORCE_DARK_JS) as demo:
    # Hero: flag, title with badge, subtitle, description and project links.
    gr.HTML(
        """
<div id="pw-hero">
<div id="pw-hero-row">
<div id="pw-flag" aria-label="Nigeria flag"></div>
<div id="pw-title-block">
<div id="pw-title-line">
<h1 id="pw-title">Pidgin Whisper</h1>
<span class="pw-badge">Open</span>
</div>
<div id="pw-sub">Nigerian Pidgin English Speech-to-Text</div>
</div>
</div>
<div id="pw-desc">
Speak Pidgin into the mic or upload a clip — the model will transcribe.
Trained on ~8.6&nbsp;h of curated Pidgin audio.
</div>
<div id="pw-links">
<a href="https://huggingface.co/michaelodafe/whisper-pidgin-v1">Model</a>
<span class="sep">·</span>
<a href="https://huggingface.co/datasets/michaelodafe/pidgin-asr-combined">Dataset</a>
<span class="sep">·</span>
<a href="https://github.com/michaelodafe/Naija-Pidgin-Whisper">GitHub</a>
</div>
</div>
"""
    )
    # Input card: microphone or file upload, delivered to the handler as a path.
    with gr.Group(elem_classes=["pw-card"]):
        gr.HTML(
            '<div class="pw-card-head">'
            '<span><span class="icon">🎙️</span>Record or upload Pidgin audio (≤30s)</span>'
            "</div>"
        )
        audio_in = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="",
            show_label=False,
            show_download_button=True,
        )
    # Styled through the .pw-btn rules in CSS.
    btn = gr.Button("✨ Transcribe", variant="primary", size="lg", elem_classes=["pw-btn"])
    # Output card: transcript textbox with a copy button.
    with gr.Group(elem_classes=["pw-card"]):
        gr.HTML(
            '<div class="pw-card-head">'
            '<span><span class="icon">📋</span>Transcription</span>'
            "</div>"
        )
        out = gr.Textbox(
            placeholder="Your Pidgin transcript will appear here…",
            label="",
            show_label=False,
            lines=4,
            show_copy_button=True,
        )
    # Notice about slow first requests on the free CPU tier.
    gr.HTML(
        '<div id="pw-warn">ⓘ Free CPU tier — first request after a quiet period takes ~30–60 s.</div>'
    )
    # Clicking the button passes the audio file path to transcribe() and
    # writes the returned string into the transcript textbox.
    btn.click(fn=transcribe, inputs=audio_in, outputs=out)
    # Footer: author and repository links plus the Gradio badge.
    gr.HTML(
        """
<div id="pw-footer">
<div>
Built with 💚 by
<a href="https://huggingface.co/michaelodafe">@michaelodafe</a>
<span style="margin:0 6px; color:#475569;">·</span>
⭐ <a href="https://github.com/michaelodafe/Naija-Pidgin-Whisper">Star on GitHub</a>
</div>
<div>
Built with <span class="pw-gradio-badge">Gradio</span>
</div>
</div>
"""
    )
# Enable the request queue and start the server with show_api=False.
demo.queue().launch(show_api=False)