Spaces:
Runtime error
Runtime error
# app.py — SpellTalk (polished UI for HF Spaces)
| import os | |
| os.environ["YOLO_CONFIG_DIR"] = "/tmp/Ultralytics" # writable dir on Spaces | |
| import time, string | |
| from collections import deque, Counter | |
| import cv2 | |
| import numpy as np | |
| import gradio as gr | |
| from ultralytics import YOLO | |
| from huggingface_hub import hf_hub_download | |
| # ---- FastRTC WebRTC component | |
| from fastrtc import WebRTC | |
# ================== Model ==================
# Download the detector weights from the Hub and load them with Ultralytics.
weights_path = hf_hub_download(
    repo_id="atalaydenknalbant/asl-yolo-models",
    filename="yolo11n.pt",
)
model = YOLO(weights_path)

# Class-index -> label mapping exposed by the loaded model.
id2label = model.names

# Upper-cased single-letter class names; any other label is ignored by the app.
VALID = {
    name.upper()
    for name in id2label.values()
    if len(name) == 1 and name in string.ascii_letters
}
# ================== App State (globals) ==================
# Tunables; the Settings sliders in the UI overwrite these at runtime.
CONF_THRES = 0.35          # confidence threshold handed to model.predict
HIST_LEN = 9               # history window (frames)
COMMIT_STREAK = 5          # frames of agreement to lock a letter
WORD_BREAK_SECONDS = 1.0   # pause with no letter that ends the current word

# Running recognition state, shared by the frame handler and the reset buttons.
history = deque(maxlen=HIST_LEN)   # recent per-frame letters (None = nothing valid)
current_word = []                  # letters locked in for the word being spelled
sentence = []                      # finished words, oldest first
last_commit_time = time.time()     # time of the last letter commit / word break

HELP_CHART_URL = "https://commons.wikimedia.org/wiki/Special:FilePath/Asl_alphabet_gallaudet_ann.png"
| # ================== Helpers ================== | |
def most_common(q):
    """Return ``(value, count)`` for the most frequent non-``None`` item in *q*.

    ``None`` entries are skipped. When nothing else is left the result is
    ``(None, 0)``.
    """
    tally = Counter(item for item in q if item is not None)
    if not tally:
        return None, 0
    [(winner, hits)] = tally.most_common(1)
    return winner, hits
def reset_word():
    """Discard the word in progress and empty the detection history.

    Words already stored in ``sentence`` are left untouched.
    """
    global current_word
    history.clear()      # mutated in place, so no rebinding is needed
    current_word = []
def reset_sentence():
    """Drop every finished word, the word in progress and the detection history."""
    global sentence
    sentence = []
    # The remaining cleanup (current word + history) is exactly a word reset.
    reset_word()
| # ================== Video Frame Handler ================== | |
def process_frame(frame: np.ndarray):
    """Annotate one webcam frame and advance the fingerspelling state.

    Registered as the FastRTC stream handler, so it runs once per incoming
    frame. The running state (``history``, ``current_word``, ``sentence``,
    ``last_commit_time``) is kept in module globals so that words persist
    across frames.

    Args:
        frame: ``(H, W, 3)`` image array from the stream, or ``None``.
            NOTE(review): this was originally documented as RGB, while
            Ultralytics documents BGR for NumPy inputs -- confirm which
            channel order FastRTC actually delivers.

    Returns:
        The mirrored frame with the status overlay drawn on it, or ``None``
        when no frame was supplied. If inference raises, the mirrored frame
        is returned with the error message drawn on it instead.
    """
    global history, current_word, sentence, last_commit_time
    if frame is None:
        return None

    # Mirror like a selfie camera.
    frame = cv2.flip(frame, 1)

    # ---------- detection ----------
    try:
        res = model.predict(frame, verbose=False, conf=CONF_THRES)
    except Exception as e:
        # Stream boundary: surface the failure on screen rather than letting
        # the exception escape the handler.
        cv2.putText(frame, f"Model error: {e}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        return frame

    # Keep only the highest-confidence detection, and only if it is a letter.
    letter = None
    boxes = res[0].boxes if res else None
    if boxes is not None and len(boxes) > 0:
        scores = boxes.conf.cpu().numpy()
        best = int(scores.argmax())
        cand = id2label.get(int(boxes.cls[best].item()), "").upper()
        if cand in VALID:
            letter = cand

    # ---------- state update ----------
    history.append(letter)
    top, count = most_common(history)
    now = time.time()

    # The vote count can never exceed the history window, so a streak setting
    # larger than the window would make committing impossible. Clamp it.
    needed = max(1, min(COMMIT_STREAK, history.maxlen or COMMIT_STREAK))

    if top and count >= needed:
        current_word.append(top)
        history.clear()
        last_commit_time = now

    # A pause (nothing recognised in the whole window) ends the current word.
    if top is None and current_word and now - last_commit_time > WORD_BREAK_SECONDS:
        sentence.append("".join(current_word))
        current_word = []
        last_commit_time = now

    # ---------- overlay ----------
    w = frame.shape[1]
    overlay = frame.copy()
    cv2.rectangle(overlay, (0, 0), (w, 120), (0, 0, 0), -1)  # header bar
    frame = cv2.addWeighted(overlay, 0.55, frame, 0.45, 0)

    # OpenCV's Hershey fonts cannot draw non-ASCII glyphs, so the "no letter"
    # placeholder is a plain ASCII dash.
    label = top if top else "-"
    cv2.putText(frame, f"Letter: {label} (stability {count}/{needed})",
                (16, 64), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)

    # Progress bar: fraction of the required agreement reached so far
    # (count is 0 whenever there is no top letter).
    pct = int((w - 32) * min(1.0, count / needed))
    cv2.rectangle(frame, (16, 74), (16 + pct, 84), (255, 255, 255), -1)

    word_str = "".join(current_word)
    sent_str = " ".join(sentence[-12:]) if sentence else ""  # last 12 words only
    cv2.putText(frame, f"Word: {word_str} | Sentence: {sent_str}",
                (16, 104), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 1)
    return frame
# ================== Gradio UI ==================
# Soft theme with a blue accent on slate neutrals.
THEME = gr.themes.Soft(primary_hue="blue", neutral_hue="slate")

# Custom stylesheet passed to gr.Blocks; selectors match the elem_id /
# elem_classes values assigned to components in the layout.
CSS = """
/* container width and centering */
#app {max-width: 980px; margin: 0 auto;}
/* sticky title */
.sticky-head {position: sticky; top: 0; z-index: 5; backdrop-filter: blur(6px);}
.sticky-head > * {margin: 0 !important;}
/* webcam panel */
.webrtc-wrap {display:flex; justify-content:center;}
.webrtc-wrap .wrap {width: 100%;}
/* shrink the help chart */
.help-chart img {max-height: 360px; width: auto;}
/* tighter accordions */
.gr-accordion .label {font-weight: 600;}
/* mobile niceties */
@media (max-width: 768px){
.help-chart img {max-height: 240px;}
}
"""
# NOTE(review): the component nesting below was reconstructed from a listing
# that had lost its indentation -- confirm it matches the intended layout.
with gr.Blocks(title="SpellTalk — ASL fingerspelling (live)", theme=THEME, css=CSS, elem_id="app") as demo:
    # ---- Sticky header: title + one-paragraph instructions
    with gr.Column(elem_classes="sticky-head"):
        gr.Markdown("## 🖐️ SpellTalk — ASL fingerspelling → words (live stream)")
        gr.Markdown(
            "Click **Start camera** below. Hold one hand in frame, shoulder-high, palm per the chart. "
            "When a letter is stable, it locks into the **Word**. Pause briefly to end a word."
        )

    with gr.Row():
        # ---- Left column: live video, reset buttons, tuning sliders
        with gr.Column(scale=3):
            with gr.Group(elem_classes="webrtc-wrap"):
                # NOTE(review): FastRTC's deployment docs describe passing
                # rtc_configuration (TURN credentials) when hosted behind a
                # cloud firewall -- confirm whether this Space needs it.
                webrtc = WebRTC(label="Webcam (live)", mode="send-receive", modality="video")
            with gr.Row():
                clear_word = gr.Button("Clear word")
                clear_sent = gr.Button("Clear sentence")
            with gr.Accordion("Settings", open=False):
                th = gr.Slider(0.05, 0.9, value=CONF_THRES, step=0.05, label="Confidence threshold")
                hist = gr.Slider(3, 21, value=HIST_LEN, step=1, label="History length (frames)")
                streak = gr.Slider(2, 12, value=COMMIT_STREAK, step=1, label="Stability to lock (frames)")
                pause = gr.Slider(0.3, 2.5, value=WORD_BREAK_SECONDS, step=0.1, label="Pause → new word (seconds)")

        # ---- Right column: reference chart and usage tips
        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("**ASL alphabet (reference)**")
                gr.Image(value=HELP_CHART_URL, show_label=False, elem_classes="help-chart", interactive=False)
            gr.Markdown(
                "Tips:\n"
                "- Good, even lighting; plain background.\n"
                "- Keep hand within the frame; avoid fast motion.\n"
                "- Letters like **J**/**Z** need motion; this model is tuned mostly for A–Y."
            )

    # ---- Slider callbacks: each one overwrites the matching module global,
    # which the frame handler reads on every frame.
    def _set_conf(v):
        """Update the detection confidence threshold."""
        global CONF_THRES
        CONF_THRES = float(v)

    def _set_hist(v):
        """Resize the history window, keeping the most recent entries."""
        global HIST_LEN, history
        HIST_LEN = int(v)
        history = deque(history, maxlen=HIST_LEN)

    def _set_streak(v):
        """Update how many agreeing frames lock a letter."""
        global COMMIT_STREAK
        COMMIT_STREAK = int(v)

    def _set_pause(v):
        """Update the pause length that ends a word."""
        global WORD_BREAK_SECONDS
        WORD_BREAK_SECONDS = float(v)

    th.change(_set_conf, th, None)
    hist.change(_set_hist, hist, None)
    streak.change(_set_streak, streak, None)
    pause.change(_set_pause, pause, None)

    # The reset helpers take no arguments, so they can be wired directly.
    clear_word.click(reset_word, None, None)
    clear_sent.click(reset_sentence, None, None)

    # Stream hookup (10 min per session keeps Spaces happy)
    webrtc.stream(fn=process_frame, inputs=[webrtc], outputs=[webrtc], time_limit=600)
if __name__ == "__main__":
    # Enable the request queue, then start the server.
    demo.queue().launch()