# Source: Hugging Face Space snapshot by "hjulerm", commit 3da5f3e
# ("Update HDC text-to-pictogram space"). App code begins below.
import re
import json
import os
import nltk
import gradio as gr
from transformers import pipeline
from nltk.corpus import wordnet
# Ensure WordNet data is available (quiet=True suppresses nltk's download log).
nltk.download("wordnet", quiet=True)

# ── HDC imports ───────────────────────────────────────────────────────────────
from hdc_text2picto import encode_word, PictogramMemory

# ── ASR model ─────────────────────────────────────────────────────────────────
# Whisper tiny (English-only) keeps startup time and CPU inference cost low.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny.en",
    device="cpu",
)

# ── HDC: build prototype memory from cached core pictograms ───────────────────
#
# At startup we load the locally cached ARASAAC core vocabulary JSON and build
# a PictogramMemory by encoding every keyword (+ WordNet synonyms) for each
# pictogram. This replaces the per-word ARASAAC API call at inference time:
# retrieval is entirely local and offline after startup.

# ARASAAC synset ids end in a WordNet POS suffix (e.g. "...-n"). Map that
# suffix to nltk's WordNet POS constants; "s" (satellite adjective) is folded
# into ADJ.
SYNSET_SUFFIX_TO_WN = {
    "n": wordnet.NOUN, "v": wordnet.VERB,
    "a": wordnet.ADJ, "s": wordnet.ADJ, "r": wordnet.ADV,
}
# Same suffixes mapped to the coarse POS tag strings used by pos_from_synsets().
SYNSET_SUFFIX_TO_POS = {"n": "NOUN", "v": "VERB", "a": "ADJ", "s": "ADJ", "r": "ADV"}
CONFIDENCE_THRESHOLD = 0.0  # always return nearest pictogram; badge shows confidence
def pos_from_synsets(synsets: list[str]) -> str:
    """Derive a coarse POS tag from the first synset id's trailing suffix.

    Synset ids end in a WordNet POS letter (e.g. "...-n"); unknown suffixes
    and an empty synset list both yield "OTHER".
    """
    if not synsets:
        return "OTHER"
    suffix = synsets[0].split("-")[-1]
    return SYNSET_SUFFIX_TO_POS.get(suffix, "OTHER")
def get_synonyms(keyword: str, synsets: list[str]) -> list[str]:
    """Collect WordNet lemma names for *keyword*, excluding the keyword itself.

    When *synsets* is non-empty, its first id's POS suffix restricts which
    WordNet part of speech is searched; otherwise all are considered.
    Underscores in lemma names become spaces and results are lowercased.
    """
    wn_pos = None
    if synsets:
        wn_pos = SYNSET_SUFFIX_TO_WN.get(synsets[0].split("-")[-1])
    found = {
        lemma.name().replace("_", " ").lower()
        for ss in wordnet.synsets(keyword, pos=wn_pos)
        for lemma in ss.lemmas()
    }
    # The keyword itself is not a synonym of itself.
    found.discard(keyword.lower())
    return list(found)
def build_memory(core_pictos: list[dict]) -> PictogramMemory:
    """Encode all core pictogram keywords (+ WordNet synonyms) into prototypes.

    Args:
        core_pictos: ARASAAC pictogram records; each needs an "_id" and a
            "keywords" list of {"keyword": str, ...} dicts ("synsets" optional).

    Returns:
        A built PictogramMemory holding one prototype per pictogram id,
        labelled with the pictogram's primary keyword.
    """
    memory = PictogramMemory()
    for p in core_pictos:
        pid = p["_id"]
        synsets = p.get("synsets", [])
        keywords = [kw for kw in p.get("keywords", []) if kw.get("keyword")]
        if not keywords:
            continue
        # Display label: the pictogram's primary (first) keyword.
        label = keywords[0]["keyword"]
        # NOTE: POS is deliberately NOT encoded (fix: the previously computed
        # pos_from_synsets() result was never used). Encode with pos="OTHER"
        # and synsets=[] to match inference-time encoding, where POS and
        # synsets are unknown. This ensures training and inference composites
        # are built the same way, so cosine similarity is meaningful.
        seen = set()
        for kw in keywords:
            word = kw["keyword"]
            if word.lower() in seen:
                continue
            seen.add(word.lower())
            memory.add(pid, encode_word(word, "OTHER", "NONE", []), label)
            # WordNet synonym injection (encoded the same way)
            for syn in get_synonyms(word, synsets):
                if syn not in seen:
                    seen.add(syn)
                    memory.add(pid, encode_word(syn, "OTHER", "NONE", []), label)
    memory.build()
    return memory
# ── Startup: load cached core vocabulary and build the prototype memory ───────
print("Building HDC prototype memory from core vocabulary...")
# The cache file sits next to this script so the app works offline.
_cache_path = os.path.join(os.path.dirname(__file__), "core_pictograms.json")
with open(_cache_path) as f:
    _core_pictos = json.load(f)
memory = build_memory(_core_pictos)
# Fix: the em dash was mojibake ("β€”" = UTF-8 "—" mis-decoded).
print(f" Ready — {len(memory.protos)} pictogram prototypes loaded.")
# ── HDC lookup ────────────────────────────────────────────────────────────────
def hdc_lookup(word: str) -> tuple[int | None, float, str]:
    """
    Encode a word as an HDC composite vector and retrieve the nearest pictogram
    prototype. POS and synsets are unknown at inference time so we use defaults;
    the semantic content from the GloVe embedding carries most of the signal.

    Returns (picto_id, similarity, label) on a hit, or (None, similarity, "")
    when similarity falls below CONFIDENCE_THRESHOLD. (Docstring fix: the
    similarity — not 0.0 — is returned even below the threshold.)
    """
    query_hv = encode_word(word, pos="OTHER", ner="NONE", synsets=[])
    # top_k=1: only the single nearest prototype matters here.
    pid, label, sim = memory.retrieve(query_hv, top_k=1)[0]
    if sim < CONFIDENCE_THRESHOLD:
        return None, sim, ""
    return pid, sim, label
# ── Image URL ─────────────────────────────────────────────────────────────────
def picto_url(picto_id: int, size: int = 500) -> str:
    """Build the ARASAAC static-CDN PNG URL for *picto_id* at *size* pixels."""
    base = "https://static.arasaac.org/pictograms"
    return f"{base}/{picto_id}/{picto_id}_{size}.png"
# ── Tokeniser ─────────────────────────────────────────────────────────────────
def tokenize(text: str) -> list[str]:
    """Split *text* on whitespace and strip each token to word characters,
    apostrophes, and hyphens; tokens that become empty are dropped.

    Fix: the original ran re.sub twice per token (once for the filter, once
    for the value); the walrus operator computes it once, and the pattern is
    compiled a single time per call instead of per token.
    """
    strip_punct = re.compile(r"[^\w'-]")
    return [cleaned for tok in text.split() if (cleaned := strip_punct.sub("", tok))]
# ── Render pictograms ─────────────────────────────────────────────────────────
def render_pictos(text: str) -> str:
    """Render *text* as an HTML flex grid of ARASAAC pictogram cards.

    Each token from tokenize() is looked up via hdc_lookup(): a match shows
    the pictogram image plus a similarity badge; a miss shows a gray "?"
    placeholder with a dimmed label.
    """
    if not text or not text.strip():
        return "<p style='color:gray;text-align:center;padding:20px;'>No text to display.</p>"
    cards = []
    for word in tokenize(text):
        picto_id, sim, label = hdc_lookup(word)
        # Fix: explicit None check. hdc_lookup signals "no match" with None;
        # a falsy-but-valid id of 0 must not fall through to the placeholder.
        if picto_id is not None:
            img = (
                f'<img src="{picto_url(picto_id)}" alt="{word}" title="{label} (sim={sim:.2f})" '
                f'style="width:110px;height:110px;object-fit:contain;">'
            )
            # Similarity badge: green if confident, orange if marginal
            badge_color = "#4caf50" if sim >= 0.15 else "#ff9800"
            badge = (
                f'<span style="font-size:0.7rem;background:{badge_color};color:white;'
                f'border-radius:4px;padding:1px 4px;">{sim:.2f}</span>'
            )
            label_style = "font-size:0.85rem;margin-top:4px;word-break:break-word;font-weight:600;"
            label_html = f'<p style="{label_style}">{word}</p>{badge}'
        else:
            img = (
                '<div style="width:110px;height:110px;background:#f0f0f0;border-radius:8px;'
                'display:flex;align-items:center;justify-content:center;font-size:2rem;color:#bbb;">?</div>'
            )
            label_style = "font-size:0.85rem;margin-top:4px;word-break:break-word;color:#aaa;"
            label_html = f'<p style="{label_style}">{word}</p>'
        cards.append(
            f'<div style="display:flex;flex-direction:column;align-items:center;width:130px;'
            f'padding:8px;background:white;border-radius:10px;box-shadow:0 1px 4px rgba(0,0,0,0.1);">'
            f'{img}{label_html}</div>'
        )
    return (
        '<div style="display:flex;flex-wrap:wrap;gap:12px;justify-content:center;'
        'padding:20px;background:#f5f5f5;border-radius:12px;">'
        + "".join(cards) + "</div>"
    )
# ── Processing functions ──────────────────────────────────────────────────────
def process_audio(audio_path):
    """Transcribe an audio file and render its pictograms.

    Returns a (transcript, pictogram_html) pair feeding the Gradio textbox
    and HTML output components. A None path yields an empty transcript and
    a placeholder message.
    """
    if audio_path is None:
        return "", "<p style='color:gray;text-align:center;padding:20px;'>No audio provided.</p>"
    transcript = asr(audio_path)["text"].strip()
    return transcript, render_pictos(transcript)
def process_text(text):
    """Thin wrapper for the Gradio text events: render pictograms for *text*."""
    return render_pictos(text)
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# Fix: `theme` belongs to the gr.Blocks constructor — Blocks.launch() has no
# `theme` parameter (recent Gradio raises TypeError on unknown kwargs).
# Also fixed mojibake in the UI strings (UTF-8 text mis-decoded, e.g.
# "β†’" for "→", "🎀" for "🎤").
with gr.Blocks(
    title="Speech/Text → ARASAAC Pictograms (HDC)",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(
        """
        # 🧠 Speech / Text → ARASAAC Pictograms (HDC)

        Convert spoken or written English into ARASAAC pictograms using
        **Hyperdimensional Computing** for offline, semantic word-to-pictogram retrieval.

        Uses [Whisper tiny](https://huggingface.co/openai/whisper-tiny.en) for speech recognition.
        Pictogram lookup uses HDC prototype memory built from ~855 core vocabulary pictograms
        and WordNet synonym injection — no API call per word at inference time.

        The similarity score badge on each card shows retrieval confidence
        (🟢 ≥ 0.15 · 🟠 < 0.15 · **?** below threshold).
        """
    )
    with gr.Tab("🎤 Audio"):
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Record or upload audio (.wav)",
        )
        transcribe_btn = gr.Button("Transcribe & Generate Pictograms", variant="primary")
        transcribed_box = gr.Textbox(
            label="Transcribed text (editable — press Enter to regenerate pictograms)",
            lines=2,
            interactive=True,
        )
        audio_picto_output = gr.HTML()
        # Button fills both the editable transcript box and the pictogram grid.
        transcribe_btn.click(
            fn=process_audio,
            inputs=audio_input,
            outputs=[transcribed_box, audio_picto_output],
        )
        # Enter in the transcript box re-renders pictograms from the edit.
        transcribed_box.submit(
            fn=process_text,
            inputs=transcribed_box,
            outputs=audio_picto_output,
        )
    with gr.Tab("✍️ Text"):
        text_input = gr.Textbox(
            label="Input text",
            placeholder="e.g. I want to eat an apple",
            lines=2,
        )
        text_btn = gr.Button("Generate Pictograms", variant="primary")
        text_picto_output = gr.HTML()
        text_btn.click(fn=process_text, inputs=text_input, outputs=text_picto_output)
        text_input.submit(fn=process_text, inputs=text_input, outputs=text_picto_output)

if __name__ == "__main__":
    demo.launch()