# Hugging Face Space: speech/text → ARASAAC pictograms via Hyperdimensional Computing.
import re
import json
import os
import nltk
import gradio as gr
from transformers import pipeline
from nltk.corpus import wordnet

# Ensure WordNet data is available (no-op if already downloaded).
nltk.download("wordnet", quiet=True)

# ── HDC imports ───────────────────────────────────────────────────────────────
from hdc_text2picto import encode_word, PictogramMemory

# ── ASR model ─────────────────────────────────────────────────────────────────
# Whisper tiny (English-only) loaded once at import time; CPU is sufficient for
# the tiny checkpoint and keeps the Space deployable without a GPU.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny.en",
    device="cpu",
)
# ── HDC: build prototype memory from cached core pictograms ───────────────────
#
# At startup we load the locally cached ARASAAC core vocabulary JSON and build
# a PictogramMemory by encoding every keyword (+ WordNet synonyms) for each
# pictogram. This replaces the per-word ARASAAC API call at inference time:
# retrieval is entirely local and offline after startup.

# Map the trailing letter of a synset id to the NLTK WordNet POS constant.
# The code below extracts that letter via split("-")[-1], so ids are assumed
# to be of the form "<something>-<letter>" — TODO confirm against the cached
# JSON. "s" (satellite adjective) is folded into ADJ.
SYNSET_SUFFIX_TO_WN = {
    "n": wordnet.NOUN, "v": wordnet.VERB,
    "a": wordnet.ADJ, "s": wordnet.ADJ, "r": wordnet.ADV,
}
# Same suffixes mapped to the coarse POS strings used by the HDC encoder.
SYNSET_SUFFIX_TO_POS = {"n": "NOUN", "v": "VERB", "a": "ADJ", "s": "ADJ", "r": "ADV"}

CONFIDENCE_THRESHOLD = 0.0  # always return nearest pictogram; badge shows confidence
def pos_from_synsets(synsets: list[str]) -> str:
    """Return the coarse POS tag implied by the first synset id.

    The letter after the final "-" in the id selects the tag via
    SYNSET_SUFFIX_TO_POS; an empty list or an unknown suffix yields "OTHER".
    """
    if not synsets:
        return "OTHER"
    suffix = synsets[0].rsplit("-", 1)[-1]
    return SYNSET_SUFFIX_TO_POS.get(suffix, "OTHER")
def get_synonyms(keyword: str, synsets: list[str]) -> list[str]:
    """Collect lowercase WordNet lemma names for *keyword*, excluding itself.

    When synset ids are provided, the first id's POS suffix restricts the
    WordNet lookup; otherwise every part of speech is searched. Underscores
    in multi-word lemmas become spaces. Order is unspecified (set-based).
    """
    wn_pos = SYNSET_SUFFIX_TO_WN.get(synsets[0].split("-")[-1]) if synsets else None
    base = keyword.lower()
    found: set[str] = set()
    for synset in wordnet.synsets(keyword, pos=wn_pos):
        for lemma in synset.lemmas():
            name = lemma.name().replace("_", " ").lower()
            if name != base:
                found.add(name)
    return list(found)
def build_memory(core_pictos: list[dict]) -> PictogramMemory:
    """Encode all core pictogram keywords (+ WordNet synonyms) into prototypes.

    Args:
        core_pictos: ARASAAC pictogram records; each must carry ``_id`` and
            may carry ``synsets`` and ``keywords`` (list of ``{"keyword": ...}``
            dicts). Records with no non-empty keyword are skipped.

    Returns:
        A built PictogramMemory whose prototypes map back to pictogram ids,
        each labelled with the pictogram's first keyword.

    Fix: dropped the original's unused ``pos = pos_from_synsets(synsets)``
    local — POS is deliberately NOT used for encoding (see comment below),
    so the call was pure dead work.
    """
    memory = PictogramMemory()
    for p in core_pictos:
        pid = p["_id"]
        synsets = p.get("synsets", [])
        keywords = [kw for kw in p.get("keywords", []) if kw.get("keyword")]
        if not keywords:
            continue
        label = keywords[0]["keyword"]
        # Encode using pos="OTHER" and synsets=[] to match inference-time encoding,
        # where POS and synsets are unknown. This ensures training and inference
        # composites are built the same way, so cosine similarity is meaningful.
        seen = set()
        for kw in keywords:
            word = kw["keyword"]
            if word.lower() in seen:
                continue
            seen.add(word.lower())
            memory.add(pid, encode_word(word, "OTHER", "NONE", []), label)
            # WordNet synonym injection (encoded the same way).
            for syn in get_synonyms(word, synsets):
                if syn not in seen:
                    seen.add(syn)
                    memory.add(pid, encode_word(syn, "OTHER", "NONE", []), label)
    memory.build()
    return memory
| print("Building HDC prototype memory from core vocabulary...") | |
| _cache_path = os.path.join(os.path.dirname(__file__), "core_pictograms.json") | |
| with open(_cache_path) as f: | |
| _core_pictos = json.load(f) | |
| memory = build_memory(_core_pictos) | |
| print(f" Ready β {len(memory.protos)} pictogram prototypes loaded.") | |
# ── HDC lookup ────────────────────────────────────────────────────────────────
def hdc_lookup(word: str) -> tuple[int | None, float, str]:
    """
    Encode *word* as an HDC composite vector and fetch the nearest pictogram
    prototype from the module-level memory.

    POS and synsets are unknown at inference time, so defaults are used; the
    semantic content from the GloVe embedding carries most of the signal.

    Returns:
        (picto_id, similarity, label) on a hit, or (None, similarity, "")
        when the best similarity falls below CONFIDENCE_THRESHOLD.
    """
    composite = encode_word(word, pos="OTHER", ner="NONE", synsets=[])
    best_id, best_label, best_sim = memory.retrieve(composite, top_k=1)[0]
    if best_sim < CONFIDENCE_THRESHOLD:
        return None, best_sim, ""
    return best_id, best_sim, best_label
# ── Image URL ─────────────────────────────────────────────────────────────────
def picto_url(picto_id: int, size: int = 500) -> str:
    """Return the ARASAAC static-CDN URL for a pictogram PNG at *size* px."""
    base = "https://static.arasaac.org/pictograms"
    return f"{base}/{picto_id}/{picto_id}_{size}.png"
# ── Tokeniser ─────────────────────────────────────────────────────────────────
# Compiled once: strips every character that is not a word char, apostrophe,
# or hyphen (the original recompiled and re-applied this per token, twice).
_TOKEN_JUNK = re.compile(r"[^\w'-]")


def tokenize(text: str) -> list[str]:
    """Split *text* on whitespace, strip punctuation from each token, and
    drop tokens that become empty (e.g. "..." or "!!").

    Apostrophes and hyphens survive so contractions ("it's") and compounds
    ("well-known") stay intact.
    """
    cleaned = (_TOKEN_JUNK.sub("", tok) for tok in text.split())
    return [tok for tok in cleaned if tok]
# ── Render pictograms ─────────────────────────────────────────────────────────
def render_pictos(text: str) -> str:
    """Render *text* as an HTML flex-grid of pictogram cards.

    Each token from tokenize() is looked up via hdc_lookup(); hits show the
    ARASAAC image plus a similarity badge (green when sim >= 0.15, orange
    otherwise), misses show a grey "?" placeholder. Empty/blank input yields
    a placeholder paragraph.

    Fix: the original tested ``if picto_id:``, which would misclassify a
    legitimate pictogram id of 0 as a miss; hdc_lookup's miss sentinel is
    ``None``, so we test ``is not None``.
    """
    if not text or not text.strip():
        return "<p style='color:gray;text-align:center;padding:20px;'>No text to display.</p>"
    cards = []
    for word in tokenize(text):
        picto_id, sim, label = hdc_lookup(word)
        if picto_id is not None:
            img = (
                f'<img src="{picto_url(picto_id)}" alt="{word}" title="{label} (sim={sim:.2f})" '
                f'style="width:110px;height:110px;object-fit:contain;">'
            )
            # Similarity badge: green if confident, orange if marginal
            badge_color = "#4caf50" if sim >= 0.15 else "#ff9800"
            badge = (
                f'<span style="font-size:0.7rem;background:{badge_color};color:white;'
                f'border-radius:4px;padding:1px 4px;">{sim:.2f}</span>'
            )
            label_style = "font-size:0.85rem;margin-top:4px;word-break:break-word;font-weight:600;"
            label_html = f'<p style="{label_style}">{word}</p>{badge}'
        else:
            img = (
                '<div style="width:110px;height:110px;background:#f0f0f0;border-radius:8px;'
                'display:flex;align-items:center;justify-content:center;font-size:2rem;color:#bbb;">?</div>'
            )
            label_style = "font-size:0.85rem;margin-top:4px;word-break:break-word;color:#aaa;"
            label_html = f'<p style="{label_style}">{word}</p>'
        cards.append(
            f'<div style="display:flex;flex-direction:column;align-items:center;width:130px;'
            f'padding:8px;background:white;border-radius:10px;box-shadow:0 1px 4px rgba(0,0,0,0.1);">'
            f'{img}{label_html}</div>'
        )
    return (
        '<div style="display:flex;flex-wrap:wrap;gap:12px;justify-content:center;'
        'padding:20px;background:#f5f5f5;border-radius:12px;">'
        + "".join(cards) + "</div>"
    )
# ── Processing functions ──────────────────────────────────────────────────────
def process_audio(audio_path):
    """Transcribe *audio_path* with Whisper and render pictograms for it.

    Returns a (transcribed_text, pictogram_html) pair; when no audio was
    provided, the text is empty and the HTML is a placeholder message.
    """
    if audio_path is None:
        return "", "<p style='color:gray;text-align:center;padding:20px;'>No audio provided.</p>"
    transcript = asr(audio_path)["text"].strip()
    return transcript, render_pictos(transcript)
def process_text(text):
    """Render pictogram HTML for free text (thin UI wrapper over render_pictos)."""
    return render_pictos(text)
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# NOTE(review): several emoji/symbols in the strings below look mojibake-garbled
# (e.g. "π€", "βοΈ", "β") — likely encoding corruption upstream; they are kept
# byte-for-byte here. Confirm and restore the intended characters separately.
#
# Fix: the theme is now passed to gr.Blocks(...) — Blocks.launch() accepts no
# `theme` keyword, so the original `demo.launch(theme=...)` raised a TypeError.
with gr.Blocks(title="Speech/Text β ARASAAC Pictograms (HDC)", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # π§  Speech / Text β ARASAAC Pictograms (HDC)
        Convert spoken or written English into ARASAAC pictograms using
        **Hyperdimensional Computing** for offline, semantic word-to-pictogram retrieval.
        Uses [Whisper tiny](https://huggingface.co/openai/whisper-tiny.en) for speech recognition.
        Pictogram lookup uses HDC prototype memory built from ~855 core vocabulary pictograms
        and WordNet synonym injection β no API call per word at inference time.
        The similarity score badge on each card shows retrieval confidence
        (π’ β₯ 0.15 Β· π  < 0.15 Β· **?** below threshold).
        """
    )
    with gr.Tab("π€ Audio"):
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Record or upload audio (.wav)",
        )
        transcribe_btn = gr.Button("Transcribe & Generate Pictograms", variant="primary")
        # Editable so the user can correct ASR mistakes and regenerate.
        transcribed_box = gr.Textbox(
            label="Transcribed text (editable β press Enter to regenerate pictograms)",
            lines=2,
            interactive=True,
        )
        audio_picto_output = gr.HTML()
        transcribe_btn.click(
            fn=process_audio,
            inputs=audio_input,
            outputs=[transcribed_box, audio_picto_output],
        )
        transcribed_box.submit(
            fn=process_text,
            inputs=transcribed_box,
            outputs=audio_picto_output,
        )
    with gr.Tab("βοΈ Text"):
        text_input = gr.Textbox(
            label="Input text",
            placeholder="e.g. I want to eat an apple",
            lines=2,
        )
        text_btn = gr.Button("Generate Pictograms", variant="primary")
        text_picto_output = gr.HTML()
        text_btn.click(fn=process_text, inputs=text_input, outputs=text_picto_output)
        text_input.submit(fn=process_text, inputs=text_input, outputs=text_picto_output)

if __name__ == "__main__":
    demo.launch()