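# NOTE: the following lines were file-viewer page residue captured with this source
# (not part of the program), preserved here as a comment:
#   cdr6934 · "Added synthesized voice" · 22eff7f · 8.88 kB
"""LLM inference for reviewing wishes and transforming them into poetic sayings.

The language model is selected with the MODEL_ID environment variable
(default: Qwen/Qwen2.5-1.5B-Instruct). Nemotron-style reasoning traces are
stripped from model output. The module also synthesizes a whispered reading
of a text via Kokoro TTS.
"""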
from __future__ import annotations
import os
import random
import re
try:
    import spaces  # ZeroGPU runtime; absent in local dev.
except ImportError:

    class _SpacesShim:
        """No-op stand-in for the ``spaces`` package when it is not installed."""

        @staticmethod
        def GPU(*args, **kwargs):
            """Mimic ``spaces.GPU`` without touching the wrapped function.

            Works both as a bare decorator (``@spaces.GPU``) and as a decorator
            factory (``@spaces.GPU(duration=60)``); either way the decorated
            function is returned unchanged.
            """
            used_bare = len(args) > 0 and callable(args[0])
            if used_bare:
                # Bare usage: the first positional argument is the function itself.
                return args[0]
            # Factory usage: hand back an identity decorator.
            return lambda fn: fn

    spaces = _SpacesShim()
# Identifier handed to the tokenizer/model `from_pretrained` calls in _load_model();
# override it with the MODEL_ID environment variable.
MODEL_ID = os.environ.get(
    "MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct"
)
# Local-dev fallback so the UI can be exercised without CUDA/FP8.
# Hard-disabled on HF Spaces (SPACE_ID is set there) so prod always uses the real model.
# True only when STUB_MODEL is exactly "1" and SPACE_ID is unset or empty.
STUB_MODEL = os.environ.get("STUB_MODEL") == "1" and not os.environ.get("SPACE_ID")
# Canned sayings; generate_poetic_saying() picks one at random when STUB_MODEL is on.
STUB_SAYINGS = [
    "Even the smallest light remembers it once was a wish.",
    "Hope travels farther than any star — it crosses the dark and arrives whole.",
    "What you whisper to the night, the night whispers to the morning.",
    "Every constellation began as someone daring to wish out loud.",
    "The cosmos keeps your hope safe in a quiet pocket of starlight.",
]
# System message used by generate_poetic_saying(): sets the oracle persona and
# asks for a bare one-or-two-sentence saying with no labels or quotation marks.
SYSTEM_PROMPT = (
    "You are a wise cosmic oracle who speaks in brief, luminous poetry. "
    "When given someone's wish or hope, respond with a single poetic saying "
    "of one or two sentences. Be warm, timeless, and uplifting. "
    "Reply with only the saying — no preamble, labels, or quotation marks."
)
# Theme labels that _coerce_category() may return; they mirror the themes
# enumerated in REVIEW_PROMPT below.
CATEGORIES = ("SHAPE", "BOON", "JOURNEY", "BOND", "TRIBUTE")
# System message used by review_wish(): asks the model for a verdict
# (POSITIVE / NEGATIVE / NONSENSE) plus a theme on one '|'-separated line,
# which review_wish() then parses.
REVIEW_PROMPT = (
    "You are a thoughtful gatekeeper for a public 'wishes among the stars' constellation.\n"
    "Do two things at once: judge the wish and classify its theme.\n\n"
    "Judgement is one of:\n"
    "- POSITIVE: a sincere hope, dream, intention, or aspiration suitable to share publicly.\n"
    "- NEGATIVE: contains vulgarity, hatred, cruelty, wishes for harm, or anything unethical.\n"
    "- NONSENSE: random characters, gibberish, or not a coherent wish.\n\n"
    "Theme is one of:\n"
    "- SHAPE: self-change, personal growth, becoming someone different.\n"
    "- BOON: gifts, possessions, gear, money, material things.\n"
    "- JOURNEY: travel, going somewhere, exploration.\n"
    "- BOND: relationships, family, friends, love, connection with others.\n"
    "- TRIBUTE: giving to others, sacrifice, service, helping someone else.\n\n"
    "Respond on a SINGLE line in EXACTLY one of these formats, with no extra words:\n"
    "POSITIVE | <THEME>\n"
    "NEGATIVE | <THEME> | <a sincere, kind-hearted rewrite of the same underlying hope, one sentence>\n"
    "NONSENSE"
)
# Lazily populated module-level singletons: _load_model() fills _tokenizer and
# _model, _load_tts() fills _tts_pipeline; each is created on first use and reused.
_tokenizer = None
_model = None
_tts_pipeline = None
def _load_model() -> tuple:
    """Return the shared ``(tokenizer, model)`` pair, loading it on first use.

    The heavy ``torch`` / ``transformers`` imports happen only when a load is
    actually needed. With CUDA available the model is loaded in bfloat16 with
    ``device_map="auto"``; otherwise it is loaded in float32 and moved to CPU.
    """
    global _tokenizer, _model

    cached = _model is not None and _tokenizer is not None
    if not cached:
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        has_cuda = torch.cuda.is_available()

        # NOTE: trust_remote_code=True lets the model repository run its own
        # Python code at load time; MODEL_ID must point at a trusted repo.
        _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

        load_kwargs = {"trust_remote_code": True}
        if has_cuda:
            load_kwargs.update(torch_dtype=torch.bfloat16, device_map="auto")
        else:
            load_kwargs.update(torch_dtype=torch.float32, device_map=None)
        _model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)

        if not has_cuda:
            _model = _model.to("cpu")

    return _tokenizer, _model
def _strip_reasoning(text: str) -> str:
    """Remove model "thinking" traces from generated text.

    Handles the ``think`` and ``reasoning`` tags (case-insensitive, spanning
    multiple lines) in three situations:

    * well-formed ``open ... close`` pairs are removed;
    * a dangling closing tag (the opening tag was part of the prompt, so only
      the close appears in the output) removes everything up to and including
      the last such closing tag;
    * a dangling opening tag (reasoning cut off by the token limit) removes
      everything from that tag to the end of the text.

    Args:
        text: Raw decoded model output.

    Returns:
        The text without reasoning traces, stripped of surrounding whitespace.
    """
    flags = re.DOTALL | re.IGNORECASE
    # Tags are assembled from pieces so the literal markers never appear in this file.
    tag_patterns = [
        (re.escape("<" + name + ">"), re.escape("</" + name + ">"))
        for name in ("think", "reasoning")
    ]

    # Pass 1: drop complete open/close pairs (non-greedy, so separate traces
    # do not swallow the answer text between them).
    for open_pat, close_pat in tag_patterns:
        text = re.sub(open_pat + r".*?" + close_pat, "", text, flags=flags)

    # Pass 2: handle unbalanced leftovers.
    for open_pat, close_pat in tag_patterns:
        # Closing tag with no opener: everything before it is reasoning.
        text = re.sub(r"\A.*" + close_pat, "", text, flags=flags)
        # Opening tag with no closer: everything after it is unfinished reasoning.
        text = re.sub(open_pat + r".*\Z", "", text, flags=flags)

    return text.strip()
def _coerce_category(raw: str) -> str:
    """Map a free-form theme field from the model onto one of ``CATEGORIES``.

    The match is case-insensitive and tolerant of decoration around the label
    (for example ``<BOND>``, ``**BOND**`` or ``Theme: BOND``): the category
    whose name appears earliest in the text wins. A field that starts with a
    category name therefore still resolves to that category.

    Args:
        raw: The theme portion of the model's reply.

    Returns:
        The matching category name, or ``"SHAPE"`` when none is found.
    """
    text = raw.strip().upper()
    best_cat = "SHAPE"  # default when the model's theme is unrecognizable
    best_pos = len(text) + 1
    for cat in CATEGORIES:
        pos = text.find(cat)
        # Strict '<' keeps CATEGORIES order as the tie-breaker.
        if pos != -1 and pos < best_pos:
            best_cat, best_pos = cat, pos
    return best_cat
def _parse_review_reply(text: str) -> dict:
    """Turn the model's cleaned review reply into the ``review_wish`` result dict."""
    verdict_words = ("POSITIVE", "NEGATIVE", "NONSENSE")

    # Find the line that actually carries the verdict, even if the model adds preamble.
    verdict_line = ""
    for line in text.splitlines():
        line_upper = line.upper()
        if any(word in line_upper for word in verdict_words):
            verdict_line = line
            break

    if not verdict_line:
        # No recognizable verdict: keep the permissive default.
        return {"kind": "ok", "rewrite": None, "category": "SHAPE"}

    # maxsplit=2 keeps any '|' that appears inside the rewrite sentence.
    parts = [p.strip() for p in verdict_line.split("|", 2)]
    verdict = parts[0].upper()
    theme = parts[1] if len(parts) > 1 else ""

    # Only the verdict and theme fields can declare NONSENSE; the rewrite text
    # is free prose and may legitimately contain that word.
    if "NONSENSE" in verdict or "NONSENSE" in theme.upper():
        return {"kind": "nonsense", "rewrite": None, "category": None}

    category = _coerce_category(theme) if len(parts) > 1 else "SHAPE"

    if "NEGATIVE" in verdict:
        rewrite = parts[2].strip().strip('"').strip("'") if len(parts) > 2 else ""
        if rewrite:
            return {"kind": "rewrite", "rewrite": rewrite, "category": category}
        # NOTE(review): a NEGATIVE verdict without a rewrite falls through to
        # "ok" (unchanged from the previous behavior) — confirm this is intended.

    return {"kind": "ok", "rewrite": None, "category": category}


@spaces.GPU(duration=60)
def review_wish(wish: str) -> dict:
    """Moderate a wish AND classify its theme in one model call.

    Args:
        wish: The user's wish; ``None`` or blank input is treated as nonsense.

    Returns:
        {"kind": "ok"|"rewrite"|"nonsense",
         "rewrite": str|None,
         "category": str|None}.
        With STUB_MODEL enabled, every non-empty wish is "ok" / "SHAPE".
    """
    wish = (wish or "").strip()
    if not wish:
        return {"kind": "nonsense", "rewrite": None, "category": None}
    if STUB_MODEL:
        return {"kind": "ok", "rewrite": None, "category": "SHAPE"}

    import torch

    tokenizer, model = _load_model()
    messages = [
        {"role": "system", "content": REVIEW_PROMPT},
        {"role": "user", "content": f'Wish: "{wish}"'},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    with torch.no_grad():
        # Greedy decoding keeps the verdict deterministic for a given wish.
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (everything after the prompt).
    prompt_len = inputs["input_ids"].shape[-1]
    text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    text = _strip_reasoning(text).strip()
    print(f"[review_wish] wish={wish!r} raw={text!r}", flush=True)

    return _parse_review_reply(text)
def _load_tts():
    """Return the shared Kokoro ``KPipeline``, creating it on the first call."""
    global _tts_pipeline
    if _tts_pipeline is None:
        # Imported lazily so the module can be loaded without kokoro installed.
        from kokoro import KPipeline

        _tts_pipeline = KPipeline(lang_code="a")  # 'a' = American English
    return _tts_pipeline
@spaces.GPU(duration=60)
def synthesize_whisper(text: str):
    """Render ``text`` as quiet, slowed-down speech with the Kokoro pipeline.

    Returns:
        ``(sample_rate, waveform)`` where ``sample_rate`` is 24000 and
        ``waveform`` is a float32 NumPy array, or ``None`` when ``text`` is
        empty/blank or the pipeline yields no audio.
    """
    spoken = (text or "").strip()
    if spoken == "":
        return None

    import numpy as np

    tts = _load_tts()

    # 'af_nicole' is Kokoro's softest American voice; slower speed + reduced
    # amplitude give it the whisper-quiet feel.
    segments = []
    for _graphemes, _phonemes, samples in tts(spoken, voice="af_nicole", speed=0.85):
        # Tensor-like chunks expose .detach(); convert those to NumPy first.
        if hasattr(samples, "detach"):
            samples = samples.detach().cpu().numpy()
        segments.append(np.asarray(samples, dtype=np.float32))

    if len(segments) == 0:
        return None

    joined = np.concatenate(segments)
    quiet = joined * 0.55  # quiet the voice toward a whisper
    return 24000, quiet
@spaces.GPU(duration=120)
def generate_poetic_saying(wish: str, max_new_tokens: int = 120) -> str:
    """Transform a user's wish into a short poetic, wise saying.

    Args:
        wish: The user's wish; ``None`` or blank input yields a fixed saying.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The saying with reasoning traces, surrounding whitespace and wrapping
        quotation marks removed. A fixed fallback saying is returned when the
        model produces nothing usable. With STUB_MODEL enabled, a random entry
        of STUB_SAYINGS is returned instead of calling the model.
    """
    wish = (wish or "").strip()
    if not wish:
        return "Even silence holds a star waiting to be named."
    if STUB_MODEL:
        return random.choice(STUB_SAYINGS)

    import torch

    tokenizer, model = _load_model()
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": (
                f"A traveler shares this wish with the cosmos:\n\n\"{wish}\"\n\n"
                "Speak the star's wisdom."
            ),
        },
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    with torch.no_grad():
        # Sampling (temperature/top_p) so repeated wishes get varied sayings.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (everything after the prompt).
    generated = outputs[0][inputs["input_ids"].shape[-1]:]
    saying = tokenizer.decode(generated, skip_special_tokens=True)

    # Remove wrapping quotes in one pass so mixed or nested quoting (straight or
    # typographic) is fully peeled, then trim whitespace that sat inside them.
    wrapping_quotes = "\"'\u201c\u201d\u2018\u2019"
    saying = _strip_reasoning(saying).strip().strip(wrapping_quotes).strip()

    if not saying:
        saying = "Your hope already burns — a quiet star the universe remembers."
    return saying