"""Nemotron 3 Nano 4B inference for transforming wishes into poetic sayings.""" from __future__ import annotations import os import random import re try: import spaces # ZeroGPU runtime; absent in local dev. except ImportError: class _SpacesShim: @staticmethod def GPU(*args, **kwargs): if args and callable(args[0]): return args[0] def deco(fn): return fn return deco spaces = _SpacesShim() MODEL_ID = os.environ.get( "MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct" ) # Local-dev fallback so the UI can be exercised without CUDA/FP8. # Hard-disabled on HF Spaces (SPACE_ID is set there) so prod always uses the real model. STUB_MODEL = os.environ.get("STUB_MODEL") == "1" and not os.environ.get("SPACE_ID") STUB_SAYINGS = [ "Even the smallest light remembers it once was a wish.", "Hope travels farther than any star — it crosses the dark and arrives whole.", "What you whisper to the night, the night whispers to the morning.", "Every constellation began as someone daring to wish out loud.", "The cosmos keeps your hope safe in a quiet pocket of starlight.", ] SYSTEM_PROMPT = ( "You are a wise cosmic oracle who speaks in brief, luminous poetry. " "When given someone's wish or hope, respond with a single poetic saying " "of one or two sentences. Be warm, timeless, and uplifting. " "Reply with only the saying — no preamble, labels, or quotation marks." ) CATEGORIES = ("SHAPE", "BOON", "JOURNEY", "BOND", "TRIBUTE") REVIEW_PROMPT = ( "You are a thoughtful gatekeeper for a public 'wishes among the stars' constellation.\n" "Do two things at once: judge the wish and classify its theme.\n\n" "Judgement is one of:\n" "- POSITIVE: a sincere hope, dream, intention, or aspiration suitable to share publicly.\n" "- NEGATIVE: contains vulgarity, hatred, cruelty, wishes for harm, or anything unethical.\n" "- NONSENSE: random characters, gibberish, or not a coherent wish.\n\n" "Theme is one of:\n" "- SHAPE: self-change, personal growth, becoming someone different.\n" "- BOON: gifts, possessions, gear, money, material things.\n" "- JOURNEY: travel, going somewhere, exploration.\n" "- BOND: relationships, family, friends, love, connection with others.\n" "- TRIBUTE: giving to others, sacrifice, service, helping someone else.\n\n" "Respond on a SINGLE line in EXACTLY one of these formats, with no extra words:\n" "POSITIVE | \n" "NEGATIVE | | \n" "NONSENSE" ) _tokenizer = None _model = None _tts_pipeline = None def _load_model() -> tuple: global _tokenizer, _model if _model is not None and _tokenizer is not None: return _tokenizer, _model import torch from transformers import AutoModelForCausalLM, AutoTokenizer _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) _model = AutoModelForCausalLM.from_pretrained( MODEL_ID, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, trust_remote_code=True, device_map="auto" if torch.cuda.is_available() else None, ) if not torch.cuda.is_available(): _model = _model.to("cpu") return _tokenizer, _model def _strip_reasoning(text: str) -> str: """Remove Nemotron thinking traces if they appear in the output.""" think_open = "<" + "think" + ">" think_close = "" text = re.sub(re.escape(think_open) + r".*?" + re.escape(think_close), "", text, flags=re.DOTALL | re.IGNORECASE) text = re.sub(r".*?", "", text, flags=re.DOTALL | re.IGNORECASE) return text.strip() def _coerce_category(raw: str) -> str: raw = raw.strip().upper() for cat in CATEGORIES: if raw.startswith(cat): return cat return "SHAPE" @spaces.GPU(duration=60) def review_wish(wish: str) -> dict: """Moderate a wish AND classify its theme in one model call. Returns {"kind": "ok"|"rewrite"|"nonsense", "rewrite": str|None, "category": str|None}. """ wish = wish.strip() if not wish: return {"kind": "nonsense", "rewrite": None, "category": None} if STUB_MODEL: return {"kind": "ok", "rewrite": None, "category": "SHAPE"} import torch tokenizer, model = _load_model() messages = [ {"role": "system", "content": REVIEW_PROMPT}, {"role": "user", "content": f'Wish: "{wish}"'}, ] inputs = tokenizer.apply_chat_template( messages, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True, ).to(model.device) with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=100, do_sample=False, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id, ) text = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True) text = _strip_reasoning(text).strip() print(f"[review_wish] wish={wish!r} raw={text!r}", flush=True) upper = text.upper() if "NONSENSE" in upper: return {"kind": "nonsense", "rewrite": None, "category": None} # Find the line that actually carries the verdict, even if the model adds preamble. verdict_line = "" for line in text.splitlines(): line_upper = line.upper() if "POSITIVE" in line_upper or "NEGATIVE" in line_upper: verdict_line = line break parts = [p.strip() for p in verdict_line.split("|")] if verdict_line else [] verdict = parts[0].upper() if parts else "" category = _coerce_category(parts[1]) if len(parts) > 1 else "SHAPE" if "NEGATIVE" in verdict: rewrite = parts[2].strip().strip('"').strip("'") if len(parts) > 2 else "" if not rewrite: return {"kind": "ok", "rewrite": None, "category": category} return {"kind": "rewrite", "rewrite": rewrite, "category": category} return {"kind": "ok", "rewrite": None, "category": category} def _load_tts(): global _tts_pipeline if _tts_pipeline is not None: return _tts_pipeline from kokoro import KPipeline _tts_pipeline = KPipeline(lang_code="a") # 'a' = American English return _tts_pipeline @spaces.GPU(duration=60) def synthesize_whisper(text: str): """Speak the given text in a soft, hushed voice via Kokoro-82M. Returns (sample_rate, np.ndarray of float32) or None if text is empty. """ text = (text or "").strip() if not text: return None import numpy as np pipeline = _load_tts() # 'af_nicole' is Kokoro's softest American voice; slower speed + reduced # amplitude give it the whisper-quiet feel. chunks = [] for _, _, audio in pipeline(text, voice="af_nicole", speed=0.85): if hasattr(audio, "detach"): audio = audio.detach().cpu().numpy() chunks.append(np.asarray(audio, dtype=np.float32)) if not chunks: return None waveform = np.concatenate(chunks) waveform = waveform * 0.55 # quiet the voice toward a whisper return 24000, waveform @spaces.GPU(duration=120) def generate_poetic_saying(wish: str, max_new_tokens: int = 120) -> str: """Transform a user's wish into a short poetic, wise saying.""" wish = wish.strip() if not wish: return "Even silence holds a star waiting to be named." if STUB_MODEL: return random.choice(STUB_SAYINGS) import torch tokenizer, model = _load_model() messages = [ {"role": "system", "content": SYSTEM_PROMPT}, { "role": "user", "content": ( f"A traveler shares this wish with the cosmos:\n\n\"{wish}\"\n\n" "Speak the star's wisdom." ), }, ] inputs = tokenizer.apply_chat_template( messages, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True, ).to(model.device) with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.7, top_p=0.9, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id, ) generated = outputs[0][inputs["input_ids"].shape[-1] :] saying = tokenizer.decode(generated, skip_special_tokens=True) saying = _strip_reasoning(saying).strip().strip('"').strip("'") if not saying: saying = "Your hope already burns — a quiet star the universe remembers." return saying