ConstellationOfHope

Sleeping

App Files Files Community

ConstellationOfHope / model.py

cdr6934

Added synthesized voice

22eff7f 18 days ago

Raw

History Blame Contribute Delete

8.88 kB

	"""Nemotron 3 Nano 4B inference for transforming wishes into poetic sayings."""

	from __future__ import annotations

	import os
	import random
	import re

	try:
	    import spaces  # ZeroGPU runtime; absent in local dev.
	except ImportError:
	    class _SpacesShim:
	        """No-op stand-in used when the ``spaces`` package cannot be imported."""

	        @staticmethod
	        def GPU(*args, **kwargs):
	            """Mimic ``spaces.GPU`` as a pass-through decorator.

	            Works both bare (``@spaces.GPU``) and parametrized
	            (``@spaces.GPU(duration=60)``). Any options are ignored and the
	            decorated function is returned unchanged.
	            """
	            # Bare usage: the function being decorated is the sole positional arg.
	            if args and callable(args[0]):
	                return args[0]

	            # Parametrized usage: return an identity decorator.
	            def deco(fn):
	                return fn

	            return deco

	    spaces = _SpacesShim()

	# Hugging Face model repo to load; the MODEL_ID environment variable
	# overrides the default.
	MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct")

	# Stub mode lets the UI be exercised locally without CUDA/FP8. It turns on
	# only when STUB_MODEL=1 and SPACE_ID is unset or empty; SPACE_ID is set on
	# HF Spaces, so production always runs the real model.
	STUB_MODEL = os.getenv("STUB_MODEL") == "1" and not os.getenv("SPACE_ID")

	# Canned sayings; generate_poetic_saying returns one at random when
	# STUB_MODEL is enabled, so no model needs to be loaded.
	STUB_SAYINGS = [
	"Even the smallest light remembers it once was a wish.",
	"Hope travels farther than any star — it crosses the dark and arrives whole.",
	"What you whisper to the night, the night whispers to the morning.",
	"Every constellation began as someone daring to wish out loud.",
	"The cosmos keeps your hope safe in a quiet pocket of starlight.",
	]

	# System message used by generate_poetic_saying: asks for a single short
	# saying with no preamble, labels, or quotation marks.
	SYSTEM_PROMPT = (
	"You are a wise cosmic oracle who speaks in brief, luminous poetry. "
	"When given someone's wish or hope, respond with a single poetic saying "
	"of one or two sentences. Be warm, timeless, and uplifting. "
	"Reply with only the saying — no preamble, labels, or quotation marks."
	)

	# Theme labels recognised by _coerce_category; they match the themes
	# enumerated in REVIEW_PROMPT.
	CATEGORIES = ("SHAPE", "BOON", "JOURNEY", "BOND", "TRIBUTE")

	# System message used by review_wish. It asks the model for a one-line,
	# pipe-delimited reply (verdict | theme | optional rewrite) that review_wish
	# then parses. The delimiter must be a bare "|": a backslash in front of it
	# would end up inside the prompt text and be echoed back by the model.
	REVIEW_PROMPT = (
	    "You are a thoughtful gatekeeper for a public 'wishes among the stars' constellation.\n"
	    "Do two things at once: judge the wish and classify its theme.\n\n"
	    "Judgement is one of:\n"
	    "- POSITIVE: a sincere hope, dream, intention, or aspiration suitable to share publicly.\n"
	    "- NEGATIVE: contains vulgarity, hatred, cruelty, wishes for harm, or anything unethical.\n"
	    "- NONSENSE: random characters, gibberish, or not a coherent wish.\n\n"
	    "Theme is one of:\n"
	    "- SHAPE: self-change, personal growth, becoming someone different.\n"
	    "- BOON: gifts, possessions, gear, money, material things.\n"
	    "- JOURNEY: travel, going somewhere, exploration.\n"
	    "- BOND: relationships, family, friends, love, connection with others.\n"
	    "- TRIBUTE: giving to others, sacrifice, service, helping someone else.\n\n"
	    "Respond on a SINGLE line in EXACTLY one of these formats, with no extra words:\n"
	    "POSITIVE | <THEME>\n"
	    "NEGATIVE | <THEME> | <a sincere, kind-hearted rewrite of the same underlying hope, one sentence>\n"
	    "NONSENSE"
	)

	# Module-level caches, filled on first use so later calls reuse the loaded
	# objects: _load_model sets _tokenizer and _model, _load_tts sets
	# _tts_pipeline. None means "not loaded yet".
	_tokenizer = None
	_model = None
	_tts_pipeline = None


	def _load_model() -> tuple:
	    """Return the cached ``(tokenizer, model)`` pair, loading it on first use.

	    Both objects are loaded from ``MODEL_ID`` with ``trust_remote_code=True``
	    and stored in the module-level ``_tokenizer`` / ``_model`` globals. With
	    CUDA available the model is loaded in bfloat16 with ``device_map="auto"``;
	    otherwise it is loaded in float32 and moved to the CPU.
	    """
	    global _tokenizer, _model

	    already_loaded = _tokenizer is not None and _model is not None
	    if already_loaded:
	        return _tokenizer, _model

	    # torch / transformers are imported here, not at module import time.
	    import torch
	    from transformers import AutoModelForCausalLM, AutoTokenizer

	    _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

	    has_cuda = torch.cuda.is_available()
	    _model = AutoModelForCausalLM.from_pretrained(
	        MODEL_ID,
	        torch_dtype=torch.bfloat16 if has_cuda else torch.float32,
	        trust_remote_code=True,
	        device_map="auto" if has_cuda else None,
	    )
	    if not has_cuda:
	        _model = _model.to("cpu")

	    return _tokenizer, _model


	# Patterns for model "thinking" traces, compiled once. The think tag is
	# assembled from pieces so the literal tag never appears verbatim in this file.
	_THINK_BLOCK_RE = re.compile(
	    re.escape("<" + "think" + ">") + r".*?" + re.escape("</" + "think" + ">"),
	    re.DOTALL | re.IGNORECASE,
	)
	_REASONING_BLOCK_RE = re.compile(
	    r"<reasoning>.*?</reasoning>",
	    re.DOTALL | re.IGNORECASE,
	)


	def _strip_reasoning(text: str) -> str:
	    """Remove Nemotron thinking traces if they appear in the output.

	    Every think-tag block and every ``<reasoning>...</reasoning>`` block is
	    deleted (tags matched case-insensitively, contents may span lines), then
	    surrounding whitespace is trimmed.

	    Args:
	        text: Decoded model output.

	    Returns:
	        ``text`` without reasoning blocks, stripped of leading/trailing
	        whitespace. Text outside the blocks is left as-is, including any
	        whitespace that surrounded a removed block.
	    """
	    text = _THINK_BLOCK_RE.sub("", text)
	    text = _REASONING_BLOCK_RE.sub("", text)
	    return text.strip()


	def _coerce_category(raw: str) -> str:
	    """Map a raw theme string onto one of ``CATEGORIES``.

	    The input is trimmed and upper-cased; the first category (in
	    ``CATEGORIES`` order) that the result starts with is returned. When
	    nothing matches, ``"SHAPE"`` is used as the fallback.
	    """
	    normalized = raw.strip().upper()
	    candidates = (cat for cat in CATEGORIES if normalized.startswith(cat))
	    return next(candidates, "SHAPE")


	def _parse_review_output(text: str) -> dict:
	    """Convert the reviewer model's raw reply into a ``review_wish`` result dict."""
	    # Use the first line that carries a verdict keyword, so any preamble the
	    # model adds before the verdict is skipped.
	    verdict_line = ""
	    for line in text.splitlines():
	        line_upper = line.upper()
	        if any(word in line_upper for word in ("POSITIVE", "NEGATIVE", "NONSENSE")):
	            verdict_line = line
	            break

	    # Fields are pipe-delimited. Normalise a backslash-escaped pipe in case the
	    # model emits one; maxsplit=2 keeps any further "|" inside the rewrite text.
	    fields = [f.strip() for f in verdict_line.replace("\\|", "|").split("|", 2)]
	    verdict = fields[0].upper()

	    # Only the label fields decide NONSENSE. The free-text rewrite is excluded
	    # so a rewrite that merely contains the word is not treated as gibberish.
	    if any("NONSENSE" in f.upper() for f in fields[:2]):
	        return {"kind": "nonsense", "rewrite": None, "category": None}

	    category = _coerce_category(fields[1]) if len(fields) > 1 else "SHAPE"

	    if "NEGATIVE" in verdict:
	        rewrite = fields[2].strip().strip('"').strip("'") if len(fields) > 2 else ""
	        if rewrite:
	            return {"kind": "rewrite", "rewrite": rewrite, "category": category}
	        # NOTE(review): a NEGATIVE verdict without a rewrite falls through to
	        # "ok" (unchanged behaviour) — confirm this fail-open is intended.

	    return {"kind": "ok", "rewrite": None, "category": category}


	@spaces.GPU(duration=60)
	def review_wish(wish: str) -> dict:
	    """Moderate a wish AND classify its theme in one model call.

	    Args:
	        wish: The user's wish; surrounding whitespace is ignored.

	    Returns:
	        A dict ``{"kind": ..., "rewrite": ..., "category": ...}`` where

	        * ``kind`` is ``"ok"``, ``"rewrite"`` or ``"nonsense"``;
	        * ``rewrite`` is the model's kinder rewording when ``kind`` is
	          ``"rewrite"``, otherwise ``None``;
	        * ``category`` is one of ``CATEGORIES`` (``"SHAPE"`` when the model
	          gives no usable theme), or ``None`` when ``kind`` is ``"nonsense"``.

	        An empty wish yields ``"nonsense"`` without calling the model. In stub
	        mode every non-empty wish yields ``"ok"`` with category ``"SHAPE"``.
	    """
	    wish = wish.strip()
	    if not wish:
	        return {"kind": "nonsense", "rewrite": None, "category": None}
	    if STUB_MODEL:
	        return {"kind": "ok", "rewrite": None, "category": "SHAPE"}

	    import torch

	    tokenizer, model = _load_model()
	    messages = [
	        {"role": "system", "content": REVIEW_PROMPT},
	        {"role": "user", "content": f'Wish: "{wish}"'},
	    ]
	    inputs = tokenizer.apply_chat_template(
	        messages,
	        tokenize=True,
	        add_generation_prompt=True,
	        return_tensors="pt",
	        return_dict=True,
	    ).to(model.device)

	    # Greedy decoding (do_sample=False) for the moderation verdict.
	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=100,
	            do_sample=False,
	            eos_token_id=tokenizer.eos_token_id,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    # Decode only the newly generated tokens, i.e. everything after the prompt.
	    prompt_len = inputs["input_ids"].shape[-1]
	    text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
	    text = _strip_reasoning(text).strip()
	    print(f"[review_wish] wish={wish!r} raw={text!r}", flush=True)

	    return _parse_review_output(text)


	def _load_tts():
	    """Return the cached Kokoro ``KPipeline``, creating it on first use.

	    The pipeline is stored in the module-level ``_tts_pipeline`` global and
	    reused by later calls.
	    """
	    global _tts_pipeline
	    if _tts_pipeline is None:
	        from kokoro import KPipeline

	        # 'a' = American English
	        _tts_pipeline = KPipeline(lang_code="a")
	    return _tts_pipeline


	@spaces.GPU(duration=60)
	def synthesize_whisper(text: str):
	    """Speak the given text in a soft, hushed voice via Kokoro-82M.

	    Returns ``(24000, waveform)`` where ``waveform`` is a float32
	    ``np.ndarray`` built from all audio chunks the pipeline yields, or
	    ``None`` when the text is empty/whitespace or no audio was produced.
	    """
	    cleaned = (text or "").strip()
	    if not cleaned:
	        return None

	    import numpy as np

	    tts = _load_tts()

	    def _as_float32(audio):
	        # Objects exposing .detach() are detached and moved to the CPU before
	        # being converted to a NumPy array.
	        if hasattr(audio, "detach"):
	            audio = audio.detach().cpu().numpy()
	        return np.asarray(audio, dtype=np.float32)

	    # 'af_nicole' is Kokoro's softest American voice; the slower speed plus the
	    # reduced amplitude below give the whisper-quiet feel.
	    segments = [
	        _as_float32(audio)
	        for _, _, audio in tts(cleaned, voice="af_nicole", speed=0.85)
	    ]
	    if not segments:
	        return None

	    # Scale down to quiet the voice toward a whisper.
	    return 24000, np.concatenate(segments) * 0.55


	@spaces.GPU(duration=120)
	def generate_poetic_saying(wish: str, max_new_tokens: int = 120) -> str:
	    """Turn a user's wish into a short, poetic saying.

	    Args:
	        wish: The wish text; surrounding whitespace is ignored.
	        max_new_tokens: Upper bound on generated tokens.

	    Returns:
	        The model's saying with reasoning traces and surrounding quotes
	        removed. A fixed fallback line is returned for an empty wish or an
	        empty generation, and a random ``STUB_SAYINGS`` entry in stub mode.
	    """
	    wish = wish.strip()
	    if not wish:
	        return "Even silence holds a star waiting to be named."
	    if STUB_MODEL:
	        return random.choice(STUB_SAYINGS)

	    import torch

	    tokenizer, model = _load_model()

	    user_turn = (
	        f"A traveler shares this wish with the cosmos:\n\n\"{wish}\"\n\n"
	        "Speak the star's wisdom."
	    )
	    chat = [
	        {"role": "system", "content": SYSTEM_PROMPT},
	        {"role": "user", "content": user_turn},
	    ]
	    encoded = tokenizer.apply_chat_template(
	        chat,
	        tokenize=True,
	        add_generation_prompt=True,
	        return_tensors="pt",
	        return_dict=True,
	    ).to(model.device)

	    # Sampled decoding so repeated wishes can yield different sayings.
	    with torch.no_grad():
	        outputs = model.generate(
	            **encoded,
	            max_new_tokens=max_new_tokens,
	            do_sample=True,
	            temperature=0.7,
	            top_p=0.9,
	            eos_token_id=tokenizer.eos_token_id,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    # Keep only the tokens produced after the prompt.
	    prompt_len = encoded["input_ids"].shape[-1]
	    completion = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
	    saying = _strip_reasoning(completion).strip().strip('"').strip("'")

	    return saying or "Your hope already burns — a quiet star the universe remembers."