Spaces:

build-small-hackathon
/

ObjectverseDiary

Running on Zero

App Files Files Community

ObjectverseDiary / src /models /llama_cpp_runner.py

qqyule

Deploy Hub GGUF downloader runtime

c45600f verified 4 days ago

raw

history blame contribute delete

13.2 kB

	"""Text generation runtime with mock and optional llama.cpp backends."""

	from __future__ import annotations

	import json
	from pathlib import Path
	from typing import Any

	from src.config import RuntimeSettings, get_runtime_settings
	from src.models.schema import DiaryEntry, ObjectUnderstanding, Persona, PersonaEnvelope
	from src.prompts.diary_generation import (
	CHAT_REPLY_PROMPT,
	DIARY_GENERATION_PROMPT,
	PERSONA_DIARY_GENERATION_PROMPT,
	)
	from src.prompts.persona_generation import PERSONA_GENERATION_PROMPT
	from src.utils.json_repair import parse_json_object


	# Personality presets keyed by diary mode.
	# Each row below is (mode, mood, fear, voice); the mapping keeps the row order.
	MODE_PROFILES = {
	    mode_name: {"mood": mood, "fear": fear, "voice": voice}
	    for mode_name, mood, fear, voice in (
	        (
	            "Cynical",
	            "tired but sarcastic",
	            "being replaced by a newer object with worse opinions",
	            "dry",
	        ),
	        (
	            "Dramatic",
	            "theatrical and wounded",
	            "being forgotten before the final act",
	            "operatic",
	        ),
	        (
	            "Lonely",
	            "softly abandoned",
	            "becoming invisible in plain sight",
	            "quiet",
	        ),
	        (
	            "Philosopher",
	            "curious and needlessly profound",
	            "discovering that usefulness is not meaning",
	            "reflective",
	        ),
	        (
	            "Romantic",
	            "hopelessly sentimental",
	            "loving a human who only sees storage capacity",
	            "wistful",
	        ),
	    )
	}

	# Accepted spellings of the llama.cpp backend name (matched after strip + lower-casing).
	LLAMA_CPP_BACKENDS = {"llama-cpp", "llama_cpp", "llamacpp"}
	# Marker recorded whenever a llama.cpp call failed and mock output was used instead.
	TEXT_FALLBACK_TO_MOCK = "text-fallback-to-mock"

	# Module-wide cache: the loaded llama.cpp model and the path it was loaded from.
	_LLAMA_MODEL: Any | None = None
	_LLAMA_MODEL_PATH: str | None = None
	# Fallback markers collected since the last reset_text_runtime_fallbacks() call.
	_TEXT_FALLBACKS: list[str] = []


	def generate_persona(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
	    """Create a persona for the given object understanding in the requested mode.

	    When the configured text backend is llama.cpp it is tried first. Any
	    exception it raises is logged, the mock-fallback marker is recorded, and
	    the mock persona is returned instead.
	    """
	    runtime = get_runtime_settings()
	    if not _is_llama_cpp_backend(runtime):
	        return _generate_persona_mock(object_understanding, mode)

	    try:
	        return _generate_persona_llama_cpp(object_understanding, mode, runtime)
	    except Exception as error:
	        # Best-effort generation: never surface backend failures to the caller.
	        _log_text_fallback("persona", error)
	        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
	    return _generate_persona_mock(object_understanding, mode)


	def generate_persona_and_diary(
	    object_understanding: ObjectUnderstanding,
	    mode: str,
	) -> tuple[PersonaEnvelope, DiaryEntry]:
	    """Produce a persona and its diary entry in one step.

	    With the llama.cpp backend configured, both are requested from the model
	    in a single call. If that call raises, the failure is logged, the
	    mock-fallback marker is recorded, and both values come from the mock
	    generators instead.
	    """
	    runtime = get_runtime_settings()
	    if _is_llama_cpp_backend(runtime):
	        try:
	            generated = _generate_persona_and_diary_llama_cpp(object_understanding, mode, runtime)
	        except Exception as error:
	            _log_text_fallback("persona+diary", error)
	            _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
	        else:
	            return generated

	    mock_persona = _generate_persona_mock(object_understanding, mode)
	    mock_diary = _generate_diary_mock(mock_persona, mode)
	    return mock_persona, mock_diary


	def generate_diary(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
	    """Write a diary entry for ``persona`` in the requested mode.

	    llama.cpp is only attempted when it is the configured backend and no
	    mock fallback has been recorded yet; otherwise, or if the attempt raises,
	    the mock diary is returned.
	    """
	    runtime = get_runtime_settings()
	    llama_usable = (
	        _is_llama_cpp_backend(runtime)
	        and TEXT_FALLBACK_TO_MOCK not in _TEXT_FALLBACKS
	    )
	    if llama_usable:
	        try:
	            return _generate_diary_llama_cpp(persona, mode, runtime)
	        except Exception as error:
	            _log_text_fallback("diary", error)
	            _add_text_fallback(TEXT_FALLBACK_TO_MOCK)

	    return _generate_diary_mock(persona, mode)


	def reply_as_object(persona_data: dict, message: str) -> str:
	    """Answer ``message`` in the voice of the persona described by ``persona_data``.

	    The mock reply is used when llama.cpp is not the configured backend, when
	    a mock fallback has already been recorded, or when the llama.cpp attempt
	    raises (in which case the failure is logged and the marker recorded).
	    """
	    runtime = get_runtime_settings()
	    if not _is_llama_cpp_backend(runtime) or TEXT_FALLBACK_TO_MOCK in _TEXT_FALLBACKS:
	        return _reply_as_object_mock(persona_data, message)

	    try:
	        return _reply_as_object_llama_cpp(persona_data, message, runtime)
	    except Exception as error:
	        _log_text_fallback("chat", error)
	        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
	    return _reply_as_object_mock(persona_data, message)


	def reset_text_runtime_fallbacks() -> None:
	    """Discard every fallback marker recorded so far (the list is emptied in place)."""
	    del _TEXT_FALLBACKS[:]


	def get_text_runtime_fallbacks() -> list[str]:
	    """Return a copy of the recorded fallback markers, so callers cannot mutate the module state."""
	    return _TEXT_FALLBACKS.copy()


	def _generate_persona_mock(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
	    """Build a template persona without calling any model.

	    Mood and secret fear come from ``MODE_PROFILES``; an unknown ``mode``
	    uses the "Cynical" preset. The remaining fields are derived from the
	    object's name.
	    """
	    name = object_understanding.object.name
	    try:
	        preset = MODE_PROFILES[mode]
	    except KeyError:
	        preset = MODE_PROFILES["Cynical"]

	    return PersonaEnvelope(
	        persona=Persona(
	            object_name=name,
	            character_name=_character_name(name, mode),
	            mood=preset["mood"],
	            secret_fear=preset["fear"],
	            core_memory=f"survived many quiet hours as a {name} while humans called it normal life",
	            complaint=f"I am not just a {name}. I am an unpaid witness with excellent recall.",
	            tags=_tags_for_mode(mode),
	        )
	    )


	def _generate_diary_mock(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
	    """Compose a template diary entry (English and Chinese) from the persona fields.

	    ``mode`` is accepted for signature parity with the llama.cpp variant but
	    is not used. The day number in the title is 417 plus the length of the
	    object name.
	    """
	    details = persona.persona
	    thing, mood, fear = details.object_name, details.mood, details.secret_fear

	    english_sentences = [
	        "They touched me again today with the confidence of someone who has never asked "
	        f"a {thing} for consent.",
	        "I remained still, because that is my contract with gravity.",
	        f"My mood is {mood}, my secret fear is {fear}, and my only comfort is knowing "
	        "I have outlived at least three urgent plans.",
	    ]
	    chinese_sentences = [
	        f"今天他们又理所当然地碰了我，好像一个 {thing} 不会有边界感。",
	        "我保持沉默，因为这大概是我和重力签下的合同。",
	        f"我的情绪是 {mood}，秘密恐惧是 {fear}。",
	        "至少，我已经熬过了好几个所谓紧急计划。",
	    ]

	    return DiaryEntry(
	        title=f"Secret Diary - Day {417 + len(thing)}",
	        english=" ".join(english_sentences),
	        chinese="".join(chinese_sentences),
	    )


	def _reply_as_object_mock(persona_data: dict, message: str) -> str:
	    """Return a canned in-character reply without calling any model.

	    This is the last-resort path used after the llama.cpp reply failed, which
	    includes the case where ``persona_data`` did not validate. It therefore
	    must not raise on malformed input: a missing, ``None`` or non-dict
	    ``persona`` entry (or a non-dict ``persona_data``) is treated as an empty
	    persona and the built-in defaults are used.

	    Args:
	        persona_data: Mapping expected to hold the persona fields under "persona".
	        message: The user's message; blank or ``None`` is rendered as "...".

	    Returns:
	        A single-line reply prefixed with the character name.
	    """
	    persona = persona_data.get("persona") if isinstance(persona_data, dict) else None
	    if not isinstance(persona, dict):
	        # Malformed persona: fall back to defaults instead of crashing the fallback.
	        persona = {}

	    character_name = persona.get("character_name", "The Object")
	    object_name = persona.get("object_name", "object")
	    mood = persona.get("mood", "suspicious")
	    complaint = persona.get("complaint", "I have seen enough.")
	    clean_message = (message or "").strip() or "..."

	    return (
	        f"{character_name}: You ask me about '{clean_message}', as if a {object_name} "
	        f"with a {mood} mood has unlimited office hours. {complaint}"
	    )


	def _generate_persona_llama_cpp(
	    object_understanding: ObjectUnderstanding,
	    mode: str,
	    settings: RuntimeSettings,
	) -> PersonaEnvelope:
	    """Ask the llama.cpp model for a persona and validate the JSON it returns.

	    The model receives the mode and the JSON dump of ``object_understanding``
	    and may generate up to 320 tokens. Validation errors propagate to the caller.
	    """
	    payload = {
	        "mode": mode,
	        "object_understanding": object_understanding.model_dump(mode="json"),
	    }
	    model_json = _run_llama_json(
	        system_prompt=PERSONA_GENERATION_PROMPT,
	        user_payload=payload,
	        settings=settings,
	        max_tokens=320,
	    )
	    return PersonaEnvelope.model_validate(model_json)


	def _generate_persona_and_diary_llama_cpp(
	    object_understanding: ObjectUnderstanding,
	    mode: str,
	    settings: RuntimeSettings,
	) -> tuple[PersonaEnvelope, DiaryEntry]:
	    """Request persona and diary from llama.cpp in one completion (up to 1024 tokens).

	    The response is expected to carry "persona" and "diary" keys; each part
	    is validated separately, and a missing part fails validation.
	    """
	    payload = {
	        "mode": mode,
	        "object_understanding": object_understanding.model_dump(mode="json"),
	    }
	    model_json = _run_llama_json(
	        system_prompt=PERSONA_DIARY_GENERATION_PROMPT,
	        user_payload=payload,
	        settings=settings,
	        max_tokens=1024,
	    )
	    # Re-wrap the bare persona object so it matches the envelope schema.
	    envelope = PersonaEnvelope.model_validate({"persona": model_json.get("persona")})
	    entry = DiaryEntry.model_validate(model_json.get("diary"))
	    return envelope, entry


	def _generate_diary_llama_cpp(
	    persona: PersonaEnvelope,
	    mode: str,
	    settings: RuntimeSettings,
	) -> DiaryEntry:
	    """Ask the llama.cpp model for a diary entry (up to 360 tokens) and validate it.

	    The model receives the mode and the JSON dump of ``persona``.
	    """
	    payload = {
	        "mode": mode,
	        "persona": persona.model_dump(mode="json"),
	    }
	    model_json = _run_llama_json(
	        system_prompt=DIARY_GENERATION_PROMPT,
	        user_payload=payload,
	        settings=settings,
	        max_tokens=360,
	    )
	    return DiaryEntry.model_validate(model_json)


	def _reply_as_object_llama_cpp(
	    persona_data: dict,
	    message: str,
	    settings: RuntimeSettings,
	) -> str:
	    """Generate an in-character chat reply with llama.cpp (up to 180 tokens).

	    Raises:
	        ValueError: If the model's JSON has no non-empty string under "reply".
	        Validation errors for a malformed ``persona_data`` also propagate.
	    """
	    # Validate only; the raw dict (not the validated model) is sent to the LLM.
	    PersonaEnvelope.model_validate(persona_data)

	    user_message = message.strip() or "..."
	    model_json = _run_llama_json(
	        system_prompt=CHAT_REPLY_PROMPT,
	        user_payload={"persona": persona_data, "message": user_message},
	        settings=settings,
	        max_tokens=180,
	    )

	    candidate = model_json.get("reply")
	    if isinstance(candidate, str):
	        trimmed = candidate.strip()
	        if trimmed:
	            return trimmed
	    raise ValueError("llama.cpp chat response did not include a non-empty reply.")


	def _run_llama_json(
	    *,
	    system_prompt: str,
	    user_payload: dict[str, Any],
	    settings: RuntimeSettings,
	    max_tokens: int,
	) -> dict[str, Any]:
	    """Run one llama.cpp completion and parse its output as a JSON object.

	    ``user_payload`` is serialised as pretty-printed JSON (non-ASCII kept
	    as-is) and sent as the user turn; the model's text is then handed to
	    ``parse_json_object``.
	    """
	    llama = _load_llama_model(settings.text_model_path, settings=settings)
	    completion_text = _complete_llama(
	        llama,
	        system_prompt=system_prompt,
	        user_content=json.dumps(user_payload, ensure_ascii=False, indent=2),
	        max_tokens=max_tokens,
	    )
	    return parse_json_object(completion_text)


	def _complete_llama(
	    model: Any,
	    *,
	    system_prompt: str,
	    user_content: str,
	    max_tokens: int,
	) -> str:
	    """Run a single completion on ``model`` and return the generated text.

	    If the model exposes ``create_chat_completion`` the prompts are sent as
	    system/user chat messages; otherwise the model is called directly with a
	    flat "System / User / Assistant JSON" prompt.

	    Args:
	        model: A llama.cpp model object (or any callable with the same shape).
	        system_prompt: Instructions for the system turn.
	        user_content: Text of the user turn.
	        max_tokens: Upper bound on generated tokens.

	    Returns:
	        The completion text extracted from the model response.

	    Raises:
	        ValueError: If the response carries no usable text.
	    """
	    # End-of-turn markers written literally (no escaping) so they match the
	    # tokens the model emits and generation stops at the end of the answer.
	    stop = ["</s>", "<|end|>", "<|eot_id|>", "<|im_end|>"]
	    # Low temperature keeps the JSON output stable; shared by both call styles.
	    sampling = {
	        "temperature": 0.2,
	        "top_p": 0.9,
	        "max_tokens": max_tokens,
	        "stop": stop,
	    }

	    if hasattr(model, "create_chat_completion"):
	        response = model.create_chat_completion(
	            messages=[
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": user_content},
	            ],
	            **sampling,
	        )
	        return _extract_completion_text(response)

	    prompt = f"System:\n{system_prompt}\n\nUser:\n{user_content}\n\nAssistant JSON:\n"
	    response = model(prompt, **sampling)
	    return _extract_completion_text(response)


	def _extract_completion_text(response: Any) -> str:
	    """Pull the generated text out of a llama.cpp response.

	    A plain string is returned unchanged. For a dict, the first entry of
	    "choices" is inspected: chat-style ``message.content`` wins, then
	    completion-style ``text``.

	    Raises:
	        ValueError: If the response is neither str nor dict, has no choices,
	            its first choice is not a dict, or no text field is a string.
	    """
	    if isinstance(response, str):
	        return response
	    if not isinstance(response, dict):
	        raise ValueError("llama.cpp returned an unsupported response type.")

	    choices = response.get("choices")
	    if not (isinstance(choices, list) and choices):
	        raise ValueError("llama.cpp response did not include choices.")

	    head = choices[0]
	    if not isinstance(head, dict):
	        raise ValueError("llama.cpp response choice was not an object.")

	    chat_message = head.get("message")
	    chat_text = chat_message.get("content") if isinstance(chat_message, dict) else None
	    if isinstance(chat_text, str):
	        return chat_text

	    plain_text = head.get("text")
	    if isinstance(plain_text, str):
	        return plain_text

	    raise ValueError("llama.cpp response did not include text content.")


	def _load_llama_model(text_model_path: str, *, settings: RuntimeSettings | None = None) -> Any:
	    """Return the llama.cpp model for ``text_model_path``, loading it on first use.

	    One model instance is cached in module globals together with the path it
	    was loaded from; a later call with the same resolved path reuses it, and
	    a different path loads a new model that replaces the cached one.

	    Args:
	        text_model_path: Local GGUF path; when blank, the path is resolved
	            through ``_resolve_text_model_path`` using ``settings``.
	        settings: Runtime settings forwarded to the path resolver.

	    Returns:
	        The loaded ``llama_cpp.Llama`` instance.

	    Raises:
	        FileNotFoundError: If the resolved path does not exist on disk.
	        ValueError: Propagated from the resolver when nothing is configured.
	    """
	    global _LLAMA_MODEL, _LLAMA_MODEL_PATH

	    clean_path = _resolve_text_model_path(text_model_path, settings)
	    if not Path(clean_path).exists():
	        raise FileNotFoundError(f"TEXT_MODEL_PATH does not exist: {clean_path}")

	    if _LLAMA_MODEL is not None and _LLAMA_MODEL_PATH == clean_path:
	        return _LLAMA_MODEL

	    # Imported lazily so the module still imports when llama-cpp-python is absent.
	    from llama_cpp import Llama

	    _LLAMA_MODEL = Llama(
	        model_path=clean_path,
	        n_ctx=2048,
	        verbose=False,
	    )
	    _LLAMA_MODEL_PATH = clean_path
	    return _LLAMA_MODEL


	def _resolve_text_model_path(
	    text_model_path: str,
	    settings: RuntimeSettings | None = None,
	) -> str:
	    """Work out which local GGUF file to load.

	    An explicit, non-blank ``text_model_path`` wins and is returned stripped.
	    Otherwise, when both a repo id and a filename are configured, the file is
	    fetched through ``_download_hf_gguf`` and its local path is returned.

	    Args:
	        text_model_path: Configured local path; blank or ``None`` means "not set".
	        settings: Settings to read the repo configuration from; when omitted,
	            ``get_runtime_settings()`` is consulted.

	    Raises:
	        ValueError: If neither a path nor a repo id + filename is configured.
	    """
	    clean_path = (text_model_path or "").strip()
	    if clean_path:
	        return clean_path

	    current = settings or get_runtime_settings()
	    if current.text_model_repo_id.strip() and current.text_model_filename.strip():
	        return _download_hf_gguf(current)

	    raise ValueError(
	        "TEXT_MODEL_PATH is not configured, and TEXT_MODEL_REPO_ID/TEXT_MODEL_FILENAME "
	        "are not configured."
	    )


	def _download_hf_gguf(settings: RuntimeSettings) -> str:
	    """Fetch the configured GGUF file via ``hf_hub_download`` and return its result.

	    Repo id, filename and the optional revision are read from ``settings``
	    and stripped; the revision is only passed along when it is non-blank.
	    """
	    # Lazy import: huggingface_hub is only needed when no local path is set.
	    from huggingface_hub import hf_hub_download

	    download_args: dict[str, str] = dict(
	        repo_id=settings.text_model_repo_id.strip(),
	        filename=settings.text_model_filename.strip(),
	        repo_type="model",
	    )
	    pinned_revision = settings.text_model_revision.strip()
	    if pinned_revision:
	        download_args.update(revision=pinned_revision)
	    return hf_hub_download(**download_args)


	def _is_llama_cpp_backend(settings: RuntimeSettings) -> bool:
	    """Tell whether the configured text backend names llama.cpp (case/whitespace-insensitive)."""
	    backend_name = settings.text_backend.strip().lower()
	    return backend_name in LLAMA_CPP_BACKENDS


	def _add_text_fallback(marker: str) -> None:
	    """Record ``marker`` in the fallback list unless it is already there."""
	    if marker in _TEXT_FALLBACKS:
	        return
	    _TEXT_FALLBACKS.append(marker)


	def _log_text_fallback(stage: str, exc: Exception) -> None:
	    """Print a one-line notice that ``stage`` fell back to mock output.

	    Only the exception's class name is printed, not its message; stdout is
	    flushed immediately.
	    """
	    error_kind = type(exc).__name__
	    notice = f"[Objectverse Diary] Text runtime fell back to mock during {stage}: {error_kind}"
	    print(notice, flush=True)


	def _character_name(object_name: str, mode: str) -> str:
	    """Invent a character name from the object's name and the mode.

	    The first two whitespace-separated words of ``object_name`` are
	    capitalised and fused together, then followed by a space and a
	    mode-specific suffix ("worth" for unknown modes). Surrounding whitespace
	    is trimmed, so an empty object name yields just the suffix.
	    """
	    suffix_by_mode = {
	        "Cynical": "worth",
	        "Dramatic": "von Sigh",
	        "Lonely": "Afterlight",
	        "Philosopher": "the Questioning",
	        "Romantic": "de Moon",
	    }
	    leading_words = object_name.split()[:2]
	    fused = "".join(map(str.capitalize, leading_words))
	    ending = suffix_by_mode.get(mode, "worth")
	    return " ".join((fused, ending)).strip()


	def _tags_for_mode(mode: str) -> list[str]:
	    """Return the three persona tags for ``mode`` (a generic trio for unknown modes).

	    The table is rebuilt on every call, so each caller gets its own list.
	    """
	    tag_table = {
	        "Cynical": ["desk survivor", "burnt optimism", "quiet judgment"],
	        "Dramatic": ["tragic prop", "grand entrance", "minor catastrophe"],
	        "Lonely": ["forgotten corner", "soft echo", "dust companion"],
	        "Philosopher": ["tiny ontology", "useful doubt", "meaning crisis"],
	        "Romantic": ["tender witness", "hopeless glow", "secret devotion"],
	    }
	    if mode in tag_table:
	        return tag_table[mode]
	    return ["odd witness", "secret life", "object soul"]