ObjectverseDiary / src /models /llama_cpp_runner.py
qqyule's picture
Deploy Hub GGUF downloader runtime
c45600f verified
"""Text generation runtime with mock and optional llama.cpp backends."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from src.config import RuntimeSettings, get_runtime_settings
from src.models.schema import DiaryEntry, ObjectUnderstanding, Persona, PersonaEnvelope
from src.prompts.diary_generation import (
CHAT_REPLY_PROMPT,
DIARY_GENERATION_PROMPT,
PERSONA_DIARY_GENERATION_PROMPT,
)
from src.prompts.persona_generation import PERSONA_GENERATION_PROMPT
from src.utils.json_repair import parse_json_object
# Per-mode personality presets for the mock persona generator.
# _generate_persona_mock reads the "mood" and "fear" entries and falls back to
# the "Cynical" preset for unknown modes; "voice" is not read in this part of
# the file.
MODE_PROFILES = {
"Cynical": {
"mood": "tired but sarcastic",
"fear": "being replaced by a newer object with worse opinions",
"voice": "dry",
},
"Dramatic": {
"mood": "theatrical and wounded",
"fear": "being forgotten before the final act",
"voice": "operatic",
},
"Lonely": {
"mood": "softly abandoned",
"fear": "becoming invisible in plain sight",
"voice": "quiet",
},
"Philosopher": {
"mood": "curious and needlessly profound",
"fear": "discovering that usefulness is not meaning",
"voice": "reflective",
},
"Romantic": {
"mood": "hopelessly sentimental",
"fear": "loving a human who only sees storage capacity",
"voice": "wistful",
},
}
# Accepted spellings of the llama.cpp backend name; compared against the
# stripped, lower-cased ``text_backend`` setting in _is_llama_cpp_backend.
LLAMA_CPP_BACKENDS = {"llama-cpp", "llama_cpp", "llamacpp"}
# Marker appended to _TEXT_FALLBACKS when a llama.cpp call fails and the mock
# implementation is used instead.
TEXT_FALLBACK_TO_MOCK = "text-fallback-to-mock"
# Cached llama.cpp model instance and the resolved path it was loaded from;
# both are written by _load_llama_model.
_LLAMA_MODEL: Any | None = None
_LLAMA_MODEL_PATH: str | None = None
# Fallback markers recorded so far; emptied by reset_text_runtime_fallbacks().
_TEXT_FALLBACKS: list[str] = []
def generate_persona(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
    """Build a persona for the recognised object.

    Uses llama.cpp when the configured text backend selects it; any exception
    raised there is logged, recorded as a fallback marker, and answered with
    the mock persona instead. Other backends go straight to the mock.
    """
    runtime = get_runtime_settings()
    if not _is_llama_cpp_backend(runtime):
        return _generate_persona_mock(object_understanding, mode)
    try:
        envelope = _generate_persona_llama_cpp(object_understanding, mode, runtime)
    except Exception as error:
        # Best-effort runtime: never surface a model failure to the caller.
        _log_text_fallback("persona", error)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        return _generate_persona_mock(object_understanding, mode)
    return envelope
def generate_persona_and_diary(
    object_understanding: ObjectUnderstanding,
    mode: str,
) -> tuple[PersonaEnvelope, DiaryEntry]:
    """Produce a persona and its diary entry in one step.

    With a llama.cpp backend both are requested from the model in a single
    call; if that call raises, the failure is logged and recorded and the
    mock persona plus mock diary are returned instead.
    """
    runtime = get_runtime_settings()
    llama_enabled = _is_llama_cpp_backend(runtime)
    if llama_enabled:
        try:
            return _generate_persona_and_diary_llama_cpp(object_understanding, mode, runtime)
        except Exception as error:
            _log_text_fallback("persona+diary", error)
            _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
    # Mock path: the diary is derived from the mock persona.
    mock_persona = _generate_persona_mock(object_understanding, mode)
    mock_diary = _generate_diary_mock(mock_persona, mode)
    return mock_persona, mock_diary
def generate_diary(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
    """Write a diary entry for an existing persona.

    llama.cpp is only attempted when it is the configured backend and no
    mock fallback has been recorded yet; otherwise, or when the attempt
    raises, the mock diary is returned.
    """
    runtime = get_runtime_settings()
    if not _is_llama_cpp_backend(runtime) or TEXT_FALLBACK_TO_MOCK in _TEXT_FALLBACKS:
        return _generate_diary_mock(persona, mode)
    try:
        entry = _generate_diary_llama_cpp(persona, mode, runtime)
    except Exception as error:
        _log_text_fallback("diary", error)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        return _generate_diary_mock(persona, mode)
    return entry
def reply_as_object(persona_data: dict, message: str) -> str:
    """Answer a chat message in the voice of the given persona.

    llama.cpp is only attempted when it is the configured backend and no
    mock fallback has been recorded yet; otherwise, or when the attempt
    raises, the templated mock reply is returned.
    """
    runtime = get_runtime_settings()
    if not _is_llama_cpp_backend(runtime) or TEXT_FALLBACK_TO_MOCK in _TEXT_FALLBACKS:
        return _reply_as_object_mock(persona_data, message)
    try:
        answer = _reply_as_object_llama_cpp(persona_data, message, runtime)
    except Exception as error:
        _log_text_fallback("chat", error)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        return _reply_as_object_mock(persona_data, message)
    return answer
def reset_text_runtime_fallbacks() -> None:
    """Forget every recorded fallback marker (the shared list is emptied in place)."""
    del _TEXT_FALLBACKS[:]
def get_text_runtime_fallbacks() -> list[str]:
    """Return a copy of the recorded fallback markers, so callers cannot mutate module state."""
    snapshot = _TEXT_FALLBACKS[:]
    return snapshot
def _generate_persona_mock(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
    """Build a deterministic persona from templates, without calling any model.

    Unknown modes use the "Cynical" preset for mood and fear.
    """
    name = object_understanding.object.name
    preset = MODE_PROFILES.get(mode, MODE_PROFILES["Cynical"])
    return PersonaEnvelope(
        persona=Persona(
            object_name=name,
            character_name=_character_name(name, mode),
            mood=preset["mood"],
            secret_fear=preset["fear"],
            core_memory=f"survived many quiet hours as a {name} while humans called it normal life",
            complaint=f"I am not just a {name}. I am an unpaid witness with excellent recall.",
            tags=_tags_for_mode(mode),
        )
    )
def _generate_diary_mock(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
    """Render a templated bilingual diary entry for the persona.

    ``mode`` is accepted for signature parity with the llama.cpp variant and
    is not used here.
    """
    details = persona.persona
    thing = details.object_name
    mood = details.mood
    fear = details.secret_fear

    # The day number is a fixed offset plus the length of the object name.
    heading = f"Secret Diary - Day {417 + len(thing)}"
    english_text = (
        f"They touched me again today with the confidence of someone who has never asked "
        f"a {thing} for consent. I remained still, because that is my contract with gravity. "
        f"My mood is {mood}, my secret fear is {fear}, and my only comfort is knowing "
        "I have outlived at least three urgent plans."
    )
    chinese_text = (
        f"今天他们又理所当然地碰了我,好像一个 {thing} 不会有边界感。"
        f"我保持沉默,因为这大概是我和重力签下的合同。我的情绪是 {mood},"
        f"秘密恐惧是 {fear}。至少,我已经熬过了好几个所谓紧急计划。"
    )
    return DiaryEntry(title=heading, english=english_text, chinese=chinese_text)
def _reply_as_object_mock(persona_data: dict, message: str) -> str:
persona = persona_data.get("persona", {})
character_name = persona.get("character_name", "The Object")
object_name = persona.get("object_name", "object")
mood = persona.get("mood", "suspicious")
complaint = persona.get("complaint", "I have seen enough.")
clean_message = message.strip() or "..."
return (
f"{character_name}: You ask me about '{clean_message}', as if a {object_name} "
f"with a {mood} mood has unlimited office hours. {complaint}"
)
def _generate_persona_llama_cpp(
    object_understanding: ObjectUnderstanding,
    mode: str,
    settings: RuntimeSettings,
) -> PersonaEnvelope:
    """Ask the llama.cpp model for a persona and validate it as a PersonaEnvelope."""
    payload = {
        "mode": mode,
        "object_understanding": object_understanding.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=PERSONA_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=320,
    )
    return PersonaEnvelope.model_validate(parsed)
def _generate_persona_and_diary_llama_cpp(
    object_understanding: ObjectUnderstanding,
    mode: str,
    settings: RuntimeSettings,
) -> tuple[PersonaEnvelope, DiaryEntry]:
    """Request persona and diary from the model in one call and validate both.

    The parsed JSON object is read through its ``"persona"`` and ``"diary"``
    keys; each part is validated separately.
    """
    payload = {
        "mode": mode,
        "object_understanding": object_understanding.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=PERSONA_DIARY_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=1024,
    )
    # Re-wrap the persona so it matches the envelope shape before validating.
    envelope = PersonaEnvelope.model_validate({"persona": parsed.get("persona")})
    entry = DiaryEntry.model_validate(parsed.get("diary"))
    return envelope, entry
def _generate_diary_llama_cpp(
    persona: PersonaEnvelope,
    mode: str,
    settings: RuntimeSettings,
) -> DiaryEntry:
    """Ask the llama.cpp model for a diary entry and validate it as a DiaryEntry."""
    payload = {
        "mode": mode,
        "persona": persona.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=DIARY_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=360,
    )
    return DiaryEntry.model_validate(parsed)
def _reply_as_object_llama_cpp(
    persona_data: dict,
    message: str,
    settings: RuntimeSettings,
) -> str:
    """Ask the llama.cpp model for an in-character chat reply.

    Raises:
        ValueError: If the parsed response has no non-empty string ``"reply"``.
    """
    # Validation only: the result is discarded and the raw dict is what the
    # model receives.
    PersonaEnvelope.model_validate(persona_data)
    payload = {
        "persona": persona_data,
        "message": message.strip() or "...",
    }
    parsed = _run_llama_json(
        system_prompt=CHAT_REPLY_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=180,
    )
    candidate = parsed.get("reply")
    text = candidate.strip() if isinstance(candidate, str) else ""
    if not text:
        raise ValueError("llama.cpp chat response did not include a non-empty reply.")
    return text
def _run_llama_json(
    *,
    system_prompt: str,
    user_payload: dict[str, Any],
    settings: RuntimeSettings,
    max_tokens: int,
) -> dict[str, Any]:
    """Run one completion and parse its output into a JSON object.

    The payload is serialised as indented JSON with non-ASCII characters
    preserved and sent as the user turn; the raw completion text is passed
    to ``parse_json_object``.
    """
    llama = _load_llama_model(settings.text_model_path, settings=settings)
    completion = _complete_llama(
        llama,
        system_prompt=system_prompt,
        user_content=json.dumps(user_payload, ensure_ascii=False, indent=2),
        max_tokens=max_tokens,
    )
    return parse_json_object(completion)
def _complete_llama(
    model: Any,
    *,
    system_prompt: str,
    user_content: str,
    max_tokens: int,
) -> str:
    """Generate text from ``model`` and return the completion as a string.

    Models exposing ``create_chat_completion`` get a system + user message
    pair; any other model is called directly with a flattened plain-text
    prompt. Both paths share the same sampling parameters and stop tokens.
    """
    sampling = {
        "temperature": 0.2,
        "top_p": 0.9,
        "max_tokens": max_tokens,
        "stop": ["</s>", "<|end|>", "<|eot_id|>", "<|im_end|>"],
    }
    if not hasattr(model, "create_chat_completion"):
        # No chat API: fold both roles into one prompt for the callable model.
        flat_prompt = f"System:\n{system_prompt}\n\nUser:\n{user_content}\n\nAssistant JSON:\n"
        return _extract_completion_text(model(flat_prompt, **sampling))

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    result = model.create_chat_completion(messages=conversation, **sampling)
    return _extract_completion_text(result)
def _extract_completion_text(response: Any) -> str:
if isinstance(response, str):
return response
if not isinstance(response, dict):
raise ValueError("llama.cpp returned an unsupported response type.")
choices = response.get("choices")
if not isinstance(choices, list) or not choices:
raise ValueError("llama.cpp response did not include choices.")
first = choices[0]
if not isinstance(first, dict):
raise ValueError("llama.cpp response choice was not an object.")
message = first.get("message")
if isinstance(message, dict) and isinstance(message.get("content"), str):
return message["content"]
if isinstance(first.get("text"), str):
return first["text"]
raise ValueError("llama.cpp response did not include text content.")
def _load_llama_model(text_model_path: str, *, settings: RuntimeSettings | None = None) -> Any:
    """Return a llama.cpp model for the resolved path, reusing the cached one.

    The model is only constructed when nothing is cached or the resolved
    path differs from the cached one. ``llama_cpp`` is imported lazily, so
    it is not needed unless a model is actually built.

    Raises:
        FileNotFoundError: If the resolved model path does not exist.
    """
    global _LLAMA_MODEL, _LLAMA_MODEL_PATH

    resolved = _resolve_text_model_path(text_model_path, settings)
    # The existence check runs on every call, before the cache is consulted.
    if not Path(resolved).exists():
        raise FileNotFoundError(f"TEXT_MODEL_PATH does not exist: {resolved}")

    cached = _LLAMA_MODEL is not None and _LLAMA_MODEL_PATH == resolved
    if not cached:
        from llama_cpp import Llama

        _LLAMA_MODEL = Llama(model_path=resolved, n_ctx=2048, verbose=False)
        _LLAMA_MODEL_PATH = resolved
    return _LLAMA_MODEL
def _resolve_text_model_path(
text_model_path: str,
settings: RuntimeSettings | None = None,
) -> str:
clean_path = text_model_path.strip()
if clean_path:
return clean_path
current = settings or get_runtime_settings()
if current.text_model_repo_id.strip() and current.text_model_filename.strip():
return _download_hf_gguf(current)
raise ValueError(
"TEXT_MODEL_PATH is not configured, and TEXT_MODEL_REPO_ID/TEXT_MODEL_FILENAME "
"are not configured."
)
def _download_hf_gguf(settings: RuntimeSettings) -> str:
    """Fetch the configured model file via ``hf_hub_download`` and return its result.

    ``huggingface_hub`` is imported lazily. A ``revision`` is only passed
    when the configured value is non-blank.
    """
    from huggingface_hub import hf_hub_download

    request: dict[str, str] = dict(
        repo_id=settings.text_model_repo_id.strip(),
        filename=settings.text_model_filename.strip(),
        repo_type="model",
    )
    pinned_revision = settings.text_model_revision.strip()
    if pinned_revision:
        request["revision"] = pinned_revision
    return hf_hub_download(**request)
def _is_llama_cpp_backend(settings: RuntimeSettings) -> bool:
    """Tell whether the configured text backend names llama.cpp (whitespace and case ignored)."""
    backend_name = settings.text_backend.strip().lower()
    return backend_name in LLAMA_CPP_BACKENDS
def _add_text_fallback(marker: str) -> None:
    """Record ``marker`` in the fallback list, keeping each marker at most once."""
    if marker in _TEXT_FALLBACKS:
        return
    _TEXT_FALLBACKS.append(marker)
def _log_text_fallback(stage: str, exc: Exception) -> None:
print(
f"[Objectverse Diary] Text runtime fell back to mock during {stage}: {type(exc).__name__}",
flush=True,
)
def _character_name(object_name: str, mode: str) -> str:
compact = "".join(part.capitalize() for part in object_name.split()[:2])
suffix = {
"Cynical": "worth",
"Dramatic": "von Sigh",
"Lonely": "Afterlight",
"Philosopher": "the Questioning",
"Romantic": "de Moon",
}.get(mode, "worth")
return f"{compact} {suffix}".strip()
def _tags_for_mode(mode: str) -> list[str]:
return {
"Cynical": ["desk survivor", "burnt optimism", "quiet judgment"],
"Dramatic": ["tragic prop", "grand entrance", "minor catastrophe"],
"Lonely": ["forgotten corner", "soft echo", "dust companion"],
"Philosopher": ["tiny ontology", "useful doubt", "meaning crisis"],
"Romantic": ["tender witness", "hopeless glow", "secret devotion"],
}.get(mode, ["odd witness", "secret life", "object soul"])