Spaces:
Running on Zero
Running on Zero
| """Text generation runtime with mock and optional llama.cpp backends.""" | |
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from typing import Any | |
| from src.config import RuntimeSettings, get_runtime_settings | |
| from src.models.schema import DiaryEntry, ObjectUnderstanding, Persona, PersonaEnvelope | |
| from src.prompts.diary_generation import ( | |
| CHAT_REPLY_PROMPT, | |
| DIARY_GENERATION_PROMPT, | |
| PERSONA_DIARY_GENERATION_PROMPT, | |
| ) | |
| from src.prompts.persona_generation import PERSONA_GENERATION_PROMPT | |
| from src.utils.json_repair import parse_json_object | |
# Personality presets keyed by diary mode name.
# The mock persona generator copies "mood" and "fear" into the persona it builds and
# falls back to the "Cynical" preset for unknown modes. "voice" is a short style label
# that is not read in this section of the file.
MODE_PROFILES = {
    "Cynical": {
        "mood": "tired but sarcastic",
        "fear": "being replaced by a newer object with worse opinions",
        "voice": "dry",
    },
    "Dramatic": {
        "mood": "theatrical and wounded",
        "fear": "being forgotten before the final act",
        "voice": "operatic",
    },
    "Lonely": {
        "mood": "softly abandoned",
        "fear": "becoming invisible in plain sight",
        "voice": "quiet",
    },
    "Philosopher": {
        "mood": "curious and needlessly profound",
        "fear": "discovering that usefulness is not meaning",
        "voice": "reflective",
    },
    "Romantic": {
        "mood": "hopelessly sentimental",
        "fear": "loving a human who only sees storage capacity",
        "voice": "wistful",
    },
}
# Accepted spellings of the llama.cpp backend; matched against the stripped,
# lower-cased `text_backend` runtime setting.
LLAMA_CPP_BACKENDS = {"llama-cpp", "llama_cpp", "llamacpp"}
# Marker appended to _TEXT_FALLBACKS when a llama.cpp call fails and the mock is used.
TEXT_FALLBACK_TO_MOCK = "text-fallback-to-mock"
# Cached llama.cpp model instance and the resolved path it was loaded from;
# both are rebound by _load_llama_model().
_LLAMA_MODEL: Any | None = None
_LLAMA_MODEL_PATH: str | None = None
# Fallback markers recorded so far; emptied by reset_text_runtime_fallbacks().
_TEXT_FALLBACKS: list[str] = []
def generate_persona(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
    """Create a persona for the described object in the given mode.

    The llama.cpp path is tried when the runtime settings select that backend.
    Any exception it raises is logged, recorded as a fallback marker, and the
    mock persona is returned instead.
    """
    runtime = get_runtime_settings()
    if not _is_llama_cpp_backend(runtime):
        return _generate_persona_mock(object_understanding, mode)

    try:
        envelope = _generate_persona_llama_cpp(object_understanding, mode, runtime)
    except Exception as exc:
        _log_text_fallback("persona", exc)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
    else:
        return envelope

    return _generate_persona_mock(object_understanding, mode)
def generate_persona_and_diary(
    object_understanding: ObjectUnderstanding,
    mode: str,
) -> tuple[PersonaEnvelope, DiaryEntry]:
    """Create a persona and its diary entry together.

    With the llama.cpp backend selected, both are requested in a single model
    call. If that call raises, the failure is logged and recorded, and both
    values are produced by the mock generators instead.
    """
    runtime = get_runtime_settings()
    if _is_llama_cpp_backend(runtime):
        try:
            pair = _generate_persona_and_diary_llama_cpp(object_understanding, mode, runtime)
        except Exception as exc:
            _log_text_fallback("persona+diary", exc)
            _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        else:
            return pair

    mock_persona = _generate_persona_mock(object_understanding, mode)
    mock_diary = _generate_diary_mock(mock_persona, mode)
    return mock_persona, mock_diary
def generate_diary(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
    """Write a diary entry for an existing persona.

    The llama.cpp path is skipped when that backend is not selected, and also
    once a mock-fallback marker has been recorded (until the markers are
    reset). A failure in the llama.cpp path is logged, recorded, and answered
    with the mock diary.
    """
    runtime = get_runtime_settings()
    if not _is_llama_cpp_backend(runtime) or TEXT_FALLBACK_TO_MOCK in _TEXT_FALLBACKS:
        return _generate_diary_mock(persona, mode)

    try:
        entry = _generate_diary_llama_cpp(persona, mode, runtime)
    except Exception as exc:
        _log_text_fallback("diary", exc)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
    else:
        return entry

    return _generate_diary_mock(persona, mode)
def reply_as_object(persona_data: dict, message: str) -> str:
    """Answer a chat message in the voice of the object's persona.

    The llama.cpp path is used only when that backend is selected and no
    mock-fallback marker has been recorded. If it raises, the failure is
    logged and recorded, and the mock reply is returned.
    """
    runtime = get_runtime_settings()
    if not _is_llama_cpp_backend(runtime) or TEXT_FALLBACK_TO_MOCK in _TEXT_FALLBACKS:
        return _reply_as_object_mock(persona_data, message)

    try:
        answer = _reply_as_object_llama_cpp(persona_data, message, runtime)
    except Exception as exc:
        _log_text_fallback("chat", exc)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
    else:
        return answer

    return _reply_as_object_mock(persona_data, message)
def reset_text_runtime_fallbacks() -> None:
    """Forget every recorded fallback marker (the shared list is emptied in place)."""
    del _TEXT_FALLBACKS[:]
def get_text_runtime_fallbacks() -> list[str]:
    """Return a copy of the recorded fallback markers, in insertion order."""
    return _TEXT_FALLBACKS.copy()
def _generate_persona_mock(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
    """Build a persona from fixed templates, without calling any model.

    Mood and secret fear come from the mode's preset in MODE_PROFILES (the
    "Cynical" preset for unknown modes). The remaining fields are derived
    from the object's name and the mode.
    """
    name = object_understanding.object.name
    preset = MODE_PROFILES.get(mode, MODE_PROFILES["Cynical"])
    persona_fields = {
        "object_name": name,
        "character_name": _character_name(name, mode),
        "mood": preset["mood"],
        "secret_fear": preset["fear"],
        "core_memory": f"survived many quiet hours as a {name} while humans called it normal life",
        "complaint": f"I am not just a {name}. I am an unpaid witness with excellent recall.",
        "tags": _tags_for_mode(mode),
    }
    return PersonaEnvelope(persona=Persona(**persona_fields))
def _generate_diary_mock(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
    """Build a templated bilingual diary entry, without calling any model.

    The day number in the title is 417 plus the length of the object's name.
    ``mode`` is accepted for signature parity with the llama.cpp variant and
    is not used here.
    """
    details = persona.persona
    thing = details.object_name
    mood = details.mood
    fear = details.secret_fear

    english_text = "".join(
        [
            "They touched me again today with the confidence of someone who has never asked ",
            f"a {thing} for consent. I remained still, because that is my contract with gravity. ",
            f"My mood is {mood}, my secret fear is {fear}, and my only comfort is knowing ",
            "I have outlived at least three urgent plans.",
        ]
    )
    chinese_text = "".join(
        [
            f"今天他们又理所当然地碰了我,好像一个 {thing} 不会有边界感。",
            f"我保持沉默,因为这大概是我和重力签下的合同。我的情绪是 {mood},",
            f"秘密恐惧是 {fear}。至少,我已经熬过了好几个所谓紧急计划。",
        ]
    )

    day = 417 + len(thing)
    return DiaryEntry(
        title=f"Secret Diary - Day {day}",
        english=english_text,
        chinese=chinese_text,
    )
def _reply_as_object_mock(persona_data: dict, message: str) -> str:
    """Compose a templated in-character reply without calling any model.

    This is the reply used when the llama.cpp path is unavailable or fails.
    That includes failures caused by ``persona_data`` not validating, so this
    function must tolerate malformed input instead of raising in turn.

    Args:
        persona_data: Mapping expected to hold a ``"persona"`` dict. A missing,
            ``None`` or non-dict value is treated as an empty persona.
        message: The user's chat message. ``None`` or blank becomes ``"..."``.

    Returns:
        A single-line reply. Persona fields that are missing or ``None`` are
        replaced by built-in defaults.
    """
    persona = persona_data.get("persona") if isinstance(persona_data, dict) else None
    if not isinstance(persona, dict):
        # An explicit null or wrong-typed persona must not crash the fallback.
        persona = {}

    defaults = {
        "character_name": "The Object",
        "object_name": "object",
        "mood": "suspicious",
        "complaint": "I have seen enough.",
    }
    # Treat explicit None like a missing key so "None" never leaks into the reply.
    fields = {
        key: fallback if persona.get(key) is None else persona[key]
        for key, fallback in defaults.items()
    }

    clean_message = (message or "").strip() or "..."
    return (
        f"{fields['character_name']}: You ask me about '{clean_message}', "
        f"as if a {fields['object_name']} "
        f"with a {fields['mood']} mood has unlimited office hours. {fields['complaint']}"
    )
def _generate_persona_llama_cpp(
    object_understanding: ObjectUnderstanding,
    mode: str,
    settings: RuntimeSettings,
) -> PersonaEnvelope:
    """Ask the llama.cpp model for a persona and validate the parsed JSON.

    The request carries the mode and the JSON dump of the object understanding,
    with a 320-token completion budget. Validation errors propagate to the
    caller.
    """
    payload = {
        "mode": mode,
        "object_understanding": object_understanding.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=PERSONA_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=320,
    )
    return PersonaEnvelope.model_validate(parsed)
def _generate_persona_and_diary_llama_cpp(
    object_understanding: ObjectUnderstanding,
    mode: str,
    settings: RuntimeSettings,
) -> tuple[PersonaEnvelope, DiaryEntry]:
    """Ask the llama.cpp model for a persona and diary in one completion.

    The parsed JSON is expected to hold "persona" and "diary" keys. Each part
    is validated separately, persona first, and validation errors propagate
    to the caller. The completion budget is 1024 tokens.
    """
    payload = {
        "mode": mode,
        "object_understanding": object_understanding.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=PERSONA_DIARY_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=1024,
    )
    envelope = PersonaEnvelope.model_validate({"persona": parsed.get("persona")})
    entry = DiaryEntry.model_validate(parsed.get("diary"))
    return envelope, entry
def _generate_diary_llama_cpp(
    persona: PersonaEnvelope,
    mode: str,
    settings: RuntimeSettings,
) -> DiaryEntry:
    """Ask the llama.cpp model for a diary entry and validate the parsed JSON.

    The request carries the mode and the JSON dump of the persona, with a
    360-token completion budget. Validation errors propagate to the caller.
    """
    payload = {
        "mode": mode,
        "persona": persona.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=DIARY_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=360,
    )
    return DiaryEntry.model_validate(parsed)
def _reply_as_object_llama_cpp(
    persona_data: dict,
    message: str,
    settings: RuntimeSettings,
) -> str:
    """Ask the llama.cpp model for an in-character chat reply.

    Raises:
        ValueError: If the parsed response has no non-empty string under "reply".
        Validation errors from ``PersonaEnvelope.model_validate`` also propagate.
    """
    # Validation only: the result is discarded and the raw dict is sent to the model.
    PersonaEnvelope.model_validate(persona_data)

    payload = {
        "persona": persona_data,
        "message": message.strip() or "...",
    }
    parsed = _run_llama_json(
        system_prompt=CHAT_REPLY_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=180,
    )

    candidate = parsed.get("reply")
    if isinstance(candidate, str):
        trimmed = candidate.strip()
        if trimmed:
            return trimmed
    raise ValueError("llama.cpp chat response did not include a non-empty reply.")
def _run_llama_json(
    *,
    system_prompt: str,
    user_payload: dict[str, Any],
    settings: RuntimeSettings,
    max_tokens: int,
) -> dict[str, Any]:
    """Run one completion and return its output parsed as a JSON object.

    The model is loaded (or reused) first. The payload is then serialised as
    indented, non-ASCII-escaped JSON and sent as the user turn, and the
    completion text is handed to ``parse_json_object``.
    """
    llama = _load_llama_model(settings.text_model_path, settings=settings)
    completion = _complete_llama(
        llama,
        system_prompt=system_prompt,
        user_content=json.dumps(user_payload, ensure_ascii=False, indent=2),
        max_tokens=max_tokens,
    )
    return parse_json_object(completion)
def _complete_llama(
    model: Any,
    *,
    system_prompt: str,
    user_content: str,
    max_tokens: int,
) -> str:
    """Run a single completion on ``model`` and return the generated text.

    Models exposing ``create_chat_completion`` receive system and user chat
    messages. Any other model is called directly with a flat text prompt.
    Both paths use the same sampling settings and stop sequences.
    """
    sampling = {
        "temperature": 0.2,
        "top_p": 0.9,
        "max_tokens": max_tokens,
        "stop": ["</s>", "<|end|>", "<|eot_id|>", "<|im_end|>"],
    }

    if not hasattr(model, "create_chat_completion"):
        # Plain-completion path: fold both turns into one prompt.
        flat_prompt = f"System:\n{system_prompt}\n\nUser:\n{user_content}\n\nAssistant JSON:\n"
        return _extract_completion_text(model(flat_prompt, **sampling))

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    chat_response = model.create_chat_completion(messages=chat_messages, **sampling)
    return _extract_completion_text(chat_response)
def _extract_completion_text(response: Any) -> str:
    """Pull the generated text out of a completion response.

    A plain string is returned as-is. For a dict, the first entry of
    "choices" is inspected: a string at ``message["content"]`` wins, otherwise
    a string at ``"text"`` is used.

    Raises:
        ValueError: If the response is neither str nor dict, has no non-empty
            "choices" list, its first choice is not a dict, or no text is found.
    """
    if isinstance(response, str):
        return response
    if not isinstance(response, dict):
        raise ValueError("llama.cpp returned an unsupported response type.")

    choices = response.get("choices")
    if not (isinstance(choices, list) and choices):
        raise ValueError("llama.cpp response did not include choices.")

    head = choices[0]
    if not isinstance(head, dict):
        raise ValueError("llama.cpp response choice was not an object.")

    # Chat-style payload first, then the plain-completion "text" field.
    chat_message = head.get("message")
    chat_content = chat_message.get("content") if isinstance(chat_message, dict) else None
    if isinstance(chat_content, str):
        return chat_content

    plain_text = head.get("text")
    if isinstance(plain_text, str):
        return plain_text

    raise ValueError("llama.cpp response did not include text content.")
def _load_llama_model(text_model_path: str, *, settings: RuntimeSettings | None = None) -> Any:
    """Return a llama.cpp model for the resolved path, reusing the cached one.

    The path is resolved and checked for existence on every call, before the
    cache is consulted. A new model is constructed only when nothing is cached
    or the cached model came from a different path.

    Raises:
        FileNotFoundError: If the resolved model path does not exist.
    """
    global _LLAMA_MODEL, _LLAMA_MODEL_PATH

    resolved = _resolve_text_model_path(text_model_path, settings)
    if not Path(resolved).exists():
        raise FileNotFoundError(f"TEXT_MODEL_PATH does not exist: {resolved}")

    already_loaded = _LLAMA_MODEL is not None and _LLAMA_MODEL_PATH == resolved
    if already_loaded:
        return _LLAMA_MODEL

    # Imported lazily so the module can be imported without llama_cpp installed.
    from llama_cpp import Llama

    fresh_model = Llama(
        model_path=resolved,
        n_ctx=2048,
        verbose=False,
    )
    _LLAMA_MODEL = fresh_model
    _LLAMA_MODEL_PATH = resolved
    return fresh_model
def _resolve_text_model_path(
    text_model_path: str,
    settings: RuntimeSettings | None = None,
) -> str:
    """Work out which model file to load.

    A non-blank ``text_model_path`` is returned stripped. Otherwise, when the
    settings name both a repo id and a filename, the file is downloaded and
    its local path is returned.

    Raises:
        ValueError: If neither an explicit path nor a repo id/filename pair is set.
    """
    explicit_path = text_model_path.strip()
    if explicit_path:
        return explicit_path

    active = settings or get_runtime_settings()
    if not (active.text_model_repo_id.strip() and active.text_model_filename.strip()):
        raise ValueError(
            "TEXT_MODEL_PATH is not configured, and TEXT_MODEL_REPO_ID/TEXT_MODEL_FILENAME "
            "are not configured."
        )
    return _download_hf_gguf(active)
def _download_hf_gguf(settings: RuntimeSettings) -> str:
    """Fetch the configured model file via ``hf_hub_download`` and return its result.

    The repo id and filename are taken from the settings, stripped. A revision
    is passed only when the configured revision is non-blank.
    """
    # Imported lazily so huggingface_hub is only needed when a download is requested.
    from huggingface_hub import hf_hub_download

    download_args: dict[str, str] = dict(
        repo_id=settings.text_model_repo_id.strip(),
        filename=settings.text_model_filename.strip(),
        repo_type="model",
    )
    pinned_revision = settings.text_model_revision.strip()
    if pinned_revision:
        download_args["revision"] = pinned_revision
    return hf_hub_download(**download_args)
def _is_llama_cpp_backend(settings: RuntimeSettings) -> bool:
    """Tell whether the configured text backend names llama.cpp (case/whitespace-insensitive)."""
    backend_name = settings.text_backend.strip().lower()
    return backend_name in LLAMA_CPP_BACKENDS
def _add_text_fallback(marker: str) -> None:
    """Record a fallback marker once; repeated markers are ignored."""
    if marker in _TEXT_FALLBACKS:
        return
    _TEXT_FALLBACKS.append(marker)
def _log_text_fallback(stage: str, exc: Exception) -> None:
    """Print a one-line notice that ``stage`` fell back to the mock runtime.

    Only the exception's class name is printed, not its message. Output is
    flushed immediately.
    """
    exc_kind = type(exc).__name__
    notice = f"[Objectverse Diary] Text runtime fell back to mock during {stage}: {exc_kind}"
    print(notice, flush=True)
def _character_name(object_name: str, mode: str) -> str:
    """Derive a character name from the object's name and the mode.

    The first two whitespace-separated words are capitalised and joined
    without a space, then followed by a space and the mode's suffix ("worth"
    for unknown modes). Surrounding whitespace is stripped from the result.
    """
    mode_suffixes = {
        "Cynical": "worth",
        "Dramatic": "von Sigh",
        "Lonely": "Afterlight",
        "Philosopher": "the Questioning",
        "Romantic": "de Moon",
    }
    leading_words = object_name.split()[:2]
    stem = "".join(map(str.capitalize, leading_words))
    ending = mode_suffixes.get(mode, "worth")
    return f"{stem} {ending}".strip()
def _tags_for_mode(mode: str) -> list[str]:
    """Return the three persona tags for ``mode``, or a generic trio for unknown modes.

    A fresh list is built on every call, so callers may mutate the result.
    """
    generic_tags = ["odd witness", "secret life", "object soul"]
    tags_by_mode = {
        "Cynical": ["desk survivor", "burnt optimism", "quiet judgment"],
        "Dramatic": ["tragic prop", "grand entrance", "minor catastrophe"],
        "Lonely": ["forgotten corner", "soft echo", "dust companion"],
        "Philosopher": ["tiny ontology", "useful doubt", "meaning crisis"],
        "Romantic": ["tender witness", "hopeless glow", "secret devotion"],
    }
    return tags_by_mode.get(mode, generic_tags)