File size: 13,235 Bytes
e20e3d9
bc02199
 
 
e20e3d9
 
 
 
 
bc02199
c45600f
 
 
 
 
e20e3d9
 
bc02199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e20e3d9
 
 
 
 
 
 
bc02199
 
e20e3d9
 
 
 
 
 
 
 
 
 
 
c45600f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e20e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d30bd8e
e20e3d9
 
 
 
 
 
 
 
 
 
 
 
 
bc02199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e20e3d9
bc02199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e20e3d9
bc02199
 
 
 
 
 
 
 
 
 
 
 
 
e20e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c45600f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e20e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c45600f
e20e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c45600f
 
e20e3d9
 
 
 
 
 
 
 
c45600f
 
e20e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c45600f
e20e3d9
 
c45600f
e20e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c45600f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e20e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc02199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
"""Text generation runtime with mock and optional llama.cpp backends."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from src.config import RuntimeSettings, get_runtime_settings
from src.models.schema import DiaryEntry, ObjectUnderstanding, Persona, PersonaEnvelope
from src.prompts.diary_generation import (
    CHAT_REPLY_PROMPT,
    DIARY_GENERATION_PROMPT,
    PERSONA_DIARY_GENERATION_PROMPT,
)
from src.prompts.persona_generation import PERSONA_GENERATION_PROMPT
from src.utils.json_repair import parse_json_object


# Personality profile for each supported diary mode, keyed by the mode name.
# The mock persona generator reads "mood" and "fear" from the selected profile
# and falls back to the "Cynical" profile for unknown modes.
# NOTE(review): "voice" is not read by any code visible in this file — confirm
# whether another module consumes it.
MODE_PROFILES = {
    "Cynical": {
        "mood": "tired but sarcastic",
        "fear": "being replaced by a newer object with worse opinions",
        "voice": "dry",
    },
    "Dramatic": {
        "mood": "theatrical and wounded",
        "fear": "being forgotten before the final act",
        "voice": "operatic",
    },
    "Lonely": {
        "mood": "softly abandoned",
        "fear": "becoming invisible in plain sight",
        "voice": "quiet",
    },
    "Philosopher": {
        "mood": "curious and needlessly profound",
        "fear": "discovering that usefulness is not meaning",
        "voice": "reflective",
    },
    "Romantic": {
        "mood": "hopelessly sentimental",
        "fear": "loving a human who only sees storage capacity",
        "voice": "wistful",
    },
}

# Accepted spellings of the llama.cpp backend name; the configured backend is
# stripped and lower-cased before being compared against this set.
LLAMA_CPP_BACKENDS = {"llama-cpp", "llama_cpp", "llamacpp"}
# Marker recorded in _TEXT_FALLBACKS when a llama.cpp call fails and the mock
# implementation is used instead.
TEXT_FALLBACK_TO_MOCK = "text-fallback-to-mock"

# Module-level cache of the loaded llama.cpp model and the resolved path it was
# loaded from; _load_llama_model reuses the model while the path is unchanged.
_LLAMA_MODEL: Any | None = None
_LLAMA_MODEL_PATH: str | None = None
# Fallback markers recorded since the last reset_text_runtime_fallbacks() call.
# While TEXT_FALLBACK_TO_MOCK is present, generate_diary and reply_as_object
# skip the llama.cpp path entirely.
_TEXT_FALLBACKS: list[str] = []


def generate_persona(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
    """Create a persona for the described object in the requested mode.

    When the configured text backend is llama.cpp, the model is tried first.
    Any exception from that path is logged, the mock-fallback marker is
    recorded, and the deterministic mock persona is returned instead.
    """
    runtime = get_runtime_settings()
    if not _is_llama_cpp_backend(runtime):
        return _generate_persona_mock(object_understanding, mode)

    try:
        return _generate_persona_llama_cpp(object_understanding, mode, runtime)
    except Exception as error:
        # Deliberate best-effort: a model failure must not break the caller.
        _log_text_fallback("persona", error)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
    return _generate_persona_mock(object_understanding, mode)


def generate_persona_and_diary(
    object_understanding: ObjectUnderstanding,
    mode: str,
) -> tuple[PersonaEnvelope, DiaryEntry]:
    """Create a persona and its diary entry together.

    With the llama.cpp backend both are requested in a single model call. If
    that call raises, the failure is logged, the mock-fallback marker is
    recorded, and both values come from the mock generators instead.
    """
    runtime = get_runtime_settings()
    if _is_llama_cpp_backend(runtime):
        try:
            generated = _generate_persona_and_diary_llama_cpp(object_understanding, mode, runtime)
        except Exception as error:
            # Deliberate best-effort: fall through to the mock pair below.
            _log_text_fallback("persona+diary", error)
            _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        else:
            return generated

    mock_persona = _generate_persona_mock(object_understanding, mode)
    mock_diary = _generate_diary_mock(mock_persona, mode)
    return mock_persona, mock_diary


def generate_diary(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
    """Write a diary entry for an existing persona.

    The llama.cpp path is used only when that backend is configured and no
    mock fallback has been recorded since the last reset. Any exception from
    it is logged, recorded as a fallback, and answered with the mock diary.
    """
    runtime = get_runtime_settings()
    llama_allowed = (
        _is_llama_cpp_backend(runtime) and TEXT_FALLBACK_TO_MOCK not in _TEXT_FALLBACKS
    )
    if not llama_allowed:
        return _generate_diary_mock(persona, mode)

    try:
        return _generate_diary_llama_cpp(persona, mode, runtime)
    except Exception as error:
        # Deliberate best-effort: a model failure must not break the caller.
        _log_text_fallback("diary", error)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
    return _generate_diary_mock(persona, mode)


def reply_as_object(persona_data: dict, message: str) -> str:
    """Answer a chat message in the voice of the given persona.

    The llama.cpp path is used only when that backend is configured and no
    mock fallback has been recorded since the last reset. Any exception from
    it is logged, recorded as a fallback, and answered with the mock reply.
    """
    runtime = get_runtime_settings()
    llama_allowed = (
        _is_llama_cpp_backend(runtime) and TEXT_FALLBACK_TO_MOCK not in _TEXT_FALLBACKS
    )
    if not llama_allowed:
        return _reply_as_object_mock(persona_data, message)

    try:
        return _reply_as_object_llama_cpp(persona_data, message, runtime)
    except Exception as error:
        # Deliberate best-effort: a model failure must not break the chat.
        _log_text_fallback("chat", error)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
    return _reply_as_object_mock(persona_data, message)


def reset_text_runtime_fallbacks() -> None:
    """Forget every recorded fallback marker, emptying the shared list in place."""
    del _TEXT_FALLBACKS[:]


def get_text_runtime_fallbacks() -> list[str]:
    """Return a copy of the recorded fallback markers.

    A new list is returned so callers cannot mutate the module-level state.
    """
    return _TEXT_FALLBACKS[:]


def _generate_persona_mock(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
    """Build a template-based persona without calling any model.

    Mood and secret fear come from the mode's entry in MODE_PROFILES; the
    remaining fields are derived from the object's name and the mode.
    """
    name = object_understanding.object.name
    # Unknown modes borrow the "Cynical" profile.
    mode_profile = MODE_PROFILES.get(mode, MODE_PROFILES["Cynical"])

    return PersonaEnvelope(
        persona=Persona(
            object_name=name,
            character_name=_character_name(name, mode),
            mood=mode_profile["mood"],
            secret_fear=mode_profile["fear"],
            core_memory=f"survived many quiet hours as a {name} while humans called it normal life",
            complaint=f"I am not just a {name}. I am an unpaid witness with excellent recall.",
            tags=_tags_for_mode(mode),
        )
    )


def _generate_diary_mock(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
    """Build a template-based bilingual diary entry without calling any model.

    The entry is filled in from the persona's object name, mood and secret
    fear. ``mode`` is accepted for signature parity with the model-backed
    generator but is not read here.
    """
    details = persona.persona
    object_name = details.object_name
    mood = details.mood
    secret_fear = details.secret_fear

    english_text = (
        "They touched me again today with the confidence of someone who has never asked "
        f"a {object_name} for consent. I remained still, because that is my contract with gravity. "
        f"My mood is {mood}, my secret fear is {secret_fear}, and my only comfort is knowing "
        "I have outlived at least three urgent plans."
    )
    chinese_text = (
        f"今天他们又理所当然地碰了我,好像一个 {object_name} 不会有边界感。"
        f"我保持沉默,因为这大概是我和重力签下的合同。我的情绪是 {mood},"
        f"秘密恐惧是 {secret_fear}。至少,我已经熬过了好几个所谓紧急计划。"
    )

    # The day counter is a fixed offset plus the length of the object name.
    return DiaryEntry(
        title=f"Secret Diary - Day {417 + len(object_name)}",
        english=english_text,
        chinese=chinese_text,
    )


def _reply_as_object_mock(persona_data: dict, message: str) -> str:
    persona = persona_data.get("persona", {})
    character_name = persona.get("character_name", "The Object")
    object_name = persona.get("object_name", "object")
    mood = persona.get("mood", "suspicious")
    complaint = persona.get("complaint", "I have seen enough.")
    clean_message = message.strip() or "..."

    return (
        f"{character_name}: You ask me about '{clean_message}', as if a {object_name} "
        f"with a {mood} mood has unlimited office hours. {complaint}"
    )


def _generate_persona_llama_cpp(
    object_understanding: ObjectUnderstanding,
    mode: str,
    settings: RuntimeSettings,
) -> PersonaEnvelope:
    """Ask the llama.cpp model for a persona and validate the JSON it returns.

    The mode and the JSON dump of ``object_understanding`` are sent as the
    user payload; the parsed response is validated as a ``PersonaEnvelope``.
    """
    payload = {
        "mode": mode,
        "object_understanding": object_understanding.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=PERSONA_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=320,
    )
    return PersonaEnvelope.model_validate(parsed)


def _generate_persona_and_diary_llama_cpp(
    object_understanding: ObjectUnderstanding,
    mode: str,
    settings: RuntimeSettings,
) -> tuple[PersonaEnvelope, DiaryEntry]:
    """Ask the llama.cpp model for a persona and diary in one call.

    The parsed response is read for top-level ``"persona"`` and ``"diary"``
    entries; each is validated against its schema, persona first.
    """
    payload = {
        "mode": mode,
        "object_understanding": object_understanding.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=PERSONA_DIARY_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=1024,
    )
    # The persona section is re-wrapped so it validates as a full envelope.
    envelope = PersonaEnvelope.model_validate({"persona": parsed.get("persona")})
    entry = DiaryEntry.model_validate(parsed.get("diary"))
    return envelope, entry


def _generate_diary_llama_cpp(
    persona: PersonaEnvelope,
    mode: str,
    settings: RuntimeSettings,
) -> DiaryEntry:
    """Ask the llama.cpp model for a diary entry and validate the JSON it returns.

    The mode and the JSON dump of ``persona`` are sent as the user payload;
    the parsed response is validated as a ``DiaryEntry``.
    """
    payload = {
        "mode": mode,
        "persona": persona.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=DIARY_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=360,
    )
    return DiaryEntry.model_validate(parsed)


def _reply_as_object_llama_cpp(
    persona_data: dict,
    message: str,
    settings: RuntimeSettings,
) -> str:
    """Ask the llama.cpp model for an in-character chat reply.

    Raises:
        ValueError: If the parsed response has no non-blank string under
            ``"reply"``. Validation of ``persona_data`` may also raise.
    """
    # Validate up front so a malformed persona fails before any model work;
    # the validated object itself is not needed.
    PersonaEnvelope.model_validate(persona_data)

    payload = {
        "persona": persona_data,
        "message": message.strip() or "...",
    }
    parsed = _run_llama_json(
        system_prompt=CHAT_REPLY_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=180,
    )

    answer = parsed.get("reply")
    if isinstance(answer, str):
        trimmed = answer.strip()
        if trimmed:
            return trimmed
    raise ValueError("llama.cpp chat response did not include a non-empty reply.")


def _run_llama_json(
    *,
    system_prompt: str,
    user_payload: dict[str, Any],
    settings: RuntimeSettings,
    max_tokens: int,
) -> dict[str, Any]:
    """Run one llama.cpp completion and parse its output as a JSON object.

    The model is loaded (or reused) first, the payload is serialized as
    indented, non-ASCII-escaped JSON for the user turn, and the completion
    text is handed to ``parse_json_object``.
    """
    llama = _load_llama_model(settings.text_model_path, settings=settings)
    completion = _complete_llama(
        llama,
        system_prompt=system_prompt,
        user_content=json.dumps(user_payload, ensure_ascii=False, indent=2),
        max_tokens=max_tokens,
    )
    return parse_json_object(completion)


def _complete_llama(
    model: Any,
    *,
    system_prompt: str,
    user_content: str,
    max_tokens: int,
) -> str:
    """Run a single completion on ``model`` and return the generated text.

    Models exposing ``create_chat_completion`` receive system and user chat
    messages; any other model is called directly with a flattened text
    prompt. Both paths share the same sampling parameters and stop tokens.
    """
    sampling = {
        "temperature": 0.2,
        "top_p": 0.9,
        "max_tokens": max_tokens,
        "stop": ["</s>", "<|end|>", "<|eot_id|>", "<|im_end|>"],
    }

    if not hasattr(model, "create_chat_completion"):
        # No chat API: fold both roles into one plain-text prompt.
        flat_prompt = f"System:\n{system_prompt}\n\nUser:\n{user_content}\n\nAssistant JSON:\n"
        return _extract_completion_text(model(flat_prompt, **sampling))

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    chat_response = model.create_chat_completion(messages=chat_messages, **sampling)
    return _extract_completion_text(chat_response)


def _extract_completion_text(response: Any) -> str:
    if isinstance(response, str):
        return response
    if not isinstance(response, dict):
        raise ValueError("llama.cpp returned an unsupported response type.")

    choices = response.get("choices")
    if not isinstance(choices, list) or not choices:
        raise ValueError("llama.cpp response did not include choices.")

    first = choices[0]
    if not isinstance(first, dict):
        raise ValueError("llama.cpp response choice was not an object.")

    message = first.get("message")
    if isinstance(message, dict) and isinstance(message.get("content"), str):
        return message["content"]
    if isinstance(first.get("text"), str):
        return first["text"]
    raise ValueError("llama.cpp response did not include text content.")


def _load_llama_model(text_model_path: str, *, settings: RuntimeSettings | None = None) -> Any:
    """Return a llama.cpp model for the resolved path, reusing the cached one.

    The path is resolved and checked for existence on every call. A new model
    is constructed only when none is cached or the resolved path differs from
    the cached one.

    Raises:
        FileNotFoundError: If the resolved model path does not exist.
    """
    global _LLAMA_MODEL, _LLAMA_MODEL_PATH

    resolved = _resolve_text_model_path(text_model_path, settings)
    if not Path(resolved).exists():
        raise FileNotFoundError(f"TEXT_MODEL_PATH does not exist: {resolved}")

    cache_hit = _LLAMA_MODEL is not None and _LLAMA_MODEL_PATH == resolved
    if not cache_hit:
        # Imported lazily so the dependency is only needed when a model is built.
        from llama_cpp import Llama

        _LLAMA_MODEL = Llama(model_path=resolved, n_ctx=2048, verbose=False)
        _LLAMA_MODEL_PATH = resolved
    return _LLAMA_MODEL


def _resolve_text_model_path(
    text_model_path: str,
    settings: RuntimeSettings | None = None,
) -> str:
    """Work out which model file to load.

    A non-blank ``text_model_path`` wins and is returned stripped. Otherwise,
    if both the repo id and filename settings are non-blank, the file is
    fetched via ``_download_hf_gguf`` and its path returned.

    Raises:
        ValueError: If neither an explicit path nor a repo id/filename pair
            is configured.
    """
    explicit = text_model_path.strip()
    if not explicit:
        active = settings or get_runtime_settings()
        repo_configured = (
            active.text_model_repo_id.strip() and active.text_model_filename.strip()
        )
        if not repo_configured:
            raise ValueError(
                "TEXT_MODEL_PATH is not configured, and TEXT_MODEL_REPO_ID/TEXT_MODEL_FILENAME "
                "are not configured."
            )
        return _download_hf_gguf(active)
    return explicit


def _download_hf_gguf(settings: RuntimeSettings) -> str:
    """Fetch the configured model file through ``hf_hub_download``.

    The repo id and filename settings are passed stripped, with repo type
    "model". A revision is passed only when the setting is non-blank. The
    value returned by ``hf_hub_download`` is returned unchanged.
    """
    # Imported lazily so the dependency is only needed when a download happens.
    from huggingface_hub import hf_hub_download

    download_args = dict(
        repo_id=settings.text_model_repo_id.strip(),
        filename=settings.text_model_filename.strip(),
        repo_type="model",
    )
    pinned_revision = settings.text_model_revision.strip()
    if pinned_revision:
        download_args["revision"] = pinned_revision
    return hf_hub_download(**download_args)


def _is_llama_cpp_backend(settings: RuntimeSettings) -> bool:
    """Tell whether the configured text backend names llama.cpp.

    The comparison ignores surrounding whitespace and letter case.
    """
    backend_name = settings.text_backend.strip().lower()
    return backend_name in LLAMA_CPP_BACKENDS


def _add_text_fallback(marker: str) -> None:
    """Record ``marker`` in the fallback list unless it is already there."""
    if marker in _TEXT_FALLBACKS:
        return
    _TEXT_FALLBACKS.append(marker)


def _log_text_fallback(stage: str, exc: Exception) -> None:
    print(
        f"[Objectverse Diary] Text runtime fell back to mock during {stage}: {type(exc).__name__}",
        flush=True,
    )


def _character_name(object_name: str, mode: str) -> str:
    compact = "".join(part.capitalize() for part in object_name.split()[:2])
    suffix = {
        "Cynical": "worth",
        "Dramatic": "von Sigh",
        "Lonely": "Afterlight",
        "Philosopher": "the Questioning",
        "Romantic": "de Moon",
    }.get(mode, "worth")
    return f"{compact} {suffix}".strip()


def _tags_for_mode(mode: str) -> list[str]:
    return {
        "Cynical": ["desk survivor", "burnt optimism", "quiet judgment"],
        "Dramatic": ["tragic prop", "grand entrance", "minor catastrophe"],
        "Lonely": ["forgotten corner", "soft echo", "dust companion"],
        "Philosopher": ["tiny ontology", "useful doubt", "meaning crisis"],
        "Romantic": ["tender witness", "hopeless glow", "secret devotion"],
    }.get(mode, ["odd witness", "secret life", "object soul"])