File size: 8,568 Bytes
8f74b6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d297e4a
 
 
 
 
8f74b6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import os
import json
from PIL import Image, ImageDraw, ImageFont

# Demo mode is active only when the LIFELOG_DEMO environment variable is
# exactly "1"; an unset variable or any other value selects the real models.
DEMO_MODE = os.getenv("LIFELOG_DEMO", "0") == "1"

# Model identifiers handed to the loaders further down, one per modality.
# Edit these values to swap a model.
MODEL_TEXT = "openbmb/MiniCPM5-1B"  # chat / text generation
MODEL_VISION = "openbmb/MiniCPM-V-2_6"  # image question answering
MODEL_ASR = "openai/whisper-small"  # speech-to-text
MODEL_IMAGE = "black-forest-labs/FLUX.1-schnell"  # text-to-image


def _gpu_decorator(duration=60):
    """Build the decorator applied to the inference functions.

    When the optional ``spaces`` package can be imported, the result is
    ``spaces.GPU(duration=duration)``. Otherwise a pass-through decorator is
    returned, so decorated functions run unchanged without ``spaces``.

    Args:
        duration: Forwarded unchanged as ``spaces.GPU``'s ``duration``
            argument; ignored by the pass-through fallback.

    Returns:
        A callable that takes a function and returns the function to use.
    """
    try:
        import spaces

        wrapper = spaces.GPU(duration=duration)
    except ImportError:
        # No `spaces` available: hand the function back untouched.
        def wrapper(fn):
            return fn

    return wrapper


# ---------------------------------------------------------------------------
# Demo-mode mock data
# ---------------------------------------------------------------------------
# Canned follow-up questions for demo mode, in the order they are asked.
# generate_text() picks index 0/1/2 when the last message contains
# "#1"/"#2"/"#3", and falls back to index 0.
_DEMO_FOLLOW_UPS = [
    # Follow-up #1: what triggered the decision.
    "That's a significant decision. "
    "What was the specific moment or event that tipped the scales? "
    "Was there a single trigger, or has this been building for a while?",
    # Follow-up #2: worst case versus best realistic case.
    "I see. Let's stress-test this — what does the absolute "
    "worst-case scenario look like if this doesn't work out? "
    "And on the flip side, what's the best realistic outcome "
    "in six months?",
    # Follow-up #3: who else is affected.
    "Last question — who else is affected by this change? "
    "Are there dependencies you need to manage — people counting "
    "on the old arrangement, or opportunities blocked until this "
    "ships?",
]

# Canned JSON text returned in demo mode for categorisation prompts.
# Keyword order below is the key order of the serialised object.
_DEMO_CATEGORIZE = json.dumps(
    dict(
        category="career",
        subcategory="job_change",
        severity=7,
        status_emoji="🔧",
    )
)

# Canned JSON text returned in demo mode for consequence-prediction prompts:
# an array of four objects, each with the keys outcome, probability, valence
# and timeframe (in that order).
_DEMO_PREDICT = json.dumps([
    dict(zip(("outcome", "probability", "valence", "timeframe"), row))
    for row in (
        (
            "Short-term financial pressure during the transition",
            "high",
            "negative",
            "months",
        ),
        (
            "New growth opportunities and skill development",
            "medium",
            "positive",
            "months",
        ),
        (
            "Stress and uncertainty while adjusting",
            "high",
            "negative",
            "weeks",
        ),
        (
            "Improved long-term career satisfaction",
            "medium",
            "positive",
            "years",
        ),
    )
])

# Canned image-generation prompt returned in demo mode when the last message
# asks for an image prompt or a moment card. One clause per entry, joined
# with ", ".
_DEMO_CARD_PROMPT = ", ".join((
    "A solitary figure standing at a crossroads in soft watercolor",
    "one path leading through a dense forest",
    "the other opening to a sunlit meadow",
    "warm amber light breaking through clouds overhead",
))

# Canned image description that describe_image() returns in demo mode.
# One sentence per entry, joined with a single space.
_DEMO_IMAGE_DESC = " ".join((
    "This appears to be a formal document with professional letterhead.",
    "The key information suggests important correspondence regarding a "
    "significant life decision or career change.",
))

# Canned Markdown report returned in demo mode when the last message mentions
# "pattern" or "debug report". Each tuple entry is exactly one line of the
# report; long paragraphs are wrapped in source only and contain no newlines.
# The report has no trailing newline.
_DEMO_PATTERN = "\n".join((
    "## 🔍 Debug Report: Life Pattern Analysis",
    "",
    "### Recurring Patterns",
    "- You tend to make major decisions after prolonged periods of "
    "dissatisfaction rather than proactively.",
    "- Career decisions show a pattern of choosing growth over stability.",
    "- You process decisions emotionally first, then rationalize afterward.",
    "",
    "### Category Distribution",
    "Decisions are heavily weighted toward career (60%) with relationship "
    "decisions as the second most common (20%). Work is your primary source "
    "of both satisfaction and stress.",
    "",
    "### Prediction Accuracy",
    "Based on resolved decisions, predictions are ~65% accurate. You tend to "
    "overestimate negative outcomes and underestimate how quickly you adapt.",
    "",
    "### Risk Profile",
    "**Moderate risk-taker.** You avoid purely speculative decisions but "
    "accept significant uncertainty when the upside is clear.",
    "",
    "### 🔧 Recommended Patch",
    "Add a 72-hour cool-down for decisions with severity > 6. Your first "
    "instincts are usually good, but stress-testing them before they ship to "
    "production would catch edge cases.",
))

# ---------------------------------------------------------------------------
# Model loading (skipped in demo mode)
# ---------------------------------------------------------------------------
# Module-level handles used by the inference functions below. They stay None
# in demo mode, where those functions return canned data and never touch them.
text_model = None
text_tokenizer = None
asr_pipe = None
vision_model = None
vision_tokenizer = None
image_pipe = None

# Real mode: load every model once, at import time, in a fixed order
# (text -> ASR -> vision -> image generation).
if not DEMO_MODE:
    # Imported here rather than at the top of the file so that demo mode
    # never imports torch or transformers.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    # --- Text model: tokenizer + causal LM used by generate_text() ---------
    print("[LifeLog] Loading text model…")
    # trust_remote_code=True allows the model repository's own Python code
    # to be executed while loading.
    text_tokenizer = AutoTokenizer.from_pretrained(
        MODEL_TEXT, trust_remote_code=True
    )
    text_model = AutoModelForCausalLM.from_pretrained(
        MODEL_TEXT,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
    )

    # --- Speech recognition pipeline used by transcribe_audio() ------------
    print("[LifeLog] Loading ASR model…")
    asr_pipe = pipeline(
        "automatic-speech-recognition",
        model=MODEL_ASR,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    # --- Vision model: tokenizer + model used by describe_image() ----------
    print("[LifeLog] Loading vision model…")
    vision_tokenizer = AutoTokenizer.from_pretrained(
        MODEL_VISION, trust_remote_code=True
    )
    # Patch: MiniCPM-V's custom model class lacks all_tied_weights_keys
    # which newer transformers expects during from_pretrained.
    # The attribute is set on the PreTrainedModel base class, and only when
    # it is missing, so this one dict object is shared by every subclass
    # that does not define its own. It must run before the from_pretrained
    # call below.
    from transformers import PreTrainedModel
    if not hasattr(PreTrainedModel, "all_tied_weights_keys"):
        PreTrainedModel.all_tied_weights_keys = {}
    # NOTE(review): unlike the text model, no device_map is passed here and
    # the model is not moved to a device in this block — confirm that the
    # resulting placement is intended.
    vision_model = AutoModelForCausalLM.from_pretrained(
        MODEL_VISION,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )

    # --- Image generation pipeline used by generate_moment_card() ----------
    print("[LifeLog] Loading image generation model…")
    # diffusers is imported only in real mode, like torch/transformers above.
    from diffusers import FluxPipeline

    image_pipe = FluxPipeline.from_pretrained(
        MODEL_IMAGE, torch_dtype=torch.bfloat16
    )
    # Turn on diffusers' model CPU offloading for this pipeline.
    image_pipe.enable_model_cpu_offload()

    print("[LifeLog] All models loaded.")


# ---------------------------------------------------------------------------
# Inference functions
# ---------------------------------------------------------------------------

def _demo_text_reply(messages: list[dict]) -> str:
    """Choose the canned demo reply that matches the last chat message.

    Rules are checked in a fixed order and the first match wins: consequence
    prediction, categorisation, image prompt, pattern report, then the
    numbered follow-up questions. Keyword rules are case-insensitive; the
    "#1"/"#2"/"#3" markers are matched against the message as written.
    """
    prompt = messages[-1].get("content", "") if messages else ""
    folded = prompt.lower()
    mentions_json = "json" in folded

    if "consequence" in folded or ("predict" in folded and mentions_json):
        return _DEMO_PREDICT
    if "category" in folded and mentions_json:
        return _DEMO_CATEGORIZE
    if any(key in folded for key in ("image prompt", "moment card")):
        return _DEMO_CARD_PROMPT
    if any(key in folded for key in ("pattern", "debug report")):
        return _DEMO_PATTERN

    for index, marker in enumerate(("#1", "#2", "#3")):
        if marker in prompt:
            return _DEMO_FOLLOW_UPS[index]
    # No marker found (or no messages at all): use the first follow-up.
    return _DEMO_FOLLOW_UPS[0]


@_gpu_decorator(duration=60)
def generate_text(messages: list[dict], max_tokens: int = 512) -> str:
    """Produce the assistant's reply for a chat transcript.

    In demo mode no model is used; a canned reply is selected from the last
    message's ``"content"``. Otherwise the messages are rendered with the
    tokenizer's chat template and the text model samples a completion.

    Args:
        messages: Chat messages (dicts) in conversation order. They are
            passed as-is to the tokenizer's ``apply_chat_template``.
        max_tokens: Upper bound on newly generated tokens
            (``max_new_tokens``).

    Returns:
        The decoded completion only: prompt tokens are sliced off and
        special tokens are skipped.
    """
    if DEMO_MODE:
        return _demo_text_reply(messages)

    chat_prompt = text_tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    encoded = text_tokenizer([chat_prompt], return_tensors="pt")
    encoded = encoded.to(text_model.device)
    generated = text_model.generate(
        **encoded,
        max_new_tokens=max_tokens,
        temperature=0.7,
        do_sample=True,
    )
    # generate() returns prompt + completion; keep only the new tokens.
    prompt_length = encoded.input_ids.shape[-1]
    completion = generated[:, prompt_length:]
    return text_tokenizer.decode(completion[0], skip_special_tokens=True)


@_gpu_decorator(duration=30)
def transcribe_audio(audio_path: str) -> str:
    """Transcribe the audio file at *audio_path* to text.

    In demo mode the file is never read and a fixed sample sentence is
    returned. Otherwise the path is passed to the ASR pipeline and the
    ``"text"`` entry of its result is returned.
    """
    if not DEMO_MODE:
        return asr_pipe(audio_path)["text"]
    return "I decided to leave my current job and pursue freelancing full-time."


@_gpu_decorator(duration=60)
def describe_image(image_path: str, question: str) -> str:
    """Answer *question* about the image stored at *image_path*.

    In demo mode the file is never opened and a canned description is
    returned. Otherwise the image is loaded, converted to RGB and passed,
    with the question as a single user message, to the vision model's
    ``chat`` method.

    Args:
        image_path: Path of the image file to open with Pillow.
        question: Text sent as the user message's content.

    Returns:
        Whatever ``vision_model.chat`` returns for the image and question.

    Raises:
        Errors from ``Image.open`` / ``convert`` (for example a missing or
        unreadable file) propagate to the caller.
    """
    if DEMO_MODE:
        return _DEMO_IMAGE_DESC

    # Image.open() keeps the file open until the image is closed. convert()
    # returns a separate, fully loaded image, so the source file can be
    # released as soon as the with-block exits instead of leaking the handle.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    msgs = [{"role": "user", "content": question}]
    return vision_model.chat(
        image=image, msgs=msgs, tokenizer=vision_tokenizer
    )


def _demo_moment_card() -> Image.Image:
    """Draw the 512x512 placeholder card shown in demo mode."""
    background = (22, 27, 34)
    label_color = (34, 197, 94)
    border_color = (48, 54, 61)

    card = Image.new("RGB", (512, 512), color=background)
    canvas = ImageDraw.Draw(card)

    # Use Arial when the font file can be found; otherwise fall back to
    # Pillow's built-in default font.
    try:
        label_font = ImageFont.truetype("arial.ttf", 18)
    except OSError:
        label_font = ImageFont.load_default()

    canvas.multiline_text(
        (256, 230),
        "Moment Card\n(Demo Mode)",
        fill=label_color,
        font=label_font,
        anchor="mm",
        align="center",
    )
    canvas.rectangle([20, 20, 492, 492], outline=border_color, width=2)
    return card


@_gpu_decorator(duration=120)
def generate_moment_card(prompt: str) -> Image.Image:
    """Create the "moment card" image for *prompt*.

    In demo mode the prompt is ignored and a locally drawn placeholder is
    returned. Otherwise the image pipeline renders a 512x512 image in four
    inference steps with guidance disabled, using a CPU generator seeded
    with 0.

    Args:
        prompt: Text prompt passed to the image pipeline.

    Returns:
        The first image produced by the pipeline, or the demo placeholder.
    """
    if DEMO_MODE:
        return _demo_moment_card()

    import torch

    seeded_generator = torch.Generator(device="cpu").manual_seed(0)
    result = image_pipe(
        prompt=prompt,
        height=512,
        width=512,
        guidance_scale=0.0,
        num_inference_steps=4,
        max_sequence_length=256,
        generator=seeded_generator,
    )
    return result.images[0]