# Spaces: build-small-hackathon / LifeLog
# Status: Runtime error
# File size: 8,568 Bytes

import os
import json
from PIL import Image, ImageDraw, ImageFont

# Demo mode is on only when the LIFELOG_DEMO environment variable is exactly
# "1"; an unset variable or any other value leaves it off.
DEMO_MODE = os.getenv("LIFELOG_DEMO", "0") == "1"

# Model identifiers handed to the loaders further down in this module.
# Change a value here to point that capability at a different checkpoint.
MODEL_TEXT = "openbmb/MiniCPM5-1B"
MODEL_VISION = "openbmb/MiniCPM-V-2_6"
MODEL_ASR = "openai/whisper-small"
MODEL_IMAGE = "black-forest-labs/FLUX.1-schnell"


def _gpu_decorator(duration=60):
    """Build the decorator applied to the inference functions in this module.

    If the optional ``spaces`` package can be imported, the object returned
    by ``spaces.GPU(duration=duration)`` is used. If the import fails, a
    pass-through decorator is returned instead, so the decorated function is
    left exactly as written.

    Args:
        duration: Forwarded unchanged to ``spaces.GPU``.

    Returns:
        A callable that takes a function and returns the (possibly wrapped)
        function.
    """
    try:
        import spaces

        decorator = spaces.GPU(duration=duration)
    except ImportError:
        def decorator(fn):
            return fn

    return decorator


# ---------------------------------------------------------------------------
# Demo-mode mock data
# ---------------------------------------------------------------------------
# Canned follow-up questions for demo mode. generate_text picks entry 0, 1 or
# 2 when the last message contains "#1", "#2" or "#3" respectively, and falls
# back to entry 0 otherwise. Each entry is one string, split per sentence.
_DEMO_FOLLOW_UPS = [
    # Follow-up 1
    "That's a significant decision. "
    "What was the specific moment or event that tipped the scales? "
    "Was there a single trigger, or has this been building for a while?",
    # Follow-up 2
    "I see. "
    "Let's stress-test this — what does the absolute worst-case scenario "
    "look like if this doesn't work out? "
    "And on the flip side, what's the best realistic outcome in six months?",
    # Follow-up 3
    "Last question — who else is affected by this change? "
    "Are there dependencies you need to manage — people counting on the old "
    "arrangement, or opportunities blocked until this ships?",
]

# Demo-mode reply for categorisation prompts: a JSON object serialised to a
# string, returned by generate_text when the last message mentions both
# "category" and "json".
_DEMO_CATEGORIZE = json.dumps(
    dict(
        category="career",
        subcategory="job_change",
        severity=7,
        status_emoji="🔧",
    )
)

# Demo-mode reply for prediction prompts: a JSON array of outcome objects,
# serialised to a string. Each row below supplies the values for the keys
# "outcome", "probability", "valence" and "timeframe", in that order.
_DEMO_PREDICT = json.dumps(
    [
        dict(zip(("outcome", "probability", "valence", "timeframe"), row))
        for row in (
            (
                "Short-term financial pressure during the transition",
                "high",
                "negative",
                "months",
            ),
            (
                "New growth opportunities and skill development",
                "medium",
                "positive",
                "months",
            ),
            (
                "Stress and uncertainty while adjusting",
                "high",
                "negative",
                "weeks",
            ),
            (
                "Improved long-term career satisfaction",
                "medium",
                "positive",
                "years",
            ),
        )
    ]
)

# Demo-mode image prompt, returned by generate_text when the last message
# mentions "image prompt" or "moment card". Built from its comma-separated
# clauses.
_DEMO_CARD_PROMPT = ", ".join(
    (
        "A solitary figure standing at a crossroads in soft watercolor",
        "one path leading through a dense forest",
        "the other opening to a sunlit meadow",
        "warm amber light breaking through clouds overhead",
    )
)

# Demo-mode image description returned by describe_image; two sentences
# joined by a single space.
_DEMO_IMAGE_DESC = " ".join(
    (
        "This appears to be a formal document with professional letterhead.",
        "The key information suggests important correspondence regarding a"
        " significant life decision or career change.",
    )
)

# Canned Markdown report for demo mode. generate_text returns it when the
# last message mentions "pattern" or "debug report".
# A backslash at the end of a line inside the literal is a line continuation:
# it removes the newline, so each wrapped paragraph or bullet becomes one
# Markdown line at runtime. The backslash right after the opening quotes
# keeps the string from starting with an empty line.
_DEMO_PATTERN = """\
## 🔍 Debug Report: Life Pattern Analysis

### Recurring Patterns
- You tend to make major decisions after prolonged periods of dissatisfaction \
rather than proactively.
- Career decisions show a pattern of choosing growth over stability.
- You process decisions emotionally first, then rationalize afterward.

### Category Distribution
Decisions are heavily weighted toward career (60%) with relationship decisions \
as the second most common (20%). Work is your primary source of both \
satisfaction and stress.

### Prediction Accuracy
Based on resolved decisions, predictions are ~65% accurate. You tend to \
overestimate negative outcomes and underestimate how quickly you adapt.

### Risk Profile
**Moderate risk-taker.** You avoid purely speculative decisions but accept \
significant uncertainty when the upside is clear.

### 🔧 Recommended Patch
Add a 72-hour cool-down for decisions with severity > 6. Your first instincts \
are usually good, but stress-testing them before they ship to production would \
catch edge cases."""

# ---------------------------------------------------------------------------
# Model loading (skipped in demo mode)
# ---------------------------------------------------------------------------
# Module-level handles used by the inference functions below. They remain
# None in demo mode, where those functions return canned data before touching
# any of them.
text_model = None
text_tokenizer = None
asr_pipe = None
vision_model = None
vision_tokenizer = None
image_pipe = None

# Everything in this branch runs at import time, once, and only outside demo
# mode; torch, transformers and diffusers are imported nowhere else at module
# level, so demo mode never imports them.
# NOTE(review): `spaces` is first imported by _gpu_decorator when the
# functions below are decorated, i.e. after this whole section has executed.
# Verify that this import order is acceptable for the target Space hardware.
if not DEMO_MODE:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    # Text model: tokenizer plus causal LM in bfloat16, with device_map="auto".
    # trust_remote_code=True is passed to both loaders.
    print("[LifeLog] Loading text model…")
    text_tokenizer = AutoTokenizer.from_pretrained(
        MODEL_TEXT, trust_remote_code=True
    )
    text_model = AutoModelForCausalLM.from_pretrained(
        MODEL_TEXT,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
    )

    # Speech recognition: a transformers pipeline in float16 (the other
    # models in this section use bfloat16).
    print("[LifeLog] Loading ASR model…")
    asr_pipe = pipeline(
        "automatic-speech-recognition",
        model=MODEL_ASR,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    print("[LifeLog] Loading vision model…")
    vision_tokenizer = AutoTokenizer.from_pretrained(
        MODEL_VISION, trust_remote_code=True
    )
    # Patch: MiniCPM-V's custom model class lacks all_tied_weights_keys
    # which newer transformers expects during from_pretrained.
    # The attribute is set on the PreTrainedModel base class itself, so it is
    # visible on every transformers model in this process, and only when the
    # installed transformers does not already define it.
    from transformers import PreTrainedModel
    if not hasattr(PreTrainedModel, "all_tied_weights_keys"):
        PreTrainedModel.all_tied_weights_keys = {}
    # NOTE(review): unlike the text model and the ASR pipeline, no device_map
    # is passed here and nothing in this section moves vision_model to an
    # accelerator — confirm the intended device placement.
    vision_model = AutoModelForCausalLM.from_pretrained(
        MODEL_VISION,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )

    # Image generation: diffusers is imported only at this point, so it is
    # needed only when real models are loaded.
    print("[LifeLog] Loading image generation model…")
    from diffusers import FluxPipeline

    image_pipe = FluxPipeline.from_pretrained(
        MODEL_IMAGE, torch_dtype=torch.bfloat16
    )
    # Presumably enabled to limit accelerator memory use — TODO confirm.
    image_pipe.enable_model_cpu_offload()

    print("[LifeLog] All models loaded.")


# ---------------------------------------------------------------------------
# Inference functions
# ---------------------------------------------------------------------------

@_gpu_decorator(duration=60)
def generate_text(messages: list[dict], max_tokens: int = 512) -> str:
    """Return the assistant reply for a chat-style list of messages.

    Args:
        messages: Chat messages as dicts. In demo mode only the ``content``
            of the final entry is inspected (an empty list counts as empty
            content); otherwise the whole list is rendered through the
            tokenizer's chat template.
        max_tokens: Cap on newly generated tokens; unused in demo mode.

    Returns:
        In demo mode, one of the canned ``_DEMO_*`` strings selected by
        keyword matching on the last message. Otherwise the decoded
        continuation sampled from the text model.
    """
    if not DEMO_MODE:
        prompt = text_tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        encoded = text_tokenizer([prompt], return_tensors="pt").to(
            text_model.device
        )
        generated = text_model.generate(
            **encoded, max_new_tokens=max_tokens, temperature=0.7, do_sample=True
        )
        # Skip the first prompt_len positions (the length of the encoded
        # prompt) so only what follows them is decoded.
        prompt_len = encoded.input_ids.shape[-1]
        completion = generated[:, prompt_len:]
        return text_tokenizer.decode(completion[0], skip_special_tokens=True)

    # Demo mode: route on keywords found in the last message. The order of
    # the checks matters, because a message may match several of them.
    last = messages[-1].get("content", "") if messages else ""
    lowered = last.lower()

    wants_prediction = "consequence" in lowered or (
        "predict" in lowered and "json" in lowered
    )
    if wants_prediction:
        return _DEMO_PREDICT
    if "category" in lowered and "json" in lowered:
        return _DEMO_CATEGORIZE
    if any(key in lowered for key in ("image prompt", "moment card")):
        return _DEMO_CARD_PROMPT
    if any(key in lowered for key in ("pattern", "debug report")):
        return _DEMO_PATTERN

    # Follow-up questions are chosen by the first "#N" marker present.
    for index, marker in enumerate(("#1", "#2", "#3")):
        if marker in last:
            return _DEMO_FOLLOW_UPS[index]
    return _DEMO_FOLLOW_UPS[0]


@_gpu_decorator(duration=30)
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an audio recording to text.

    Args:
        audio_path: Path of the audio file handed to the ASR pipeline. A
            falsy value (``None`` or ``""``, e.g. when nothing was recorded)
            is treated as "no audio".

    Returns:
        In demo mode, a fixed sample sentence. Otherwise the pipeline's
        transcript with surrounding whitespace removed, or ``""`` when no
        audio path was supplied.
    """
    if DEMO_MODE:
        return "I decided to leave my current job and pursue freelancing full-time."

    if not audio_path:
        # Nothing to transcribe; do not pass None/"" into the pipeline.
        return ""

    # Whisper works on windows of up to 30 s. Asking the pipeline to chunk
    # the input lets longer recordings be transcribed in full; clips shorter
    # than one chunk are unaffected.
    result = asr_pipe(audio_path, chunk_length_s=30)
    return result["text"].strip()


@_gpu_decorator(duration=60)
def describe_image(image_path: str, question: str) -> str:
    """Answer a question about an image with the vision model.

    Args:
        image_path: Path of the image file to open.
        question: Text sent as the single user message alongside the image.

    Returns:
        In demo mode, the canned ``_DEMO_IMAGE_DESC`` text. Otherwise
        whatever ``vision_model.chat`` returns for the image and question.
    """
    if DEMO_MODE:
        return _DEMO_IMAGE_DESC

    # convert() yields a separate in-memory RGB image, so the source file can
    # be closed as soon as the conversion is done. The context manager makes
    # that explicit, including for multi-frame files whose handle would
    # otherwise stay open.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    msgs = [{"role": "user", "content": question}]
    return vision_model.chat(
        image=image, msgs=msgs, tokenizer=vision_tokenizer
    )


def _load_card_font(size: int = 18):
    """Return a font for the demo placeholder card, honouring ``size`` where possible."""
    try:
        return ImageFont.truetype("arial.ttf", size)
    except OSError:
        # "arial.ttf" could not be loaded on this host. Prefer Pillow's
        # default font at the requested size; releases whose load_default()
        # takes no size argument raise TypeError, in which case the
        # fixed-size default is used.
        try:
            return ImageFont.load_default(size=size)
        except TypeError:
            return ImageFont.load_default()


def _render_demo_card() -> Image.Image:
    """Draw the static 512x512 placeholder returned in demo mode."""
    card = Image.new("RGB", (512, 512), color=(22, 27, 34))
    draw = ImageDraw.Draw(card)
    draw.multiline_text(
        (256, 230),
        "Moment Card\n(Demo Mode)",
        fill=(34, 197, 94),
        font=_load_card_font(18),
        anchor="mm",
        align="center",
    )
    # Thin frame 20 px inside the image edge.
    draw.rectangle([20, 20, 492, 492], outline=(48, 54, 61), width=2)
    return card


@_gpu_decorator(duration=120)
def generate_moment_card(prompt: str) -> Image.Image:
    """Create the "moment card" image for a text prompt.

    Args:
        prompt: Text prompt passed to the image pipeline; ignored in demo
            mode.

    Returns:
        In demo mode, a locally drawn 512x512 placeholder. Otherwise the
        first image produced by ``image_pipe`` for the prompt.
    """
    if DEMO_MODE:
        return _render_demo_card()

    # Imported here because torch is only imported at module level outside
    # demo mode.
    import torch

    result = image_pipe(
        prompt=prompt,
        height=512,
        width=512,
        guidance_scale=0.0,
        num_inference_steps=4,
        max_sequence_length=256,
        # The generator is re-seeded with 0 on every call.
        generator=torch.Generator(device="cpu").manual_seed(0),
    )
    return result.images[0]