"""Model loading and inference helpers for LifeLog.

When the ``LIFELOG_DEMO`` environment variable is set to ``"1"`` no model is
loaded and every inference function returns canned demo data instead.
Otherwise the text, ASR, vision and image-generation models are loaded at
import time.
"""

import os
import json

from PIL import Image, ImageDraw, ImageFont

DEMO_MODE = os.environ.get("LIFELOG_DEMO", "0") == "1"

# Model IDs — swap here if needed
MODEL_TEXT = "openbmb/MiniCPM5-1B"
MODEL_VISION = "openbmb/MiniCPM-V-2_6"
MODEL_ASR = "openai/whisper-small"
MODEL_IMAGE = "black-forest-labs/FLUX.1-schnell"


def _gpu_decorator(duration: int = 60):
    """Return ``spaces.GPU(duration=duration)``, or a no-op decorator.

    The ``spaces`` package is optional: when importing it fails, the returned
    decorator hands the wrapped function back unchanged.

    Args:
        duration: Value forwarded to ``spaces.GPU`` as ``duration``.

    Returns:
        A decorator to apply to an inference function.
    """
    try:
        import spaces

        return spaces.GPU(duration=duration)
    except ImportError:
        return lambda fn: fn


# ---------------------------------------------------------------------------
# Demo-mode mock data
# ---------------------------------------------------------------------------

# Canned follow-up questions, selected in generate_text() by the "#1"/"#2"/"#3"
# marker found in the last message.
_DEMO_FOLLOW_UPS = [
    (
        "That's a significant decision. What was the specific moment or event "
        "that tipped the scales? Was there a single trigger, or has this been "
        "building for a while?"
    ),
    (
        "I see. Let's stress-test this — what does the absolute worst-case "
        "scenario look like if this doesn't work out? And on the flip side, "
        "what's the best realistic outcome in six months?"
    ),
    (
        "Last question — who else is affected by this change? Are there "
        "dependencies you need to manage — people counting on the old "
        "arrangement, or opportunities blocked until this ships?"
    ),
]

# Canned categorisation reply, serialised as a JSON object string.
_DEMO_CATEGORIZE = json.dumps({
    "category": "career",
    "subcategory": "job_change",
    "severity": 7,
    "status_emoji": "🔧",
})

# Canned prediction reply, serialised as a JSON array string.
_DEMO_PREDICT = json.dumps([
    {
        "outcome": "Short-term financial pressure during the transition",
        "probability": "high",
        "valence": "negative",
        "timeframe": "months",
    },
    {
        "outcome": "New growth opportunities and skill development",
        "probability": "medium",
        "valence": "positive",
        "timeframe": "months",
    },
    {
        "outcome": "Stress and uncertainty while adjusting",
        "probability": "high",
        "valence": "negative",
        "timeframe": "weeks",
    },
    {
        "outcome": "Improved long-term career satisfaction",
        "probability": "medium",
        "valence": "positive",
        "timeframe": "years",
    },
])

_DEMO_CARD_PROMPT = (
    "A solitary figure standing at a crossroads in soft watercolor, one path "
    "leading through a dense forest, the other opening to a sunlit meadow, "
    "warm amber light breaking through clouds overhead"
)

_DEMO_IMAGE_DESC = (
    "This appears to be a formal document with professional letterhead. "
    "The key information suggests important correspondence regarding a "
    "significant life decision or career change."
)

# A trailing backslash inside the literal joins two source lines into one
# output line, so long paragraphs stay on a single Markdown line.
_DEMO_PATTERN = """\
## 🔍 Debug Report: Life Pattern Analysis

### Recurring Patterns
- You tend to make major decisions after prolonged periods of dissatisfaction \
rather than proactively.
- Career decisions show a pattern of choosing growth over stability.
- You process decisions emotionally first, then rationalize afterward.

### Category Distribution
Decisions are heavily weighted toward career (60%) with relationship decisions \
as the second most common (20%). Work is your primary source of both \
satisfaction and stress.

### Prediction Accuracy
Based on resolved decisions, predictions are ~65% accurate. You tend to \
overestimate negative outcomes and underestimate how quickly you adapt.

### Risk Profile
**Moderate risk-taker.** You avoid purely speculative decisions but accept \
significant uncertainty when the upside is clear.

### 🔧 Recommended Patch
Add a 72-hour cool-down for decisions with severity > 6. Your first instincts \
are usually good, but stress-testing them before they ship to production would \
catch edge cases."""

# ---------------------------------------------------------------------------
# Model loading (skipped in demo mode)
# ---------------------------------------------------------------------------

# All handles stay None in demo mode; the inference functions below return
# their canned data before touching any of them.
text_model = None
text_tokenizer = None
asr_pipe = None
vision_model = None
vision_tokenizer = None
image_pipe = None

if not DEMO_MODE:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    print("[LifeLog] Loading text model…")
    text_tokenizer = AutoTokenizer.from_pretrained(
        MODEL_TEXT, trust_remote_code=True
    )
    text_model = AutoModelForCausalLM.from_pretrained(
        MODEL_TEXT,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
    )

    print("[LifeLog] Loading ASR model…")
    asr_pipe = pipeline(
        "automatic-speech-recognition",
        model=MODEL_ASR,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    print("[LifeLog] Loading vision model…")
    vision_tokenizer = AutoTokenizer.from_pretrained(
        MODEL_VISION, trust_remote_code=True
    )
    # Patch: MiniCPM-V's custom model class lacks all_tied_weights_keys
    # which newer transformers expects during from_pretrained.
    from transformers import PreTrainedModel

    if not hasattr(PreTrainedModel, "all_tied_weights_keys"):
        PreTrainedModel.all_tied_weights_keys = {}
    # NOTE(review): unlike the text and ASR models, no device_map is passed
    # here, so device placement is left to from_pretrained's default —
    # confirm this is intended.
    vision_model = AutoModelForCausalLM.from_pretrained(
        MODEL_VISION,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )

    print("[LifeLog] Loading image generation model…")
    from diffusers import FluxPipeline

    image_pipe = FluxPipeline.from_pretrained(
        MODEL_IMAGE, torch_dtype=torch.bfloat16
    )
    image_pipe.enable_model_cpu_offload()
    print("[LifeLog] All models loaded.")


# ---------------------------------------------------------------------------
# Inference functions
# ---------------------------------------------------------------------------

@_gpu_decorator(duration=60)
def generate_text(messages: list[dict], max_tokens: int = 512) -> str:
    """Generate a reply to a chat-style message list.

    In demo mode the reply is a canned string chosen by keyword matching on
    the ``content`` of the last message; the checks run in a fixed order and
    the first match wins. Otherwise the messages are rendered with the
    tokenizer's chat template and the text model samples a continuation.

    Args:
        messages: Chat messages as dicts; only ``content`` of the last entry
            is inspected in demo mode.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The generated (or canned) text.
    """
    if DEMO_MODE:
        # `or ""` also covers a message whose content is explicitly None,
        # which would otherwise crash on .lower().
        last = (messages[-1].get("content") or "") if messages else ""
        lower = last.lower()
        if "consequence" in lower or ("predict" in lower and "json" in lower):
            return _DEMO_PREDICT
        if "category" in lower and "json" in lower:
            return _DEMO_CATEGORIZE
        if "image prompt" in lower or "moment card" in lower:
            return _DEMO_CARD_PROMPT
        if "pattern" in lower or "debug report" in lower:
            return _DEMO_PATTERN
        if "#1" in last:
            return _DEMO_FOLLOW_UPS[0]
        if "#2" in last:
            return _DEMO_FOLLOW_UPS[1]
        if "#3" in last:
            return _DEMO_FOLLOW_UPS[2]
        return _DEMO_FOLLOW_UPS[0]

    text = text_tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = text_tokenizer([text], return_tensors="pt").to(text_model.device)
    output_ids = text_model.generate(
        **inputs, max_new_tokens=max_tokens, temperature=0.7, do_sample=True
    )
    # Drop the prompt tokens so only the newly generated part is decoded.
    output_ids = output_ids[:, inputs.input_ids.shape[-1]:]
    return text_tokenizer.decode(output_ids[0], skip_special_tokens=True)


@_gpu_decorator(duration=30)
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an audio file with the ASR pipeline.

    Args:
        audio_path: Path passed straight to the ASR pipeline.

    Returns:
        The ``"text"`` field of the pipeline result, or a fixed sentence in
        demo mode.
    """
    if DEMO_MODE:
        return "I decided to leave my current job and pursue freelancing full-time."
    result = asr_pipe(audio_path)
    return result["text"]


@_gpu_decorator(duration=60)
def describe_image(image_path: str, question: str) -> str:
    """Answer a question about an image using the vision model.

    Args:
        image_path: Path of the image file to open.
        question: Sent to the model as the single user message.

    Returns:
        Whatever ``vision_model.chat`` returns, or a canned description in
        demo mode.
    """
    if DEMO_MODE:
        return _DEMO_IMAGE_DESC
    # Close the source file as soon as the RGB copy exists instead of leaving
    # the handle open until garbage collection.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    msgs = [{"role": "user", "content": question}]
    response = vision_model.chat(
        image=image, msgs=msgs, tokenizer=vision_tokenizer
    )
    return response


@_gpu_decorator(duration=120)
def generate_moment_card(prompt: str) -> Image.Image:
    """Render a 512x512 "moment card" image for a prompt.

    In demo mode a placeholder card is drawn locally with a caption and a
    border. Otherwise the image pipeline is run with a generator seeded
    with 0.

    Args:
        prompt: Text prompt for the image pipeline (unused in demo mode).

    Returns:
        The generated or placeholder image.
    """
    if DEMO_MODE:
        img = Image.new("RGB", (512, 512), color=(22, 27, 34))
        draw = ImageDraw.Draw(img)
        try:
            font = ImageFont.truetype("arial.ttf", 18)
        except OSError:
            # Font file not available; fall back to Pillow's default font.
            font = ImageFont.load_default()
        draw.multiline_text(
            (256, 230),
            "Moment Card\n(Demo Mode)",
            fill=(34, 197, 94),
            font=font,
            anchor="mm",
            align="center",
        )
        draw.rectangle([20, 20, 492, 492], outline=(48, 54, 61), width=2)
        return img

    # Imported here because the module-level torch import is skipped in demo
    # mode and this name must be local to the function.
    import torch

    image = image_pipe(
        prompt=prompt,
        height=512,
        width=512,
        guidance_scale=0.0,
        num_inference_steps=4,
        max_sequence_length=256,
        generator=torch.Generator(device="cpu").manual_seed(0),
    ).images[0]
    return image