Spaces:

build-small-hackathon
/

LifeLog

Runtime error

App Files Files Community

LifeLog / models.py

arunsa

Fix MiniCPM-V loading: patch missing all_tied_weights_keys on PreTrainedModel

d297e4a 2 days ago

Raw

History Blame Contribute Delete

8.57 kB

	import os
	import json
	from PIL import Image, ImageDraw, ImageFont

	# Demo mode is opt-in: it is on only when LIFELOG_DEMO is exactly "1".
	DEMO_MODE = os.getenv("LIFELOG_DEMO", "0") == "1"

	# Model IDs handed to the loaders further down — edit here to swap a model.
	MODEL_TEXT = "openbmb/MiniCPM5-1B"
	MODEL_VISION = "openbmb/MiniCPM-V-2_6"
	MODEL_ASR = "openai/whisper-small"
	MODEL_IMAGE = "black-forest-labs/FLUX.1-schnell"


	def _gpu_decorator(duration=60):
	    """Build the decorator applied to the inference functions.

	    Args:
	        duration: Forwarded as ``duration=`` to ``spaces.GPU``.

	    Returns:
	        ``spaces.GPU(duration=duration)`` when the ``spaces`` package can be
	        imported; otherwise a decorator that returns the function unchanged.
	    """

	    def _passthrough(fn):
	        return fn

	    try:
	        import spaces

	        gpu_wrapper = spaces.GPU(duration=duration)
	    except ImportError:
	        # ``spaces`` is not installed: leave decorated functions untouched.
	        return _passthrough
	    return gpu_wrapper


	# ---------------------------------------------------------------------------
	# Demo-mode mock data
	# ---------------------------------------------------------------------------
	# Canned follow-up questions returned by generate_text() in demo mode:
	# index 0, 1 or 2 when the last message contains "#1", "#2" or "#3", and
	# index 0 again when none of the demo keywords match.
	_DEMO_FOLLOW_UPS = [
	(
	"That's a significant decision. What was the specific moment or event "
	"that tipped the scales? Was there a single trigger, or has this been "
	"building for a while?"
	),
	(
	"I see. Let's stress-test this — what does the absolute worst-case "
	"scenario look like if this doesn't work out? And on the flip side, "
	"what's the best realistic outcome in six months?"
	),
	(
	"Last question — who else is affected by this change? Are there "
	"dependencies you need to manage — people counting on the old "
	"arrangement, or opportunities blocked until this ships?"
	),
	]

	# Canned categorisation reply, stored as the JSON text a model would emit.
	# generate_text() returns it in demo mode for "category" + "json" prompts.
	_DEMO_CATEGORIZE = json.dumps(
	    dict(
	        category="career",
	        subcategory="job_change",
	        severity=7,
	        status_emoji="🔧",
	    )
	)

	# Canned consequence predictions, stored as a JSON array of objects that all
	# share the keys outcome / probability / valence / timeframe (in that order).
	_DEMO_PREDICT = json.dumps(
	    [
	        dict(zip(("outcome", "probability", "valence", "timeframe"), row))
	        for row in (
	            (
	                "Short-term financial pressure during the transition",
	                "high",
	                "negative",
	                "months",
	            ),
	            (
	                "New growth opportunities and skill development",
	                "medium",
	                "positive",
	                "months",
	            ),
	            (
	                "Stress and uncertainty while adjusting",
	                "high",
	                "negative",
	                "weeks",
	            ),
	            (
	                "Improved long-term career satisfaction",
	                "medium",
	                "positive",
	                "years",
	            ),
	        )
	    ]
	)

	# Canned image-generation prompt; generate_text() returns it in demo mode
	# when the last message mentions "image prompt" or "moment card".
	_DEMO_CARD_PROMPT = (
	"A solitary figure standing at a crossroads in soft watercolor, one path "
	"leading through a dense forest, the other opening to a sunlit meadow, "
	"warm amber light breaking through clouds overhead"
	)

	# Canned image description returned by describe_image() in demo mode,
	# regardless of the image or question supplied.
	_DEMO_IMAGE_DESC = (
	"This appears to be a formal document with professional letterhead. "
	"The key information suggests important correspondence regarding a "
	"significant life decision or career change."
	)

	# Canned Markdown report; generate_text() returns it in demo mode when the
	# last message mentions "pattern" or "debug report". A trailing backslash
	# inside the literal joins the wrapped source line with the next one, so
	# each bullet/paragraph is a single line in the resulting string.
	_DEMO_PATTERN = """\
	## 🔍 Debug Report: Life Pattern Analysis

	### Recurring Patterns
	- You tend to make major decisions after prolonged periods of dissatisfaction \
	rather than proactively.
	- Career decisions show a pattern of choosing growth over stability.
	- You process decisions emotionally first, then rationalize afterward.

	### Category Distribution
	Decisions are heavily weighted toward career (60%) with relationship decisions \
	as the second most common (20%). Work is your primary source of both \
	satisfaction and stress.

	### Prediction Accuracy
	Based on resolved decisions, predictions are ~65% accurate. You tend to \
	overestimate negative outcomes and underestimate how quickly you adapt.

	### Risk Profile
	Moderate risk-taker. You avoid purely speculative decisions but accept \
	significant uncertainty when the upside is clear.

	### 🔧 Recommended Patch
	Add a 72-hour cool-down for decisions with severity > 6. Your first instincts \
	are usually good, but stress-testing them before they ship to production would \
	catch edge cases."""

	# ---------------------------------------------------------------------------
	# Model loading (skipped in demo mode)
	# ---------------------------------------------------------------------------
	# Module-level handles, grouped by model family. They start as None and are
	# only replaced with real objects when DEMO_MODE is off.
	text_model = text_tokenizer = None
	asr_pipe = None
	vision_model = vision_tokenizer = None
	image_pipe = None

	# Real-model path: runs once at import time and is skipped in demo mode, so
	# the heavy libraries (torch, transformers, diffusers) are only imported
	# when models are actually loaded. Progress is reported with print().
	if not DEMO_MODE:
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	# Text LLM: tokenizer plus bfloat16 weights; trust_remote_code allows the
	# model repository's own Python code to be executed while loading.
	print("[LifeLog] Loading text model…")
	text_tokenizer = AutoTokenizer.from_pretrained(
	MODEL_TEXT, trust_remote_code=True
	)
	text_model = AutoModelForCausalLM.from_pretrained(
	MODEL_TEXT,
	torch_dtype=torch.bfloat16,
	trust_remote_code=True,
	device_map="auto",
	)

	# Speech-to-text pipeline, loaded in float16.
	print("[LifeLog] Loading ASR model…")
	asr_pipe = pipeline(
	"automatic-speech-recognition",
	model=MODEL_ASR,
	torch_dtype=torch.float16,
	device_map="auto",
	)

	print("[LifeLog] Loading vision model…")
	vision_tokenizer = AutoTokenizer.from_pretrained(
	MODEL_VISION, trust_remote_code=True
	)
	# Patch: MiniCPM-V's custom model class lacks all_tied_weights_keys
	# which newer transformers expects during from_pretrained.
	# NOTE(review): the attribute is set on the PreTrainedModel base class, so
	# the same dict object is shared by every model class that does not define
	# its own — confirm this is acceptable for the installed transformers.
	from transformers import PreTrainedModel
	if not hasattr(PreTrainedModel, "all_tied_weights_keys"):
	PreTrainedModel.all_tied_weights_keys = {}
	# NOTE(review): unlike the text and ASR loads above, no device_map is passed
	# and the model is not moved to a device in this file — confirm where the
	# vision model is expected to run.
	vision_model = AutoModelForCausalLM.from_pretrained(
	MODEL_VISION,
	torch_dtype=torch.bfloat16,
	trust_remote_code=True,
	)

	# Imported here rather than at the top so diffusers is only needed when the
	# image model is really loaded.
	print("[LifeLog] Loading image generation model…")
	from diffusers import FluxPipeline

	image_pipe = FluxPipeline.from_pretrained(
	MODEL_IMAGE, torch_dtype=torch.bfloat16
	)
	# Turn on diffusers' model CPU offloading for the pipeline.
	image_pipe.enable_model_cpu_offload()

	print("[LifeLog] All models loaded.")


	# ---------------------------------------------------------------------------
	# Inference functions
	# ---------------------------------------------------------------------------

	@_gpu_decorator(duration=60)
	def generate_text(messages: list[dict], max_tokens: int = 512) -> str:
	    """Produce the assistant's reply to a chat transcript.

	    Args:
	        messages: Chat messages as dicts. The demo path only reads the
	            ``content`` of the last entry; the model path passes the whole
	            list to the tokenizer's chat template.
	        max_tokens: Cap on newly generated tokens (model path only).

	    Returns:
	        In demo mode, one of the canned ``_DEMO_*`` strings selected by
	        keyword; otherwise the sampled completion decoded without special
	        tokens and without the prompt.
	    """
	    if not DEMO_MODE:
	        prompt = text_tokenizer.apply_chat_template(
	            messages, tokenize=False, add_generation_prompt=True
	        )
	        encoded = text_tokenizer([prompt], return_tensors="pt").to(
	            text_model.device
	        )
	        generated = text_model.generate(
	            **encoded, max_new_tokens=max_tokens, temperature=0.7, do_sample=True
	        )
	        # generate() returns prompt + completion; drop the prompt tokens.
	        prompt_length = encoded.input_ids.shape[-1]
	        completion = generated[:, prompt_length:]
	        return text_tokenizer.decode(completion[0], skip_special_tokens=True)

	    # Demo mode: route on keywords found in the most recent message.
	    last = messages[-1].get("content", "") if messages else ""
	    lowered = last.lower()
	    mentions_json = "json" in lowered

	    if "consequence" in lowered or ("predict" in lowered and mentions_json):
	        return _DEMO_PREDICT
	    if mentions_json and "category" in lowered:
	        return _DEMO_CATEGORIZE
	    if any(key in lowered for key in ("image prompt", "moment card")):
	        return _DEMO_CARD_PROMPT
	    if any(key in lowered for key in ("pattern", "debug report")):
	        return _DEMO_PATTERN

	    # Follow-up markers are tried in order; no marker means the first question.
	    for index, marker in enumerate(("#1", "#2", "#3")):
	        if marker in last:
	            return _DEMO_FOLLOW_UPS[index]
	    return _DEMO_FOLLOW_UPS[0]


	@_gpu_decorator(duration=30)
	def transcribe_audio(audio_path: str) -> str:
	    """Transcribe a recorded audio file to text.

	    Args:
	        audio_path: Path of the audio file handed to the ASR pipeline.

	    Returns:
	        The transcript text; in demo mode a fixed sample sentence.
	    """
	    if DEMO_MODE:
	        return "I decided to leave my current job and pursue freelancing full-time."
	    # Whisper works on 30-second windows. Without chunking, recordings longer
	    # than that are rejected (or cut short, depending on the transformers
	    # version), so let the pipeline split long audio and stitch the text.
	    result = asr_pipe(audio_path, chunk_length_s=30)
	    return result["text"]


	@_gpu_decorator(duration=60)
	def describe_image(image_path: str, question: str) -> str:
	    """Answer ``question`` about the image stored at ``image_path``.

	    Args:
	        image_path: Path of the image file to open.
	        question: Text sent as the single user turn.

	    Returns:
	        The canned ``_DEMO_IMAGE_DESC`` in demo mode; otherwise whatever
	        ``vision_model.chat`` returns.
	    """
	    if not DEMO_MODE:
	        picture = Image.open(image_path)
	        rgb_picture = picture.convert("RGB")  # normalise the mode for the model
	        user_turn = {"role": "user", "content": question}
	        return vision_model.chat(
	            image=rgb_picture, msgs=[user_turn], tokenizer=vision_tokenizer
	        )
	    return _DEMO_IMAGE_DESC


	def _render_demo_card() -> Image.Image:
	    """Draw the 512x512 placeholder card returned in demo mode."""
	    card = Image.new("RGB", (512, 512), color=(22, 27, 34))
	    draw = ImageDraw.Draw(card)
	    try:
	        font = ImageFont.truetype("arial.ttf", 18)
	    except OSError:
	        # arial.ttf is usually missing outside Windows. Ask Pillow's bundled
	        # font for the same 18px size; Pillow < 10.1 has no ``size`` argument
	        # and raises TypeError, in which case the fixed-size default is used.
	        try:
	            font = ImageFont.load_default(size=18)
	        except TypeError:
	            font = ImageFont.load_default()
	    draw.multiline_text(
	        (256, 230),
	        "Moment Card\n(Demo Mode)",
	        fill=(34, 197, 94),
	        font=font,
	        anchor="mm",
	        align="center",
	    )
	    draw.rectangle([20, 20, 492, 492], outline=(48, 54, 61), width=2)
	    return card


	@_gpu_decorator(duration=120)
	def generate_moment_card(prompt: str) -> Image.Image:
	    """Create the 512x512 "moment card" image for a decision.

	    Args:
	        prompt: Text prompt for the image pipeline (ignored in demo mode).

	    Returns:
	        In demo mode, a locally drawn placeholder card; otherwise the first
	        image produced by ``image_pipe`` for ``prompt``.
	    """
	    if DEMO_MODE:
	        return _render_demo_card()

	    # Imported lazily so demo mode never needs torch installed.
	    import torch

	    # NOTE(review): guidance_scale=0.0 with 4 steps looks tuned for the
	    # configured FLUX.1-schnell model — revisit if MODEL_IMAGE is swapped.
	    # The generator is seeded with a constant (0), presumably so a given
	    # prompt yields a repeatable card — confirm that is intended.
	    image = image_pipe(
	        prompt=prompt,
	        height=512,
	        width=512,
	        guidance_scale=0.0,
	        num_inference_steps=4,
	        max_sequence_length=256,
	        generator=torch.Generator(device="cpu").manual_seed(0),
	    ).images[0]
	    return image