# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

import os

import numpy as np
import yaml
import torch
from cog import BasePredictor, Input, Path

from utils import *
from models import instructir
from text.models import LanguageModel, LMHead

os.environ["TOKENIZERS_PARALLELISM"] = "false"


class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        LM_MODEL = "models/lm_instructir-7d.pt"
        MODEL_NAME = "models/im_instructir-7d.pt"
        device = torch.device("cpu")

        with open(os.path.join("configs/eval5d.yml"), "r") as f:
            config = yaml.safe_load(f)
        cfg = dict2namespace(config)
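        # NOTE: dict2namespace comes from utils (star import above); it is assumed to
        # recursively wrap the YAML dict in Namespace objects so nested keys can be
        # read as attributes (e.g. cfg.model.width, cfg.llm.model_dim).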

        torch.backends.cudnn.deterministic = True
        self.model = instructir.create_model(
            input_channels=cfg.model.in_ch,
            width=cfg.model.width,
            enc_blks=cfg.model.enc_blks,
            middle_blk_num=cfg.model.middle_blk_num,
            dec_blks=cfg.model.dec_blks,
            txtdim=cfg.model.textdim,
        )
        self.model = self.model.to(device)
        print("IMAGE MODEL CKPT:", MODEL_NAME)
        self.model.load_state_dict(
            torch.load(MODEL_NAME, map_location="cpu"), strict=True
        )

        # Initialize the LanguageModel class
        LMODEL = cfg.llm.model
        self.language_model = LanguageModel(model=LMODEL)
        self.lm_head = LMHead(
            embedding_dim=cfg.llm.model_dim,
            hidden_dim=cfg.llm.embd_dim,
            num_classes=cfg.llm.nclasses,
        )
        self.lm_head = self.lm_head  # .to(device)
        print("LMHEAD MODEL CKPT:", LM_MODEL)
        self.lm_head.load_state_dict(
            torch.load(LM_MODEL, map_location="cpu"), strict=True
        )
        print("Loaded weights!")

    def predict(
        self,
        image: Path = Input(description="Input image."),
        prompt: str = Input(description="Input prompt."),
        seed: int = Input(
            description="Random seed. Leave blank to randomize the seed", default=None
        ),
    ) -> Path:
        """Run a single prediction on the model"""
        if seed is None:
            seed = int.from_bytes(os.urandom(2), "big")
        print(f"Using seed: {seed}")
        seed_everything(SEED=seed)
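        # NOTE: seed_everything comes from utils; it is assumed to seed the Python,
        # NumPy and PyTorch RNGs so runs with the same seed are reproducible.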

        # The model runs on CPU here; only touch the CUDA allocator when a GPU is
        # actually available, otherwise these calls can fail on CPU-only machines.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.reset_peak_memory_stats()

        image = load_img(str(image))
        out_image = process_img(
            image, prompt, self.language_model, self.model, self.lm_head
        )
        out_path = "/tmp/out.png"
        saveImage(out_path, out_image)
        return Path(out_path)


def process_img(image, prompt, language_model, model, lm_head):
    """
    Run InstructIR to restore the image according to the human-written prompt.

    image: RGB image as a numpy array normalized to [0, 1]
    prompt: plain Python string describing the restoration to perform
    returns: the restored image as a numpy array in [0, 1]
    """
    # Convert the image to a tensor
    y = torch.Tensor(image).permute(2, 0, 1).unsqueeze(0)

    # Get the text embedding (and predicted degradation class)
    lm_embd = language_model(prompt)
    lm_embd = lm_embd  # .to(device)
    text_embd, deg_pred = lm_head(lm_embd)

    # Forward pass: Paper Figure 2
    x_hat = model(y, text_embd)

    # Convert the restored image <x_hat> into a numpy array
    restored_img = x_hat[0].permute(1, 2, 0).cpu().detach().numpy()
    restored_img = np.clip(restored_img, 0.0, 1.0)
    return restored_img
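

# --- Usage sketch (not part of the Cog interface) ----------------------------
# A minimal local smoke test, assuming the checkpoints exist under models/ and
# that utils provides load_img/saveImage/seed_everything as used above. The
# input file name and the prompt below are illustrative, not part of the repo.
if __name__ == "__main__":
    predictor = Predictor()
    predictor.setup()
    result = predictor.predict(
        image=Path("test_input.png"),
        prompt="Please remove the noise from this photo",
        seed=42,
    )
    print("Restored image written to:", result)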