From Panels to Prose: Generating Literary Narratives from Comics
Paper
•
2503.23344
•
Published
A model for comics understanding.
This model is a duplicate of ragavsachdeva/magiv3. Please refer to the original model or its paper for more information.
from transformers import AutoModelForCausalLM, AutoProcessor
from PIL import Image
import numpy as np
import torch
def load_image(path):
    """Load the image file at ``path`` as a numpy array.

    The image is converted to mode "L" and then to mode "RGB" before being
    turned into an array.

    Args:
        path: Location of the image file; it is opened in binary mode.

    Returns:
        The converted image as a numpy array.
    """
    with open(path, "rb") as file:
        image = Image.open(file).convert("L").convert("RGB")
        # Build the array while the file handle is still open.
        image = np.array(image)
    return image
# Load every page image as an array.
images = [load_image(page_path) for page_path in ("01.jpg", "02.jpg")]

# Split the pages into individual panels. This helper is NOT provided by the
# model; supply your own implementation.
panels = splitImagesToPanels(images)

# Generate a caption for each panel. This helper is NOT provided by the model
# either; supply your own implementation.
captions = generateCaptionsFromPanels(panels)
# Load the model in half precision, then move it to the GPU and switch it to
# evaluation mode. trust_remote_code=True is passed for both the model and
# the processor.
model = AutoModelForCausalLM.from_pretrained(
    'mrfish233/magiv3',
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
model = model.cuda().eval()

processor = AutoProcessor.from_pretrained(
    'mrfish233/magiv3',
    trust_remote_code=True,
)
# Run all predictions with gradient tracking disabled.
with torch.no_grad():
    # Detections and associations predicted from the page images.
    detections = model.predict_detections_and_associations(images, processor)
    # OCR for each page.
    ocr_results = model.predict_ocr(images, processor)
    # Character grounding for the panels, using the provided captions.
    grounding = model.predict_character_grounding(panels, captions, processor)
Base model
ragavsachdeva/magiv3