| | --- |
| | base_model: ragavsachdeva/magiv3 |
| | --- |
| | |
| | # Magiv3 |
| |
|
| | A model for comics understanding. |
| |
|
| | ## DISCLAIMER |
| |
|
| | This model is a copy of [ragavsachdeva/magiv3](https://huggingface.co/ragavsachdeva/magiv3). Please refer to the original model or its [paper](https://arxiv.org/abs/2503.23344) for more information. |
| |
|
| | ## Usage |
| |
|
| | ```python |
| | from transformers import AutoModelForCausalLM, AutoProcessor |
| | from PIL import Image |
| | import numpy as np |
| | import torch |
| | |
| | def load_image(path): |
| | with open(path, "rb") as file: |
| | image = Image.open(file).convert("L").convert("RGB") |
| | image = np.array(image) |
| | |
| | return image |
| | |
| | images = ["01.jpg", "02.jpg"] |
| | images = [load_image(image) for image in images] |
| | |
| | # Split the pages into panels; splitImagesToPanels is a placeholder you must implement (not provided by the model) |
| | panels = splitImagesToPanels(images) |
| | |
| | # Generate a caption for each panel; generateCaptionsFromPanels is a placeholder you must implement (not provided by the model) |
| | captions = generateCaptionsFromPanels(panels) |
| | |
| | model = AutoModelForCausalLM.from_pretrained('mrfish233/magiv3', torch_dtype=torch.float16, trust_remote_code=True).cuda().eval() |
| | processor = AutoProcessor.from_pretrained('mrfish233/magiv3', trust_remote_code=True) |
| | |
| | with torch.no_grad(): |
| | # Detections and associations for each page |
| | detections = model.predict_detections_and_associations(images, processor) |
| | |
| | # OCR for each page |
| | ocr_results = model.predict_ocr(images, processor) |
| | |
| | # Character grounding for the panels, using the provided captions |
| | grounding = model.predict_character_grounding(panels, captions, processor) |
| | ``` |
| |
|