Spaces:
Running on Zero
Running on Zero
| from __future__ import annotations | |
| import traceback | |
| from functools import lru_cache | |
| import gradio as gr | |
| import spaces | |
| import torch | |
| from transformers import AutoModelForImageTextToText, AutoProcessor | |
# Hugging Face Hub identifier of the vision-language checkpoint to load.
MODEL_ID = "openbmb/MiniCPM-V-4.6"

# Transcription instructions, keyed by which side of the packet was
# photographed. Each value is one prompt string; the literals below are split
# at sentence boundaries purely for readability.
PROMPTS = dict(
    front=(
        "Transcribe only visibly printed marketing claims from this food packet front. "
        "Preserve exact claims and do not explain or infer."
    ),
    back=(
        "Transcribe only visibly printed food-label evidence. "
        "Focus on ingredients, nutrition values and basis, net weight, "
        "FSSAI license, dates, and after-opening instructions. "
        "For nutrition tables, preserve every visible row as "
        "'nutrient name | unit | value', include the declared basis, "
        "and do not omit zero values. "
        "Do not summarize or infer."
    ),
)
@lru_cache(maxsize=1)
def load_model():
    """Load the processor and model for ``MODEL_ID`` once and reuse them.

    The result is memoized, so only the first call performs the expensive
    ``from_pretrained`` work; later calls return the same objects. Without
    the cache every request would reload the checkpoint.

    Returns:
        A ``(processor, model)`` tuple. The model is loaded with
        ``torch_dtype="auto"`` and ``device_map="auto"`` and has been
        switched to evaluation mode.
    """
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    model = AutoModelForImageTextToText.from_pretrained(
        MODEL_ID,
        torch_dtype="auto",
        device_map="auto",
    )
    # Inference only: put the module in evaluation mode.
    model.eval()
    return processor, model
# On ZeroGPU hardware a GPU is only attached while a function decorated with
# ``spaces.GPU`` is running, so the inference entry point must carry it.
@spaces.GPU
def extract_label(image_path: str | None, side: str) -> str:
    """Transcribe the visible text of a packet label photo.

    Args:
        image_path: Filesystem path of the uploaded photo, or ``None`` when
            no image was supplied.
        side: Key into ``PROMPTS`` selecting the transcription prompt.

    Returns:
        The decoded model output with surrounding whitespace stripped;
        ``"No image supplied."`` when ``image_path`` is ``None``; or a string
        starting with ``PACKETCOURT_VISION_ERROR:`` when ``side`` is unknown
        or anything fails during loading or inference.
    """
    if image_path is None:
        return "No image supplied."

    # Resolve the prompt before touching the model so that a bad ``side``
    # fails fast instead of paying for a model load first.
    prompt = PROMPTS.get(side)
    if prompt is None:
        return (
            f"PACKETCOURT_VISION_ERROR: KeyError: unknown packet side {side!r}; "
            f"expected one of {sorted(PROMPTS)}"
        )

    try:
        processor, model = load_model()
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "url": image_path},
                    {"type": "text", "text": prompt},
                ],
            }
        ]
        inputs = processor.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
            downsample_mode="4x",
            max_slice_nums=36,
        ).to(model.device)
        generated = model.generate(
            **inputs,
            downsample_mode="4x",
            max_new_tokens=512,
            do_sample=False,
        )
        # Drop the first len(input_ids) tokens of each generated sequence so
        # that only the tokens after the prompt are decoded.
        trimmed = [
            output[len(source):]
            for source, output in zip(inputs.input_ids, generated)
        ]
        decoded = processor.batch_decode(
            trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0].strip()
    except Exception as exc:
        # Deliberate top-level boundary: the function always returns a string,
        # so failures are reported as text (with a short traceback) rather
        # than raised.
        return f"PACKETCOURT_VISION_ERROR: {type(exc).__name__}: {exc}\n{traceback.format_exc(limit=4)}"
# Build the widgets first, in the same order as before (image, radio,
# textbox), then hand them to the interface.
_photo_input = gr.Image(type="filepath", label="Packet label photo")
_side_input = gr.Radio(["front", "back"], value="back", label="Packet side")
_evidence_output = gr.Textbox(label="Visible label evidence")

# Single-function UI: a photo plus a side selector go in, transcribed text
# comes out. Flagging is turned off.
demo = gr.Interface(
    fn=extract_label,
    inputs=[_photo_input, _side_input],
    outputs=_evidence_output,
    title="PacketCourt Vision",
    description="OpenBMB MiniCPM-V-4.6 evidence transcription service for PacketCourt.",
    flagging_mode="never",
)


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()