Spaces:
Running
on
Zero
Running
on
Zero
import os

# huggingface_hub evaluates HF_HUB_ENABLE_HF_TRANSFER once, at the moment it
# is first imported (gradio and transformers both pull it in). The flag must
# therefore be set before the third-party imports below; setting it afterwards
# is silently ignored. Enabling it requires the `hf_transfer` package.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import json  # noqa: E402
from typing import Optional  # noqa: E402

import gradio as gr  # noqa: E402
from PIL import Image as PILImage  # noqa: E402

# `spaces` stays ahead of the model libraries.
# NOTE(review): ZeroGPU is understood to expect `spaces` to be loaded before
# anything initialises CUDA - confirm before reordering.
import spaces  # noqa: E402
from pydantic import BaseModel, Field  # noqa: E402
import outlines  # noqa: E402
from outlines.inputs import Chat, Image  # noqa: E402
from transformers import Qwen3VLMoeForConditionalGeneration, AutoProcessor  # noqa: E402
# Output schema handed to Outlines for structured generation.
# Every field is optional and defaults to None, so a card that lacks a piece
# of information simply leaves that field unset.
class CatalogCardMetadata(BaseModel):
    """Structured metadata from a library catalog card."""

    # Identification of the work.
    title: Optional[str] = Field(
        default=None,
        description="The main title or name on the card",
    )
    author: Optional[str] = Field(
        default=None,
        description="Author, creator, or associated person/organization",
    )
    date: Optional[str] = Field(
        default=None,
        description="Any dates mentioned (publication, creation, or coverage dates)",
    )

    # Shelving and physical details.
    call_number: Optional[str] = Field(
        default=None,
        description="Library classification or call number",
    )
    physical_description: Optional[str] = Field(
        default=None,
        description="Details about the physical item (size, extent, format)",
    )

    # Topical access points and free-form remarks.
    subjects: Optional[list[str]] = Field(
        default=None,
        description="Subject headings or topics",
    )
    notes: Optional[str] = Field(
        default=None,
        description="Any additional notes or information",
    )
# Hub id shared by the model weights and the matching processor.
_MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"

# Load the vision-language model and its processor once at import time, then
# wrap both in an Outlines model so generation can be constrained to a schema.
print("Loading Qwen3-VL-30B-A3B-Instruct model with Outlines...")
hf_model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
    _MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
hf_processor = AutoProcessor.from_pretrained(_MODEL_ID)
model = outlines.from_transformers(hf_model, hf_processor)
print("Model loaded successfully!")

# Instruction sent together with the card image on every request.
EXTRACTION_PROMPT = (
    "Extract all metadata from this library catalog card. "
    "Include title, author, dates, call number, physical description, "
    "subjects, and notes. If a field is not present, omit it."
)
@spaces.GPU  # request a ZeroGPU device for the duration of each call
def extract_metadata(image):
    """Extract structured metadata from a catalog card image using Outlines.

    Args:
        image: A PIL image, or anything ``PIL.Image.open`` accepts (such as a
            file path). ``None`` means nothing was uploaded.

    Returns:
        A string for the JSON output widget: pretty-printed JSON holding the
        fields that were found (unset fields are left out), a prompt to upload
        an image when ``image`` is ``None``, or an
        ``"Error during extraction: ..."`` message if anything fails.
    """
    if image is None:
        return "Please upload an image."

    # This is the UI boundary: any failure is reported to the user as text
    # instead of propagating out of the Gradio handler.
    try:
        # Ensure image is a PIL Image.
        if not isinstance(image, PILImage.Image):
            image = PILImage.open(image).convert("RGB")

        # Set format (required by the Outlines Image class); images that did
        # not come straight from a file have no format set.
        if not image.format:
            image.format = "PNG"

        # Single-turn chat prompt: the instruction text plus the card image.
        prompt = Chat(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": EXTRACTION_PROMPT},
                        {"type": "image", "image": Image(image)},
                    ],
                }
            ]
        )

        # Generation is constrained to the CatalogCardMetadata schema.
        # NOTE(review): output cut off at max_new_tokens may be incomplete
        # JSON; validation below would then raise and be reported as an error.
        result = model(prompt, CatalogCardMetadata, max_new_tokens=512)

        metadata = CatalogCardMetadata.model_validate_json(result)

        # ensure_ascii=False keeps accented and other non-ASCII characters
        # readable instead of escaping them to \uXXXX sequences.
        return json.dumps(
            metadata.model_dump(exclude_none=True),
            indent=2,
            ensure_ascii=False,
        )
    except Exception as e:
        return f"Error during extraction: {e}"
# Build the Gradio interface: an image upload on the left, the extracted JSON
# on the right, followed by clickable example cards and a footer.
with gr.Blocks(title="Library Card Metadata Extractor") as demo:
    gr.Markdown("# 📚 Library Card Metadata Extractor")
    gr.Markdown(
        "Extract structured metadata from library catalog cards using **Qwen3-VL-30B**. "
        "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
        "call numbers, and more.\n\n"
        "This demo works with catalog cards from libraries and archives, such as the "
        "[Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog)."
    )
    gr.Markdown("---")

    with gr.Row():
        # Left column: input image and the button that triggers extraction.
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Catalog Card")
            image_input = gr.Image(label="Library Catalog Card", type="pil")
            submit_btn = gr.Button("🔍 Extract Metadata", variant="primary", size="lg")

        # Right column: the resulting metadata, shown as JSON.
        with gr.Column(scale=1):
            gr.Markdown("### 📋 Extracted Metadata (JSON)")
            output = gr.Code(label="Metadata", language="json", lines=15)

    submit_btn.click(fn=extract_metadata, inputs=image_input, outputs=output)

    gr.Markdown("---")

    # Examples: with cache_examples=False nothing is pre-computed at startup.
    gr.Markdown("## 🎯 Try Examples")
    gr.Examples(
        examples=[
            ["examples/rubenstein_0.jpg"],
            ["examples/rubenstein_1.jpg"],
            ["examples/rubenstein_2.jpg"],
            ["examples/bpl_0.jpg"],
            ["examples/bpl_1.jpg"],
            ["examples/bpl_2.jpg"],
        ],
        inputs=image_input,
        outputs=output,
        fn=extract_metadata,
        cache_examples=False,
    )

    gr.Markdown("---")

    # Footer
    gr.Markdown(
        "<center>\n\n"
        "Built for the GLAM community using [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct) | "
        "Example cards from [Rubenstein](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [BPL](https://huggingface.co/datasets/biglam/bpl-card-catalog) collections\n\n"
        "</center>"
    )
# Script entry point: start the Gradio server only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    print("Launching demo...")
    demo.launch()