Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,416 Bytes
a85cd29 20edd58 a85cd29 20edd58 a9a6a82 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 20edd58 a85cd29 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import os

# This flag has to be in the environment BEFORE the third-party imports
# below: huggingface_hub reads HF_HUB_ENABLE_HF_TRANSFER once, when it is
# first imported (transformers, among others, imports it), so assigning it
# after those imports has no effect on downloads.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# The remaining imports keep their original relative order on purpose
# (in particular `spaces` stays ahead of `outlines` and `transformers`).
import gradio as gr  # noqa: E402
from PIL import Image as PILImage  # noqa: E402
import json  # noqa: E402
import spaces  # noqa: E402
from typing import Optional  # noqa: E402
from pydantic import BaseModel, Field  # noqa: E402
import outlines  # noqa: E402
from outlines.inputs import Chat, Image  # noqa: E402
from transformers import Qwen3VLMoeForConditionalGeneration, AutoProcessor  # noqa: E402
# Define the metadata schema
class CatalogCardMetadata(BaseModel):
    """Structured metadata from a library catalog card."""

    # Every field is optional and defaults to None, so a card that lacks a
    # piece of information still validates; extract_metadata() drops the
    # None-valued fields (exclude_none=True) before showing the result.

    # Bibliographic identity
    title: Optional[str] = Field(
        default=None,
        description="The main title or name on the card",
    )
    author: Optional[str] = Field(
        default=None,
        description="Author, creator, or associated person/organization",
    )
    date: Optional[str] = Field(
        default=None,
        description="Any dates mentioned (publication, creation, or coverage dates)",
    )

    # Shelving and physical details
    call_number: Optional[str] = Field(
        default=None,
        description="Library classification or call number",
    )
    physical_description: Optional[str] = Field(
        default=None,
        description="Details about the physical item (size, extent, format)",
    )

    # Topical access points and free text
    subjects: Optional[list[str]] = Field(
        default=None,
        description="Subject headings or topics",
    )
    notes: Optional[str] = Field(
        default=None,
        description="Any additional notes or information",
    )
# Load the checkpoint and its processor once, at import time, then wrap the
# pair in an Outlines model so generation can be constrained to a schema.
MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"

print("Loading Qwen3-VL-30B-A3B-Instruct model with Outlines...")
hf_model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
hf_processor = AutoProcessor.from_pretrained(MODEL_ID)
model = outlines.from_transformers(hf_model, hf_processor)
print("Model loaded successfully!")
# Instruction text sent to the model together with the card image.
EXTRACTION_PROMPT = (
    "Extract all metadata from this library catalog card. "
    "Include title, author, dates, call number, physical description, "
    "subjects, and notes. "
    "If a field is not present, omit it."
)
@spaces.GPU
def extract_metadata(image) -> str:
    """Extract structured metadata from a catalog card image using Outlines.

    Args:
        image: A PIL image, or any value ``PIL.Image.open`` accepts (it is
            opened and converted to RGB in that case). ``None`` means no
            image was supplied.

    Returns:
        On success, a JSON document (2-space indent) holding the
        ``CatalogCardMetadata`` fields that are not ``None``. Non-ASCII
        characters are emitted as-is rather than as ``\\uXXXX`` escapes so
        accented names and titles stay readable. If ``image`` is ``None``
        or any step fails, a plain-text message is returned instead; this
        function does not raise.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Ensure image is PIL Image
        if not isinstance(image, PILImage.Image):
            image = PILImage.open(image).convert("RGB")

        # Set format (required by Outlines Image class)
        if not image.format:
            image.format = "PNG"

        # Create Chat prompt with Image
        prompt = Chat(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": EXTRACTION_PROMPT},
                        {"type": "image", "image": Image(image)},
                    ],
                }
            ]
        )

        # Generation is constrained to the CatalogCardMetadata schema. The
        # output is still capped at max_new_tokens, so it may be cut short;
        # validation below then raises and the error is reported to the user.
        result = model(prompt, CatalogCardMetadata, max_new_tokens=512)

        metadata = CatalogCardMetadata.model_validate_json(result)
        return json.dumps(
            metadata.model_dump(exclude_none=True),
            indent=2,
            ensure_ascii=False,  # keep accented / non-Latin text human-readable
        )
    except Exception as e:
        # UI boundary: surface the failure in the output box instead of raising.
        return f"Error during extraction: {str(e)}"
# Create Gradio interface
#
# Layout: title and description, then one row with two equal columns (image
# upload + submit button on the left, JSON output on the right), then
# clickable example cards and a footer.
#
# NOTE(review): the emoji in the headings and on the button were mojibake
# ("π", "π€", "π―") in the previous revision. 📤 and 🎯 are recovered from
# the surviving bytes; 📚, 🔍 and 📋 are best-guess replacements for glyphs
# whose trailing bytes were lost - confirm against the intended design.
with gr.Blocks(title="Library Card Metadata Extractor") as demo:
    gr.Markdown("# 📚 Library Card Metadata Extractor")
    gr.Markdown(
        "Extract structured metadata from library catalog cards using **Qwen3-VL-30B**. "
        "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
        "call numbers, and more.\n\n"
        "This demo works with catalog cards from libraries and archives, such as the "
        "[Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog)."
    )
    gr.Markdown("---")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Catalog Card")
            # type="pil" makes Gradio pass a PIL image to extract_metadata.
            image_input = gr.Image(label="Library Catalog Card", type="pil")
            submit_btn = gr.Button("🔍 Extract Metadata", variant="primary", size="lg")
        with gr.Column(scale=1):
            gr.Markdown("### 📋 Extracted Metadata (JSON)")
            output = gr.Code(label="Metadata", language="json", lines=15)

    submit_btn.click(fn=extract_metadata, inputs=image_input, outputs=output)

    gr.Markdown("---")

    # Examples
    gr.Markdown("## 🎯 Try Examples")
    gr.Examples(
        examples=[
            ["examples/rubenstein_0.jpg"],
            ["examples/rubenstein_1.jpg"],
            ["examples/rubenstein_2.jpg"],
            ["examples/bpl_0.jpg"],
            ["examples/bpl_1.jpg"],
            ["examples/bpl_2.jpg"],
        ],
        inputs=image_input,
        outputs=output,
        fn=extract_metadata,
        cache_examples=False,
    )

    gr.Markdown("---")

    # Footer
    gr.Markdown(
        "<center>\n\n"
        "Built for the GLAM community using [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct) | "
        "Example cards from [Rubenstein](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [BPL](https://huggingface.co/datasets/biglam/bpl-card-catalog) collections\n\n"
        "</center>"
    )
def main() -> None:
    """Print a startup message and launch the Gradio demo."""
    print("Launching demo...")
    demo.launch()


# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    main()
|