# davanstrien's picture
# davanstrien HF Staff
# Update example images in app.py: remove bpl_1.jpg and add new examples (bpl_4.jpg, bpl_6.jpg, bpl_8.jpg, bpl_9.jpg, bpl_12.jpg, bpl_15.jpg, bpl_22.jpg)
# 2b9ac6b
import os

# huggingface_hub evaluates HF_HUB_ENABLE_HF_TRANSFER once, when the package is
# first imported. The third-party imports below pull it in, so the flag must be
# set before them; setting it afterwards is silently ignored.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import json  # noqa: E402

import gradio as gr  # noqa: E402
import spaces  # noqa: E402
import torch  # noqa: E402
from PIL import Image  # noqa: E402
from qwen_vl_utils import process_vision_info  # noqa: E402
from transformers import AutoModelForImageTextToText, AutoProcessor  # noqa: E402

# Single source of truth for the checkpoint so the model and its processor
# can never drift apart.
MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"

# Load model and processor once at import time; every request reuses them.
print("Loading Qwen3-VL-30B-A3B-Instruct model...")
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(MODEL_ID)
print("Model loaded successfully!")
# Instruction sent to the model alongside the card image. It is kept as one
# entry per output line so individual field descriptions are easy to diff.
_PROMPT_LINES = (
    "Extract metadata from this library catalog card as JSON.",
    "Library catalog cards contain bibliographic information about materials and filing/access information. Extract whatever fields are present:",
    "CORE BIBLIOGRAPHIC FIELDS:",
    "- title: Full title of the work",
    "- author: Main author/creator (person or organization)",
    "- editor: Editor if different from author",
    "- contributor: Other contributors (translators, illustrators, etc.)",
    "- publication_date: Date(s) of publication",
    "- publisher: Publisher name",
    "- publication_place: Place of publication",
    "- physical_description: Physical details (volumes, pages, size, illustrations)",
    "- series: Series information if part of a series",
    "- edition: Edition statement",
    "- contents: Description of contents, volumes, or parts",
    "CATALOGING/ACCESS FIELDS:",
    "- call_number: Library classification number",
    "- subject_headings: Subject terms (often numbered list)",
    "- added_entries: Additional access points for co-authors, editors, etc. (often with Roman numerals)",
    "- notes: Any additional notes",
    "CARD-SPECIFIC:",
    "- filing_heading: The heading under which this card is filed (often at top, may be in all caps)",
    '- card_sequence: If this is a continuation card (e.g., "Card 2", "Card 3")',
    "Return ONLY valid JSON. Use null for fields not present on the card. Use arrays [] for repeating fields like subject_headings and added_entries.",
)
EXTRACTION_PROMPT = "\n".join(_PROMPT_LINES)
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line ("```" or "```json"), then the closing fence.
    _, _, body = stripped.partition("\n")
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


@spaces.GPU
def extract_metadata(image):
    """Extract structured metadata from a catalog card image.

    Args:
        image: A PIL image, or anything ``PIL.Image.open`` accepts (such as a
            file path). ``None`` means nothing was supplied.

    Returns:
        A string: pretty-printed JSON when the model's reply parses as JSON
        (a surrounding Markdown code fence is tolerated), the raw reply when
        it does not, or a human-readable message when no image was given or
        extraction failed. The function does not raise.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Ensure image is a PIL Image
        if not isinstance(image, Image.Image):
            image = Image.open(image).convert("RGB")

        # Single-turn chat: the card image followed by the extraction prompt.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": EXTRACTION_PROMPT},
                ],
            }
        ]

        # Prepare inputs
        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        inputs = inputs.to(model.device)

        # Greedy decoding. No temperature is passed: it is ignored when
        # do_sample=False and only produces a warning.
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs, max_new_tokens=512, do_sample=False
            )

        # generate() returns prompt + completion; keep only the completion.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        output_text = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]

        # Pretty-print when the reply is JSON. Models frequently wrap JSON in
        # a ```json fence, so strip that before parsing.
        try:
            json_data = json.loads(_strip_code_fence(output_text))
        except json.JSONDecodeError:
            # Not valid JSON (e.g. truncated reply): show what the model said.
            return output_text
        # ensure_ascii=False keeps accented titles/names readable instead of
        # emitting \uXXXX escapes.
        return json.dumps(json_data, indent=2, ensure_ascii=False)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        return f"Error during extraction: {e}"
# Create Gradio interface: upload column on the left, JSON output on the
# right, followed by clickable example cards and a footer.
with gr.Blocks(title="Library Card Metadata Extractor") as demo:
    gr.Markdown("# 📇 Library Card Metadata Extractor")
    gr.Markdown(
        "Extract structured metadata from library catalog cards using **Qwen/Qwen3-VL-30B-A3B-Instruct**. "
        "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
        "call numbers, and more.\n\n"
        "This demo works with catalog cards from libraries and archives, such as the "
        "[Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog)."
    )
    gr.Markdown("---")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Catalog Card")
            image_input = gr.Image(label="Library Catalog Card", type="pil")
            submit_btn = gr.Button("🔍 Extract Metadata", variant="primary", size="lg")
        with gr.Column(scale=1):
            gr.Markdown("### 📋 Extracted Metadata (JSON)")
            output = gr.Code(label="Metadata", language="json", lines=15)

    submit_btn.click(fn=extract_metadata, inputs=image_input, outputs=output)

    gr.Markdown("---")

    # Examples. They are not cached, so each click runs the model live.
    gr.Markdown("## 🎯 Try Examples")
    gr.Examples(
        examples=[
            ["examples/bpl_0.jpg"],
            ["examples/bpl_2.jpg"],
            ["examples/bpl_4.jpg"],
            ["examples/bpl_6.jpg"],
            ["examples/bpl_8.jpg"],
            ["examples/bpl_9.jpg"],
            ["examples/bpl_12.jpg"],
            ["examples/bpl_15.jpg"],
            ["examples/bpl_22.jpg"],
        ],
        inputs=image_input,
        outputs=output,
        fn=extract_metadata,
        cache_examples=False,
    )

    gr.Markdown("---")

    # Footer
    gr.Markdown(
        "<center>\n\n"
        "Built for the GLAM community using [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct) | "
        "Example cards from [Rubenstein](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [BPL](https://huggingface.co/datasets/biglam/bpl-card-catalog) collections\n\n"
        "</center>"
    )
# Script entry point: start the Gradio server only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    print("Launching demo...")
    demo.launch()