Spaces:
Running
on
Zero
Running
on
Zero
davanstrien
HF Staff
Update example images in app.py: remove bpl_1.jpg and add new examples (bpl_4.jpg, bpl_6.jpg, bpl_8.jpg, bpl_9.jpg, bpl_12.jpg, bpl_15.jpg, bpl_22.jpg)
2b9ac6b
| import gradio as gr | |
| from PIL import Image | |
| import os | |
| import torch | |
| import json | |
| import spaces | |
| from transformers import AutoModelForImageTextToText, AutoProcessor | |
| from qwen_vl_utils import process_vision_info | |
# Opt in to the accelerated Hub download path.
# NOTE(review): this flag is assigned after `transformers` has already been
# imported; huggingface_hub may read it at import time — confirm it still
# takes effect here.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Single source of truth for the checkpoint used by both the model and its
# processor.
MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"

# Load the weights and the matching processor once, at import time, so every
# request reuses the same objects.
print("Loading Qwen3-VL-30B-A3B-Instruct model...")
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)
print("Model loaded successfully!")
# Instruction sent to the model together with the card image. It lists the
# JSON fields to extract, one prompt line per tuple entry.
EXTRACTION_PROMPT = "\n".join(
    (
        "Extract metadata from this library catalog card as JSON.",
        "Library catalog cards contain bibliographic information about materials and filing/access information. Extract whatever fields are present:",
        "CORE BIBLIOGRAPHIC FIELDS:",
        "- title: Full title of the work",
        "- author: Main author/creator (person or organization)",
        "- editor: Editor if different from author",
        "- contributor: Other contributors (translators, illustrators, etc.)",
        "- publication_date: Date(s) of publication",
        "- publisher: Publisher name",
        "- publication_place: Place of publication",
        "- physical_description: Physical details (volumes, pages, size, illustrations)",
        "- series: Series information if part of a series",
        "- edition: Edition statement",
        "- contents: Description of contents, volumes, or parts",
        "CATALOGING/ACCESS FIELDS:",
        "- call_number: Library classification number",
        "- subject_headings: Subject terms (often numbered list)",
        "- added_entries: Additional access points for co-authors, editors, etc. (often with Roman numerals)",
        "- notes: Any additional notes",
        "CARD-SPECIFIC:",
        "- filing_heading: The heading under which this card is filed (often at top, may be in all caps)",
        '- card_sequence: If this is a continuation card (e.g., "Card 2", "Card 3")',
        "Return ONLY valid JSON. Use null for fields not present on the card. Use arrays [] for repeating fields like subject_headings and added_entries.",
    )
)
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    body = text.strip()
    if len(body) >= 6 and body.startswith("```") and body.endswith("```"):
        body = body[3:-3]
        # The opening fence may carry a language tag such as "json" on its own
        # line; drop that line unless it already begins the JSON payload.
        first_line, newline, rest = body.partition("\n")
        if newline and not first_line.lstrip().startswith(("{", "[")):
            body = rest
    return body.strip()


def _format_model_output(raw_text):
    """Pretty-print *raw_text* as JSON, or return it unchanged if it is not JSON."""
    try:
        parsed = json.loads(_strip_code_fence(raw_text))
    except json.JSONDecodeError:
        # Not valid JSON even after removing a fence: show the reply as-is.
        return raw_text
    # ensure_ascii=False keeps accented and non-Latin characters readable
    # instead of turning them into \uXXXX escapes.
    return json.dumps(parsed, indent=2, ensure_ascii=False)


# NOTE(review): `spaces` is imported by this file but no `@spaces.GPU`
# decorator is visible on this function; if the app is deployed on ZeroGPU
# hardware, confirm whether the decorator is required here.
def extract_metadata(image):
    """Extract structured metadata from a catalog card image.

    Args:
        image: A PIL image, or anything ``PIL.Image.open`` accepts (for
            example a file path). ``None`` means nothing was uploaded.

    Returns:
        A string: the model's answer pretty-printed as JSON when it parses,
        the raw model text when it does not, a prompt to upload an image
        when *image* is ``None``, or an ``"Error during extraction: ..."``
        message if anything raised along the way.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Normalise non-PIL inputs to an in-memory RGB image and release the
        # underlying file handle right away.
        if not isinstance(image, Image.Image):
            with Image.open(image) as source:
                image = source.convert("RGB")

        # Single-turn chat: the card image followed by the extraction prompt.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": EXTRACTION_PROMPT},
                ],
            }
        ]

        # Render the chat template and collect the vision inputs.
        prompt_text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(
            text=[prompt_text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        ).to(model.device)

        # Greedy decoding. `temperature` is intentionally not passed: it only
        # applies when sampling, so combining it with do_sample=False has no
        # effect beyond a configuration warning.
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs, max_new_tokens=512, do_sample=False
            )

        # generate() returns prompt + completion; keep only the completion.
        completion_ids = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(
            completion_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]

        return _format_model_output(output_text)
    except Exception as e:
        # UI boundary: report the failure in the output panel instead of
        # letting the request crash.
        return f"Error during extraction: {e}"
# Build the Gradio UI: an upload column and a JSON result column side by side,
# followed by example cards and a footer.
# NOTE(review): the emoji in the labels below replace mis-decoded text
# ("π…"). 📤 and 🎯 were recovered from the surviving bytes; 📚, 🔍 and 📋 were
# chosen to fit their labels — confirm against the intended icons.
with gr.Blocks(title="Library Card Metadata Extractor") as demo:
    gr.Markdown("# 📚 Library Card Metadata Extractor")
    gr.Markdown(
        "Extract structured metadata from library catalog cards using **Qwen/Qwen3-VL-30B-A3B-Instruct**. "
        "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
        "call numbers, and more.\n\n"
        "This demo works with catalog cards from libraries and archives, such as the "
        "[Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog)."
    )
    gr.Markdown("---")

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Catalog Card")
            image_input = gr.Image(label="Library Catalog Card", type="pil")
            submit_btn = gr.Button("🔍 Extract Metadata", variant="primary", size="lg")
        # Right column: the extracted metadata rendered as JSON.
        with gr.Column(scale=1):
            gr.Markdown("### 📋 Extracted Metadata (JSON)")
            output = gr.Code(label="Metadata", language="json", lines=15)

    submit_btn.click(fn=extract_metadata, inputs=image_input, outputs=output)
    gr.Markdown("---")

    # Examples: bundled Boston Public Library cards, addressed by card id.
    gr.Markdown("## 🎯 Try Examples")
    gr.Examples(
        examples=[
            [f"examples/bpl_{card_id}.jpg"]
            for card_id in (0, 2, 4, 6, 8, 9, 12, 15, 22)
        ],
        inputs=image_input,
        outputs=output,
        fn=extract_metadata,
        cache_examples=False,
    )
    gr.Markdown("---")

    # Footer
    gr.Markdown(
        "<center>\n\n"
        "Built for the GLAM community using [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct) | "
        "Example cards from [Rubenstein](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [BPL](https://huggingface.co/datasets/biglam/bpl-card-catalog) collections\n\n"
        "</center>"
    )
def main():
    """Announce startup and serve the Gradio app."""
    print("Launching demo...")
    demo.launch()


# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    main()