Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import AutoProcessor, AutoModelForCausalLM | |
| from PIL import Image | |
| import torch | |
| # Load the Florence model and processor | |
@st.cache_resource
def load_model():
    """Load the Florence-2 model and its processor once per server process.

    Streamlit re-executes this script from top to bottom on every user
    interaction. ``st.cache_resource`` keeps the loaded objects alive across
    those reruns so the checkpoint is not reloaded each time.

    Returns:
        A ``(model, processor)`` tuple. The model is put in eval mode and
        converted to float32.
    """
    model_id = 'microsoft/Florence-2-large'
    # NOTE(review): trust_remote_code=True runs Python code shipped with the
    # model repository; keep model_id pinned to a source you trust.
    model = (
        AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
        .eval()
        .to(torch.float32)
    )
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return model, processor


# Load the model and processor globally (served from the cache after the
# first run of the script).
model, processor = load_model()
| # Function to run the model | |
def run_example(task_prompt, image, text_input=None):
    """Run one Florence task on an image and return the parsed answer.

    Args:
        task_prompt: Task token passed to the model, e.g. ``'<CAPTION>'``.
        image: Image object exposing ``width`` and ``height``; it is handed
            to the processor and its size to the post-processing step.
        text_input: Optional text appended to ``task_prompt`` to form the
            full prompt.

    Returns:
        The value produced by ``processor.post_process_generation`` for
        ``task_prompt``.
    """
    prompt = task_prompt if text_input is None else task_prompt + text_input

    # Prepare inputs
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    # Token ids are embedding indices and must keep their integer dtype;
    # casting them to float32 breaks the embedding lookup. Only the pixel
    # values are cast to float32.
    input_ids = inputs["input_ids"]
    pixel_values = inputs["pixel_values"].to(torch.float32)

    # Generate predictions
    generated_ids = model.generate(
        input_ids=input_ids,
        pixel_values=pixel_values,
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    # Special tokens are kept because the post-processor receives the raw
    # generated text together with the task token.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )
    return parsed_answer
# Streamlit UI
st.title("Microsoft Florence Image Captioning (CPU)")

# File uploader
uploaded_file = st.file_uploader("Upload an image (PNG or JPG)", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Top-level UI boundary: any failure (unreadable image, model error,
    # file write error) is reported in the page instead of as a traceback.
    try:
        # Convert and display the image
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)

        st.subheader("Generated Captions")

        # (label shown to the user, task token sent to the model)
        caption_tasks = (
            ("Caption", "<CAPTION>"),
            ("Detailed Caption", "<DETAILED_CAPTION>"),
            ("More Detailed Caption", "<MORE_DETAILED_CAPTION>"),
        )

        # Streamlit reruns the whole script on every widget interaction,
        # including a click on "Save Captions". Cache the captions per
        # uploaded file so a rerun does not repeat three generations.
        fingerprint = hash(uploaded_file.getvalue())
        cached = st.session_state.get("florence_captions")
        if cached is None or cached["fingerprint"] != fingerprint:
            with st.spinner("Generating caption..."):
                captions = {}
                for label, task in caption_tasks:
                    result = run_example(task, image)
                    # NOTE(review): the Florence-2 post-processor is expected
                    # to return {task: text}; unwrap it when it does so the
                    # user sees plain text. Any other shape is kept as-is.
                    if isinstance(result, dict) and task in result:
                        result = result[task]
                    captions[label] = result
            st.session_state["florence_captions"] = {
                "fingerprint": fingerprint,
                "captions": captions,
            }
        else:
            captions = cached["captions"]

        for label, text in captions.items():
            st.write(f"**{label}:**", text)

        # Option to save the output
        if st.button("Save Captions"):
            output_path = "captions.txt"
            # Explicit UTF-8 so non-ASCII caption text cannot fail under a
            # narrow default locale encoding.
            with open(output_path, "w", encoding="utf-8") as file:
                file.writelines(
                    f"{label}: {text}\n" for label, text in captions.items()
                )
            st.success(f"Captions saved to {output_path}!")
    except Exception as e:
        st.error(f"Error: {e}")