| import gradio as gr | |
| from scripts.generate_image_caption import predict_caption | |
| from scripts.blip_model import generate_blip_caption | |
def get_caption(image_path, model_choice):
    """Generate a caption for an image using the selected model.

    Args:
        image_path: Path of the image file to caption. It is forwarded
            unchanged to the selected captioning helper.
        model_choice: Name of the captioning backend, either ``"BLIP"`` or
            ``"CNN_LSTM"``.

    Returns:
        For ``"BLIP"``, whatever ``generate_blip_caption`` returns.
        For ``"CNN_LSTM"``, a two-line string holding the greedy-search and
        beam-search captions returned by ``predict_caption``.

    Raises:
        ValueError: If ``image_path`` is empty/``None`` or ``model_choice``
            is not one of the supported model names.
    """
    # Fail early with a clear message instead of handing an empty path to
    # the model helpers.
    if not image_path:
        raise ValueError("No image provided. Please upload an image first.")

    if model_choice == "BLIP":
        return generate_blip_caption(image_path)

    if model_choice == "CNN_LSTM":
        greedy_caption, beam_caption = predict_caption(image_path)
        return f"Greedy Search: {greedy_caption}\nBeam Search: {beam_caption}"

    # Previously an unknown choice fell through and silently returned None.
    raise ValueError(
        f"Unknown model choice: {model_choice!r}. "
        "Expected 'BLIP' or 'CNN_LSTM'."
    )
# UI components, built in the same order as they appear in the interface:
# the image upload (configured with type="filepath"), the model selector
# (defaulting to BLIP), and the text box that shows the caption.
_image_input = gr.Image(label="Upload Image", type="filepath")
_model_selector = gr.Dropdown(
    choices=["BLIP", "CNN_LSTM"],
    value="BLIP",
    label="Choose Model",
)
_caption_output = gr.Textbox(label="Generated Caption")

# Sample images offered below the form; each example row supplies only the
# image value.
_example_images = (
    "examples/fight.jpg",
    "examples/101669240_b2d3e7f17b.jpg",
)

# Wire the components to get_caption.
iface = gr.Interface(
    fn=get_caption,
    inputs=[_image_input, _model_selector],
    outputs=_caption_output,
    title="Image Captioning with BLIP and CNN-LSTM",
    description="Upload an image and choose a model to generate a caption.",
    examples=[[image_file] for image_file in _example_images],
)

# Start the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()