Spaces:
Running
Running
File size: 3,568 Bytes
683a5b4 d3f61c4 ff9f4e9 1f447f3 ff9f4e9 1f0a8ac d3f61c4 ef8e1c9 1f0a8ac 1792c67 607f6c3 0680b7e 607f6c3 d3f61c4 1ba53ba 1792c67 1f0a8ac 359647d fa9c320 1ba53ba 1792c67 d3f61c4 33e5ff5 1ba53ba 1792c67 d3f61c4 1ba53ba b68a7f1 1f0a8ac 1792c67 d3f61c4 1ba53ba 1792c67 607f6c3 157edf2 0680b7e 607f6c3 3f77bf2 1f0a8ac 1f447f3 1f0a8ac 1792c67 1f0a8ac 5da3a6f 1f0a8ac 5da3a6f 1792c67 1ba53ba 1f0a8ac 5da3a6f b68a7f1 359647d 1f0a8ac 5da3a6f 1ba53ba d3f61c4 1ba53ba 1792c67 d3f61c4 607f6c3 1792c67 607f6c3 1432dc1 1ba53ba 1792c67 0d0471f 1792c67 1ba53ba d3f61c4 607f6c3 0d0471f 607f6c3 1f0a8ac d3f61c4 1ba53ba 56ce2bf 1792c67 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# Caption Generator w/English-to-Spanish Translation
# A. Harper | ARIN 460 | December 2025
# Load into Hugging Face Space (using the Gradio Framework)
# Include requirements.txt file (list: gradio, pandas, torch, sentencepiece, tensorflow, Image, transformers)
# To run, navigate to the App tab. Click the red Generate button.
# The app will randomly select an image, generate an (English) caption,
# then generate a Spanish translation.
# Import gradio - app framework
import gradio as gr
# Two image datasources are available.
# Minor adjustments (add/remove # to deactivate/activate) to switch between datasources.
# AA comments refer to images in the DataFrame / from Coco database
# BB comments refer to images stored in local Gradio app folder
# Import os and random to support random selection of image (from folder)
import os
import random
# Import pandas, datasets, transformers, PIL, and torch
import pandas as pd
from datasets import load_dataset
from transformers import (
BlipProcessor,
BlipForConditionalGeneration,
AutoTokenizer,
AutoModelForSeq2SeqLM,
MarianMTModel,
MarianTokenizer
)
from PIL import Image
import torch
# AA: dataset-backed image source (the original image source for this app).
# Fetch the validation split of henryscheible/coco_val2014_tiny.
dataset = load_dataset(
    "henryscheible/coco_val2014_tiny",
    split="validation",
)
# Keep only the first 20 rows as a small sample.
samples = dataset.select(range(20))
# Turn the sample into a pandas DataFrame (one row per dataset record).
df = pd.DataFrame(data=samples)
# BB: local image source - image files stored in the app's Photos folder.
IMAGE_FOLDER = "Photos"
# Paths of every .jpg/.jpeg/.png file directly inside IMAGE_FOLDER
# (extension match is case-insensitive).
# A missing folder yields an empty list instead of raising at import time, so
# the app can still start when only the AA (dataset) source is in use.
# Sorting gives a stable order regardless of how the filesystem lists entries.
image_paths = sorted(
    os.path.join(IMAGE_FOLDER, f)
    for f in (os.listdir(IMAGE_FOLDER) if os.path.isdir(IMAGE_FOLDER) else [])
    if f.lower().endswith((".jpg", ".jpeg", ".png"))
)
# Image captioning: BLIP processor + model (Salesforce/blip-image-captioning-large).
processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-large"
)
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-large"
)
# English-to-Spanish translation: Marian tokenizer + model sharing one checkpoint name.
model_name = "Helsinki-NLP/opus-mt-en-es"
trans_tokenizer = MarianTokenizer.from_pretrained(model_name)
trans_model = MarianMTModel.from_pretrained(model_name)
#Configure captioning function
def caption_random_image():
    """Pick a random image, caption it in English, and translate the caption to Spanish.

    The image currently comes from the local folder (BB). The dataset-backed
    alternative (AA) is kept below as commented-out lines.

    Returns:
        tuple: ``(image, caption_eng, caption_es)`` - the RGB PIL image, the
        decoded caption from the captioning model, and the decoded output of
        the translation model.

    Raises:
        gr.Error: If ``image_paths`` is empty (BB source only).
    """
    # AA: pick random row - from DF
    ##sample = df.sample(1).iloc[0]
    # BB: fail with a readable message instead of random.choice's bare
    # IndexError when the folder contained no usable images.
    if not image_paths:
        raise gr.Error(
            f"No .jpg, .jpeg or .png images found in folder '{IMAGE_FOLDER}'."
        )
    # BB: pick a random image path - image from folder
    img_path = random.choice(image_paths)
    # BB: load into PIL - image from folder. The context manager closes the
    # file handle; convert() returns a separate in-memory image.
    with Image.open(img_path) as img_file:
        image = img_file.convert("RGB")
    # AA: Image - for DF
    ##image = sample["image"]

    # Unconditional image captioning (no text prompt is passed to the processor).
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    caption_eng = processor.decode(out[0], skip_special_tokens=True)

    # Translate caption from English to Spanish. Calling the tokenizer (rather
    # than .encode) also yields the attention mask, which is forwarded to generate().
    trans_inputs = trans_tokenizer(caption_eng, return_tensors="pt")
    trans_out = trans_model.generate(**trans_inputs)
    caption_es = trans_tokenizer.decode(trans_out[0], skip_special_tokens=True)

    return image, caption_eng, caption_es
# Gradio UI: no input widgets; outputs are the chosen image plus its
# English caption and Spanish translation.
demo = gr.Interface(
    fn=caption_random_image,
    inputs=None,
    outputs=[
        gr.Image(type="pil", label="Random Image"),
        gr.Textbox(label="Caption (English)"),
        gr.Textbox(label="Caption (Spanish)"),
    ],
    title="Image Captioning (with English to Spanish translation)",
    description=(
        "Selects a random image (from either the local folder or "
        "henryscheible/coco data subset); generates a BLIP caption; "
        "then translates the (English) caption to Spanish."
    ),
)
# Start the app.
demo.launch()
|