Spaces:
Sleeping
Sleeping
File size: 3,023 Bytes
a7fae6f 54724a9 a7fae6f 54724a9 a7fae6f 54724a9 a7fae6f 54724a9 42c3b13 a7fae6f 42c3b13 54724a9 a7fae6f 54724a9 a7fae6f 42c3b13 a7fae6f 54724a9 a7fae6f e402baa a7fae6f 42c3b13 54724a9 42c3b13 54724a9 42c3b13 54724a9 42c3b13 54724a9 42c3b13 e402baa a7fae6f 54724a9 a7fae6f 42c3b13 a7fae6f e402baa a7fae6f e402baa 42c3b13 e402baa 54724a9 42c3b13 54724a9 e402baa 42c3b13 e402baa a7fae6f 54724a9 a7fae6f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 | import gradio as gr
from PIL import Image
from transformers import (
BlipProcessor,
BlipForConditionalGeneration,
AutoTokenizer,
AutoModelForSeq2SeqLM
)
import torch
# -------- Load Models -------- #
# Checkpoint identifiers, named once so each processor/model pair stays in sync.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
_FLAN_T5_CHECKPOINT = "google/flan-t5-base"

# Everything below is instantiated once at module import, so each request
# reuses the same objects instead of reloading them.

# Captioning stage: turns the uploaded image into a short base caption.
blip_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

# Rewriting stage: expands the base caption into SEO-oriented alt text.
seo_tokenizer = AutoTokenizer.from_pretrained(_FLAN_T5_CHECKPOINT)
seo_model = AutoModelForSeq2SeqLM.from_pretrained(_FLAN_T5_CHECKPOINT)
# -------- Core Function -------- #
def _caption_image(image):
    """Return the BLIP caption for *image* without surrounding whitespace.

    Trailing periods are removed so callers can append their own sentence
    terminator without producing "..".
    """
    inputs = blip_processor(image, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = blip_model.generate(**inputs, max_new_tokens=30)
    caption = blip_processor.decode(output[0], skip_special_tokens=True)
    return caption.strip().rstrip(".").rstrip()


def _build_seo_prompt(base_caption, keywords):
    """Build the FLAN-T5 instruction prompt for an SEO-style alt text."""
    # The UI textbox normally yields "", but programmatic callers may pass
    # None; treat both (and whitespace-only input) as "no keywords".
    keywords = (keywords or "").strip()
    keyword_instruction = (
        f"Include the following keywords naturally: {keywords}. "
        if keywords
        else ""
    )
    return (
        "You are an SEO expert. "
        "Write a detailed, descriptive, and natural alt text for a website image. "
        "The alt text should be longer than the original caption and written in a single sentence. "
        f"{keyword_instruction}"
        f"Image description: {base_caption}."
    )


def generate_alt_text(image, seo_mode, keywords):
    """Generate alt text for an image, optionally expanded for SEO.

    Args:
        image: The image to describe (the UI supplies a PIL image), or None
            when nothing was uploaded.
        seo_mode: When falsy, the base caption is returned as a capitalized
            sentence. When truthy, the caption is rewritten by the seq2seq
            model using an SEO prompt.
        keywords: Optional keyword string to include in the SEO text. None
            and whitespace-only values mean "no keywords". Ignored when
            ``seo_mode`` is falsy.

    Returns:
        The alt text as a string. Returns "" when there is no image or when
        the captioning model produced no text. SEO output is sampled
        (``do_sample=True``), so repeated calls may differ.
    """
    if image is None:
        return ""

    # ---- Step 1: Base Caption ---- #
    base_caption = _caption_image(image)
    if not base_caption:
        # Nothing to describe: "" is more honest than a bare "." and avoids
        # prompting the rewriter with an empty image description.
        return ""

    # ---- Step 2: Normal Mode ---- #
    if not seo_mode:
        return base_caption.capitalize() + "."

    # ---- Step 3: SEO Prompt ---- #
    prompt = _build_seo_prompt(base_caption, keywords)
    seo_inputs = seo_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
    )

    # ---- Step 4: Force Expansion ---- #
    with torch.no_grad():
        seo_output = seo_model.generate(
            **seo_inputs,
            max_new_tokens=120,
            do_sample=True,
            top_p=0.95,
            temperature=1.0,
        )
    return seo_tokenizer.decode(
        seo_output[0],
        skip_special_tokens=True,
    ).strip()
# -------- Gradio UI -------- #
with gr.Blocks(title="Alt Text Generator") as demo:
    # Page heading and tagline.
    gr.Markdown(
        "\n"
        "# 🖼️ Alt Text Generator\n"
        "AI-powered alt text for accessibility and SEO.\n"
    )

    # Inputs, created in the order they should appear on the page.
    image_input = gr.Image(type="pil", label="Upload Image")
    seo_toggle = gr.Checkbox(
        label="SEO Mode (expanded, keyword-aware alt text)",
        value=False,
    )
    keyword_input = gr.Textbox(
        label="Keywords (optional)",
        placeholder="e.g. science experiment for kids, STEM education",
    )

    # Output area for the generated text.
    alt_text_output = gr.Textbox(label="Generated Alt Text", lines=6)

    # The button passes the three inputs to generate_alt_text and writes the
    # returned string into the output textbox.
    generate_btn = gr.Button("Generate Alt Text 🚀")
    generate_btn.click(
        fn=generate_alt_text,
        inputs=[image_input, seo_toggle, keyword_input],
        outputs=alt_text_output,
    )

demo.launch()
|