File size: 3,023 Bytes
a7fae6f
 
54724a9
 
 
 
 
 
a7fae6f
 
54724a9
 
 
a7fae6f
 
54724a9
a7fae6f
 
 
54724a9
 
 
 
 
 
 
 
 
42c3b13
a7fae6f
 
 
42c3b13
54724a9
a7fae6f
 
54724a9
a7fae6f
42c3b13
a7fae6f
 
54724a9
a7fae6f
 
e402baa
a7fae6f
42c3b13
54724a9
 
 
42c3b13
 
 
 
 
 
 
54724a9
 
42c3b13
 
 
 
 
54724a9
 
 
 
 
 
 
 
42c3b13
54724a9
 
 
42c3b13
 
 
 
e402baa
a7fae6f
54724a9
 
 
 
 
 
 
 
a7fae6f
 
 
 
42c3b13
a7fae6f
 
e402baa
 
 
 
a7fae6f
e402baa
42c3b13
e402baa
 
 
54724a9
 
42c3b13
54724a9
 
e402baa
 
42c3b13
e402baa
a7fae6f
 
 
 
 
54724a9
a7fae6f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import gradio as gr
from PIL import Image
from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    AutoTokenizer,
    AutoModelForSeq2SeqLM
)
import torch

# -------- Load Models -------- #

# BLIP image-captioning model: turns a PIL image into a short English caption.
# Downloads weights from the Hugging Face Hub on first run (network required).
blip_processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)

# FLAN-T5 text-to-text model: rewrites the BLIP caption into longer,
# keyword-aware SEO alt text (used only when SEO mode is enabled).
seo_tokenizer = AutoTokenizer.from_pretrained(
    "google/flan-t5-base"
)
seo_model = AutoModelForSeq2SeqLM.from_pretrained(
    "google/flan-t5-base"
)

# -------- Core Function -------- #

def generate_alt_text(image, seo_mode, keywords):
    """Generate alt text for an uploaded image.

    Args:
        image: PIL image to describe, or ``None`` when nothing is uploaded.
        seo_mode: When truthy, expand the base caption into longer,
            keyword-aware SEO alt text using FLAN-T5.
        keywords: Optional comma-separated keywords to weave into the SEO
            text. May be ``None`` or empty.

    Returns:
        The alt text string, or ``""`` when no image is provided.
    """
    if image is None:
        return ""

    # ---- Step 1: Base Caption (BLIP) ---- #
    inputs = blip_processor(image, return_tensors="pt")

    with torch.no_grad():
        output = blip_model.generate(
            **inputs,
            max_new_tokens=30
        )

    base_caption = blip_processor.decode(
        output[0],
        skip_special_tokens=True
    ).strip()

    # ---- Step 2: Normal Mode ---- #
    if not seo_mode:
        if not base_caption:
            # Previously an empty caption produced a lone "." — return
            # nothing instead so the UI box stays empty.
            return ""
        # Uppercase only the first character. str.capitalize() would
        # lowercase the rest of the caption, mangling proper nouns.
        return base_caption[0].upper() + base_caption[1:] + "."

    # ---- Step 3: SEO Prompt ---- #
    # Guard against None (cleared/unset textbox) before stripping;
    # keywords.strip() on None would raise AttributeError.
    keywords = (keywords or "").strip()

    keyword_instruction = (
        f"Include the following keywords naturally: {keywords}. "
        if keywords else ""
    )

    prompt = (
        "You are an SEO expert. "
        "Write a detailed, descriptive, and natural alt text for a website image. "
        "The alt text should be longer than the original caption and written in a single sentence. "
        f"{keyword_instruction}"
        f"Image description: {base_caption}."
    )

    seo_inputs = seo_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True
    )

    # ---- Step 4: Force Expansion ---- #
    # Sampling (do_sample=True) makes this output non-deterministic by design.
    with torch.no_grad():
        seo_output = seo_model.generate(
            **seo_inputs,
            max_new_tokens=120,
            do_sample=True,
            top_p=0.95,
            temperature=1.0
        )

    seo_alt_text = seo_tokenizer.decode(
        seo_output[0],
        skip_special_tokens=True
    ).strip()

    return seo_alt_text

# -------- Gradio UI -------- #

# Assemble the Gradio interface: one image input, an SEO toggle, an
# optional keyword box, and a button wired to generate_alt_text.
with gr.Blocks(title="Alt Text Generator") as app:
    gr.Markdown("""
    # 🖼️ Alt Text Generator
    AI-powered alt text for accessibility and SEO.
    """)

    # Inputs
    img_upload = gr.Image(type="pil", label="Upload Image")
    seo_checkbox = gr.Checkbox(
        value=False,
        label="SEO Mode (expanded, keyword-aware alt text)"
    )
    kw_textbox = gr.Textbox(
        placeholder="e.g. science experiment for kids, STEM education",
        label="Keywords (optional)"
    )

    # Output
    result_box = gr.Textbox(lines=6, label="Generated Alt Text")

    # Action
    run_button = gr.Button("Generate Alt Text 🚀")
    run_button.click(
        fn=generate_alt_text,
        inputs=[img_upload, seo_checkbox, kw_textbox],
        outputs=result_box
    )

app.launch()