"""Gradio app that generates image alt text, optionally expanded for SEO.

A BLIP captioning model produces a base caption for the uploaded image. In
SEO mode a FLAN-T5 model rewrites that caption from a prompt that asks for a
longer, single-sentence alt text and optionally lists user-supplied keywords.
"""

from typing import Optional

import gradio as gr
import torch
from PIL import Image
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BlipForConditionalGeneration,
    BlipProcessor,
)

# -------- Load Models -------- #

CAPTION_MODEL_ID = "Salesforce/blip-image-captioning-base"
SEO_MODEL_ID = "google/flan-t5-base"

# Loaded once at import time so every request reuses the same objects.
blip_processor = BlipProcessor.from_pretrained(CAPTION_MODEL_ID)
blip_model = BlipForConditionalGeneration.from_pretrained(CAPTION_MODEL_ID)

seo_tokenizer = AutoTokenizer.from_pretrained(SEO_MODEL_ID)
seo_model = AutoModelForSeq2SeqLM.from_pretrained(SEO_MODEL_ID)


# -------- Helpers -------- #

def _caption_image(image: Image.Image) -> str:
    """Return the BLIP caption for *image* with surrounding whitespace removed."""
    inputs = blip_processor(image, return_tensors="pt")
    with torch.no_grad():
        output = blip_model.generate(**inputs, max_new_tokens=30)
    return blip_processor.decode(output[0], skip_special_tokens=True).strip()


def _as_sentence(caption: str) -> str:
    """Format a non-empty caption as a capitalized sentence ending in a period."""
    return caption.capitalize() + "."


def _build_seo_prompt(base_caption: str, keywords: str) -> str:
    """Build the FLAN-T5 instruction prompt from the caption and keywords.

    The keyword instruction is omitted entirely when *keywords* is empty.
    """
    keyword_instruction = (
        f"Include the following keywords naturally: {keywords}. "
        if keywords
        else ""
    )
    return (
        "You are an SEO expert. "
        "Write a detailed, descriptive, and natural alt text for a website image. "
        "The alt text should be longer than the original caption and written in a single sentence. "
        f"{keyword_instruction}"
        f"Image description: {base_caption}."
    )


def _expand_for_seo(prompt: str) -> str:
    """Run *prompt* through the SEO model and return the stripped decoded text."""
    seo_inputs = seo_tokenizer(prompt, return_tensors="pt", truncation=True)
    with torch.no_grad():
        # Sampling is enabled, so repeated calls may return different text.
        seo_output = seo_model.generate(
            **seo_inputs,
            max_new_tokens=120,
            do_sample=True,
            top_p=0.95,
            temperature=1.0,
        )
    return seo_tokenizer.decode(seo_output[0], skip_special_tokens=True).strip()


# -------- Core Function -------- #

def generate_alt_text(
    image: Optional[Image.Image],
    seo_mode: bool,
    keywords: Optional[str],
) -> str:
    """Generate alt text for an uploaded image.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        seo_mode: When true, rewrite the base caption with the SEO model;
            otherwise return the base caption formatted as a sentence.
        keywords: Optional keywords to mention in SEO mode. ``None`` and
            whitespace-only values are treated as "no keywords".

    Returns:
        The generated alt text, or an empty string when there is no image
        or the captioning model produced no text.
    """
    if image is None:
        return ""

    # ---- Step 1: Base Caption ---- #
    base_caption = _caption_image(image)
    if not base_caption:
        # Without a caption there is nothing to describe: avoid returning a
        # bare "." or asking the SEO model to expand an empty description.
        return ""

    # ---- Step 2: Normal Mode ---- #
    if not seo_mode:
        return _as_sentence(base_caption)

    # ---- Step 3: SEO Prompt ---- #
    # Guard against None so a missing keyword value cannot raise
    # AttributeError on .strip().
    cleaned_keywords = (keywords or "").strip()
    prompt = _build_seo_prompt(base_caption, cleaned_keywords)

    # ---- Step 4: Force Expansion ---- #
    seo_alt_text = _expand_for_seo(prompt)

    # Fall back to the plain caption rather than showing an empty result.
    return seo_alt_text or _as_sentence(base_caption)


# -------- Gradio UI -------- #

with gr.Blocks(title="Alt Text Generator") as demo:
    gr.Markdown(
        """
        # 🖼️ Alt Text Generator
        AI-powered alt text for accessibility and SEO.
        """
    )

    image_input = gr.Image(
        type="pil",
        label="Upload Image",
    )

    seo_toggle = gr.Checkbox(
        label="SEO Mode (expanded, keyword-aware alt text)",
        value=False,
    )

    keyword_input = gr.Textbox(
        label="Keywords (optional)",
        placeholder="e.g. science experiment for kids, STEM education",
    )

    alt_text_output = gr.Textbox(
        label="Generated Alt Text",
        lines=6,
    )

    generate_btn = gr.Button("Generate Alt Text 🚀")

    generate_btn.click(
        fn=generate_alt_text,
        inputs=[image_input, seo_toggle, keyword_input],
        outputs=alt_text_output,
    )


# Only start the server when executed as a script, so importing this module
# (e.g. to reuse generate_alt_text) does not launch the app.
if __name__ == "__main__":
    demo.launch()