# File size: 2,854 Bytes
# edf3f45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from PIL import Image
from deep_translator import GoogleTranslator
from gradio.themes import Base

# Checkpoint name shared by the BLIP captioning model and its processor
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"

# Pick the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the captioning model, then the matching processor
caption_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
caption_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)

# Move the model onto the selected device
caption_model.to(device)

# English -> Hindi translator
translator = GoogleTranslator(source="en", target="hi")

def generate_caption(image):
    """Caption an image in English and translate the caption to Hindi.

    Args:
        image: The image handed over by the Gradio image input, or ``None``
            when nothing was uploaded.

    Returns:
        A ``(english_caption, hindi_caption)`` tuple of strings. Failures are
        reported in-band as strings starting with ``"Error: "`` instead of
        being raised:

        * no image supplied -> both elements carry the same error message;
        * captioning failed -> both elements carry the exception text;
        * only translation failed -> the English caption is still returned
          and just the Hindi element carries the exception text.
    """
    # Submitting with no upload passes None; report that directly rather than
    # surfacing whatever exception the processor would raise for it.
    if image is None:
        message = "Error: no image provided. Please upload an image."
        return message, message

    try:
        # Preprocess the image into model-ready tensors on the model's device
        inputs = caption_processor(images=image, return_tensors="pt")
        pixel_values = inputs.pixel_values.to(device)

        # Generate with diverse (grouped) beam search. The sampling-only
        # options temperature/top_k/top_p were removed: sampling is not
        # enabled in this call, so they had no effect on the result.
        output_ids = caption_model.generate(
            pixel_values,
            max_length=30,
            num_beams=10,
            num_beam_groups=2,
            diversity_penalty=0.5,
            repetition_penalty=2.0,
            no_repeat_ngram_size=3,
        )

        # Decode the first returned sequence into the English caption
        english_caption = caption_processor.decode(output_ids[0], skip_special_tokens=True)
    except Exception as e:
        message = f"Error: {e}"
        return message, message

    # Translation is handled on its own so that a translator failure does not
    # throw away a caption that was generated successfully.
    try:
        hindi_caption = translator.translate(english_caption)
    except Exception as e:
        hindi_caption = f"Error: {e}"

    return english_caption, hindi_caption

# Blue-and-white theme derived from Gradio's default theme
_roboto_font = gr.themes.GoogleFont("Roboto")
custom_theme = gr.themes.Default(
    font=[_roboto_font, "sans-serif"],  # Roboto first, then generic sans-serif
    text_size="lg",                     # Large text size preset
    radius_size="md",                   # Medium corner-radius preset
    neutral_hue="slate",
    secondary_hue="gray",
    primary_hue="blue",
)

# Extra CSS passed to the interface in addition to the theme
_PAGE_CSS = """
        .gradio-container { max-width: 800px; margin: auto; }
        h1 { text-align: center; color: #1E40AF; }
        .label { font-weight: bold; }
        input, output { border-radius: 8px; }
    """

# Image input; type="pil" asks Gradio to pass the upload as a PIL image
_image_input = gr.Image(type="pil", label="Upload an Image")

# Text outputs, in the same order generate_caption returns its values
_caption_outputs = [
    gr.Textbox(
        label="English Caption",
        lines=2,
        placeholder="English caption will appear here...",
    ),
    gr.Textbox(
        label="Hindi Caption",
        lines=2,
        placeholder="हिंदी कैप्शन यहाँ दिखाई देगा...",
    ),
]

# Wire the captioning function to the components and page metadata
interface = gr.Interface(
    fn=generate_caption,
    inputs=_image_input,
    outputs=_caption_outputs,
    title="Image Caption Generator (English & Hindi)",
    description="Upload an image to generate captions in English and Hindi with a sleek, modern interface.",
    theme=custom_theme,
    css=_PAGE_CSS,
)

# Start the web app
interface.launch()