File size: 2,556 Bytes
bd04394
 
 
 
 
6cb42a5
 
 
bd04394
 
 
 
 
 
 
6cb42a5
 
 
bd04394
 
6cb42a5
bd04394
 
 
 
 
 
 
 
6cb42a5
bd04394
 
6cb42a5
 
 
bd04394
6cb42a5
bd04394
 
 
6cb42a5
 
bd04394
 
 
 
 
 
 
6cb42a5
bd04394
 
 
6cb42a5
bd04394
 
 
6cb42a5
bd04394
 
 
6cb42a5
bd04394
6cb42a5
 
 
bd04394
 
6cb42a5
 
 
bd04394
 
6cb42a5
bd04394
 
6cb42a5
bd04394
 
6cb42a5
bd04394
6cb42a5
 
bd04394
 
 
 
6cb42a5
 
bd04394
6cb42a5
 
 
 
bd04394
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
from groq import Groq

# ==============================
# LOAD MODEL (ONCE)
# ==============================
# A single checkpoint id feeds both loaders so the processor and the model
# cannot drift apart.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# Both objects are created at import time and reused by the captioning code.
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

# ==============================
# CORE FUNCTION
# ==============================
def generate_caption(api_key, image, style):
    """Caption an image with BLIP, then restyle the caption through Groq.

    Args:
        api_key: Groq API key supplied by the user. Surrounding whitespace
            is ignored; an empty or missing key is rejected.
        image: Uploaded picture exposing ``mode`` and ``convert`` (a PIL
            image), or ``None`` when nothing was uploaded.
        style: Caption style label such as ``"Normal"``. Falls back to
            ``"Normal"`` when empty or ``None``.

    Returns:
        A Markdown string with the basic caption and the refined caption,
        or a message starting with "❌" when an input is missing or any
        captioning/refinement step fails.
    """
    # Validate inputs first so no model or network work runs on bad input.
    api_key = (api_key or "").strip()
    if not api_key:
        # The reply must be a plain prompt for the user; a credential must
        # never be hard-coded or echoed back through the UI.
        return "❌ Please enter your Groq API key."

    if image is None:
        return "❌ Please upload an image."

    style = style or "Normal"

    try:
        # The captioning step is fed RGB input only.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Image -> basic caption
        inputs = processor(image, return_tensors="pt")
        output = model.generate(**inputs, max_new_tokens=30)
        basic_caption = processor.decode(output[0], skip_special_tokens=True)

        # Groq refinement
        client = Groq(api_key=api_key)

        prompt = (
            f"\nRewrite the following image caption in a {style.lower()} style.\n"
            "Keep it short (1–2 lines).\n"
            "\n"
            "Caption:\n"
            f'"{basic_caption}"\n'
        )

        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
        )

        # Guard against an empty completion so formatting below never shows
        # the literal text "None".
        refined = (response.choices[0].message.content or "").strip()

        return (
            f"🖼️ **Basic Caption:** {basic_caption}\n\n"
            f"✨ **AI Refined Caption ({style}):**\n{refined}"
        )

    except Exception as e:
        # UI boundary: report any failure as text instead of raising.
        return f"❌ Error:\n{e}"

# ==============================
# UI
# ==============================
# Assemble the page: header, API key field, image upload, style picker,
# trigger button and the Markdown area that shows the result.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # The two spaces before each "\n" are Markdown hard line breaks; writing
    # them inside explicit literals keeps editors from stripping them.
    gr.Markdown(
        "# 🖼️ Image Caption Generator  \n"
        "Image → Caption → Groq AI ✨  \n"
        "Hugging Face Deployment"
    )

    # Password-type textbox for the user's Groq key.
    api_key = gr.Textbox(
        label="🔑 Groq API Key",
        type="password",
        placeholder="Paste your Groq API key here"
    )

    # type="pil" matches generate_caption, which reads .mode / .convert.
    image = gr.Image(type="pil", label="📷 Upload Image")

    style = gr.Dropdown(
        ["Normal", "Creative", "Fun / Gen-Z"],
        value="Normal",
        label="🎨 Caption Style"
    )

    btn = gr.Button("🚀 Generate Caption")
    output = gr.Markdown()

    # Input order must match generate_caption(api_key, image, style).
    btn.click(
        generate_caption,
        inputs=[api_key, image, style],
        outputs=output
    )

app.launch()