File size: 3,477 Bytes
5878807
 
 
ea9eb2b
 
 
 
 
 
 
 
17f24fa
5878807
 
 
 
 
 
 
ea9eb2b
5878807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d1ca0b
5878807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import gradio as gr

# ดึงค่า API Key จาก Hugging Face Secrets
hf_api_key = os.environ.get("HF_API_KEY")

# ตรวจสอบว่า API Key ถูกดึงออกมาหรือยัง
if hf_api_key:
    print(f"API Key Loaded: {hf_api_key}")
else:
    print("API Key not found. Please check your Hugging Face secrets.")
    
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack import Pipeline
from haystack.utils import Secret
from image_captioner import ImageCaptioner


description = """
# Captionate 📸
### Create Instagram captions for your pics!
* Upload your photo or select one from the examples
* Choose your model
* ✨ Captionate! ✨ 
It uses [Salesforce/blip-image-captioning-base](https://huggingface.co/Salesforce/blip-image-captioning-base) model for image-to-text caption generation task.
For Instagrammable captions, try different text-to-text models to see how they react to the same prompt.
Built by [Bilge Yucel](https://twitter.com/bilgeycl) using [Haystack](https://github.com/deepset-ai/haystack) 💙
"""

prompt_template =[ChatMessage.from_user("""
You will receive a descriptive text of a photo.
Try to generate a nice Instagram caption with a phrase rhyming with the text. Include emojis in the caption.
Just return one option without alternatives. Don't use hashtags.           
                                                                                 
Descriptive text: {{caption}};
Instagram Caption:
""")]

hf_api_key = os.environ["HF_API_KEY"]

def generate_caption(image_file_path, model_name):
    image_to_text = ImageCaptioner(model_name="Salesforce/blip-image-captioning-base")
    prompt_builder = ChatPromptBuilder(template=prompt_template, required_variables="*")
    generator = HuggingFaceAPIChatGenerator(
        api_type="serverless_inference_api", 
        api_params={"model": model_name}, 
        token=Secret.from_token(hf_api_key))
    
    captioning_pipeline = Pipeline()
    captioning_pipeline.add_component("image_to_text", image_to_text)
    captioning_pipeline.add_component("prompt_builder", prompt_builder)
    captioning_pipeline.add_component("generator", generator)
    
    captioning_pipeline.connect("image_to_text.caption", "prompt_builder.caption")
    captioning_pipeline.connect("prompt_builder", "generator")

    result = captioning_pipeline.run({"image_to_text":{"image_file_path":image_file_path}})
    return result["generator"]["replies"][0].text

with gr.Blocks(theme="soft") as demo:
    gr.Markdown(value=description)
    with gr.Row():
        image = gr.Image(type="filepath")
        with gr.Column():
            model_name = gr.Dropdown(
                ["deepseek-ai/DeepSeek-V3.1-Terminus", "meta-llama/Llama-3.3-70B-Instruct", "openai/gpt-oss-20b", "Qwen/Qwen3-4B-Instruct-2507"], 
                value="deepseek-ai/DeepSeek-V3.1-Terminus", 
                label="Choose your model!"
                )
            gr.Examples(["./whale.png", "./rainbow.jpeg", "./selfie.png"], inputs=image, label="Click on any example") 
    submit_btn = gr.Button("✨ Captionate ✨")
    caption = gr.Textbox(label="Caption", show_copy_button=True)
    submit_btn.click(fn=generate_caption, inputs=[image, model_name], outputs=[caption])

if __name__ == "__main__":
    demo.launch()