', unsafe_allow_html=True)
# Header with logo and title
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.markdown('
', unsafe_allow_html=True)
st.markdown('🖼️✨', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
st.markdown('
Image Caption Studio
', unsafe_allow_html=True)
st.markdown('
Transform your images into beautiful captions using advanced AI. Upload any image and get short, technical, and human-friendly captions instantly!
', unsafe_allow_html=True)
# Load model (cached)
model, processor, device = load_model()
if model is None:
st.error("⚠️ Model failed to load. Please refresh the page or check your connection.")
return
# ============================
# SIDEBAR FOR SETTINGS
# ============================
with st.sidebar:
st.markdown("## ⚙️ Settings")
# Caption type selection
caption_type = st.radio(
"**Select Caption Style:**",
["🎯 All Three Styles", "📝 Short Only", "🔬 Technical Only", "😊 Human-Friendly Only"],
help="Choose which caption styles to generate"
)
# Advanced options expander
with st.expander("**⚙️ Advanced Options**", expanded=False):
st.markdown("### Word Limits")
short_limit = st.slider(
"**Short Caption Limit:**",
min_value=5,
max_value=25,
value=15,
help="Maximum words for short captions"
)
tech_limit = st.slider(
"**Technical Caption Limit:**",
min_value=15,
max_value=50,
value=35,
help="Maximum words for technical captions"
)
human_limit = st.slider(
"**Human-Friendly Limit:**",
min_value=15,
max_value=50,
value=25,
help="Maximum words for human-friendly captions"
)
# Performance info
st.markdown("---")
st.markdown("### 📊 System Info")
st.info(f"**Device:** {device.upper()}\n\n**Model:** Qwen2.5-VL-7B\n\n**Status:** Ready ✅")
# ============================
# MAIN CONTENT AREA
# ============================
# Create two columns for layout
col_left, col_right = st.columns([1, 1])
with col_left:
st.markdown("### 📤 Upload Your Image")
# File uploader
uploaded_file = st.file_uploader(
"Choose an image...",
type=['jpg', 'jpeg', 'png', 'bmp', 'tiff'],
help="Supported formats: JPG, JPEG, PNG, BMP, TIFF"
)
# Display uploaded image
if uploaded_file is not None:
try:
image = Image.open(uploaded_file)
# Resize for display
max_size = (500, 500)
image.thumbnail(max_size, Image.Resampling.LANCZOS)
st.markdown('
', unsafe_allow_html=True)
st.image(image, use_column_width=True)
st.markdown('
', unsafe_allow_html=True)
# Image info
st.success(f"✅ **{uploaded_file.name}** uploaded successfully!")
st.caption(f"**Size:** {image.size[0]}x{image.size[1]} pixels | **Format:** {image.format}")
except Exception as e:
st.error(f"Error loading image: {str(e)}")
image = None
else:
# Display placeholder
st.markdown('
', unsafe_allow_html=True)
st.image("https://via.placeholder.com/500x300/667eea/ffffff?text=Upload+an+Image",
use_column_width=True)
st.markdown('
', unsafe_allow_html=True)
st.info("👆 Upload an image to get started")
image = None
with col_right:
st.markdown("### 🎨 Caption Settings")
# Display current settings
if caption_type == "🎯 All Three Styles":
st.markdown("**Selected:** All caption styles")
cols = st.columns(3)
with cols[0]:
st.markdown('
Short
', unsafe_allow_html=True)
with cols[1]:
st.markdown('
Technical
', unsafe_allow_html=True)
with cols[2]:
st.markdown('
Human-Friendly
', unsafe_allow_html=True)
else:
st.markdown(f"**Selected:** {caption_type.split(' ')[1]}")
# Generate button
generate_btn = st.button(
"🚀 Generate Captions",
type="primary",
disabled=uploaded_file is None,
use_container_width=True
)
# ============================
# CAPTION GENERATION
# ============================
if generate_btn and uploaded_file is not None and image is not None:
try:
# Progress bar
progress_bar = st.progress(0)
status_text = st.empty()
# Generate captions based on selection
captions = {}
if caption_type in ["🎯 All Three Styles", "📝 Short Only"]:
status_text.text("🔍 Generating short caption...")
short_prompt = CaptionPrompts.get_short_caption_prompt(short_limit)
short_caption = generate_caption(model, processor, device, image, short_prompt, 50)
# Enforce word limit
short_words = short_caption.split()
if len(short_words) > short_limit:
short_caption = ' '.join(short_words[:short_limit]) + "..."
captions['short'] = short_caption
progress_bar.progress(33)
if caption_type in ["🎯 All Three Styles", "🔬 Technical Only"]:
status_text.text("🔬 Generating technical caption...")
tech_prompt = CaptionPrompts.get_technical_caption_prompt(tech_limit)
tech_caption = generate_caption(model, processor, device, image, tech_prompt, 100)
# Enforce word limit
tech_words = tech_caption.split()
if len(tech_words) > tech_limit:
tech_caption = ' '.join(tech_words[:tech_limit]) + "..."
captions['technical'] = tech_caption
progress_bar.progress(66 if caption_type == "🔬 Technical Only" else 66)
if caption_type in ["🎯 All Three Styles", "😊 Human-Friendly Only"]:
status_text.text("😊 Generating human-friendly caption...")
human_prompt = CaptionPrompts.get_human_friendly_caption_prompt(human_limit)
human_caption = generate_caption(model, processor, device, image, human_prompt, 100)
# Enforce word limit
human_words = human_caption.split()
if len(human_words) > human_limit:
human_caption = ' '.join(human_words[:human_limit]) + "..."
captions['human'] = human_caption
progress_bar.progress(100)
status_text.text("✅ Captions generated successfully!")
time.sleep(0.5)
progress_bar.empty()
status_text.empty()
# ============================
# DISPLAY RESULTS
# ============================
st.markdown("---")
st.markdown("## 📋 Generated Captions")
# Display appropriate cards
if 'short' in captions:
st.markdown('
', unsafe_allow_html=True)
st.markdown('
Short Caption
', unsafe_allow_html=True)
st.markdown(f'**{captions["short"]}**')
st.markdown(f'
📊 Words: {len(captions["short"].split())} / {short_limit}
', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
if 'technical' in captions:
st.markdown('
', unsafe_allow_html=True)
st.markdown('
Technical Caption
', unsafe_allow_html=True)
st.markdown(f'**{captions["technical"]}**')
st.markdown(f'
📊 Words: {len(captions["technical"].split())} / {tech_limit}
', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
if 'human' in captions:
st.markdown('
', unsafe_allow_html=True)
st.markdown('
Human-Friendly Caption
', unsafe_allow_html=True)
st.markdown(f'**{captions["human"]}**')
st.markdown(f'
📊 Words: {len(captions["human"].split())} / {human_limit}
', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
# Copy to clipboard button
if caption_type == "🎯 All Three Styles":
all_captions = f"Short: {captions.get('short', '')}\n\nTechnical: {captions.get('technical', '')}\n\nHuman-Friendly: {captions.get('human', '')}"
else:
all_captions = list(captions.values())[0]
st.download_button(
label="💾 Download All Captions",
data=all_captions,
file_name="captions.txt",
mime="text/plain",
use_container_width=True
)
# Success message
st.markdown('
✨ Captions generated successfully! You can copy them or download as text.
', unsafe_allow_html=True)
except Exception as e:
st.error(f"❌ Error generating captions: {str(e)}")
# ============================
# FEATURES SECTION
# ============================
st.markdown("---")
st.markdown("## ✨ Features")
features_cols = st.columns(3)
with features_cols[0]:
st.markdown("""
🎯 Multiple Styles
Short, technical, and human-friendly captions tailored to your needs
""", unsafe_allow_html=True)
with features_cols[1]:
st.markdown("""
⚡ Fast & Accurate
Powered by Qwen2.5-VL AI model for precise and quick results
""", unsafe_allow_html=True)
with features_cols[2]:
st.markdown("""
🎨 Customizable
Adjust word limits and choose specific caption styles
""", unsafe_allow_html=True)
# ============================
# FOOTER
# ============================
st.markdown("---")
st.markdown('', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True) # Close main container
# ============================
# SCRIPT ENTRY POINT
# ============================
# Call main() only when this file is executed as the top-level script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()