"""Streamlit front end for the multi-model image caption generator.

The page lets the user upload an image, request a caption from one of the
configured vision models (OpenAI, Gemini or GROQ), edit the caption, render
it onto the image and download the result. Generated captions are recorded
in a ``CaptionHistory`` kept in the Streamlit session state.
"""

import io
import os

import cv2
import numpy as np
import streamlit as st
from dotenv import load_dotenv
from PIL import Image

from caption_generation import MultiModelCaptionGenerator
from caption_history import CaptionHistory
from caption_overlay import ImageCaptionOverlay

load_dotenv()

openai_key = os.getenv("OPENAI_API_KEY_IC")
gemini_key = os.getenv("GEMINI_API_KEY_IC")
groq_key = os.getenv("GROQ_API_KEY_IC")

# Caption rendering modes offered in the sidebar.
_METHOD_OVERLAY = "Overlay on Image"
_METHOD_BACKGROUND = "Background Behind Image"

# Display name shown in the model selector -> backend key.
_MODELS = {
    "OpenAI GPT-4o": "openai",
    "Google Gemini": "gemini",
    "GROQ Vision": "groq",
}

# Display name -> short description shown under the model selector.
_MODEL_INFO = {
    "OpenAI GPT-4o": "Uses GPT-4o vision model for detailed image analysis",
    "Google Gemini": "Uses Gemini-1.5-flash for fast and accurate captions",
    "GROQ Vision": "Uses Llama-3.2-11b-vision for high-speed processing",
}

# Only the most recent entries of the (filtered) history are rendered.
_HISTORY_DISPLAY_LIMIT = 20

_FOOTER_MARKDOWN = """

Built with Streamlit, LangChain, OpenCV, and multi-model AI APIs

Supports OpenAI GPT-4o, Google Gemini, and GROQ Vision models

Make sure to add your API keys to the .env file

"""


def _provider_keys() -> dict:
    """Return ``backend key -> (provider label, API key or None)``.

    Built on each call so it always reflects the current module-level keys.
    """
    return {
        "openai": ("OpenAI", openai_key),
        "gemini": ("Gemini", gemini_key),
        "groq": ("GROQ", groq_key),
    }


def _hex_to_rgb(hex_color: str) -> tuple:
    """Convert a ``#RRGGBB`` string into an ``(r, g, b)`` tuple of ints."""
    return tuple(int(hex_color[i:i + 2], 16) for i in (1, 3, 5))


def _pil_to_bgr(image):
    """Convert a PIL image into a 3-channel BGR array for OpenCV.

    The image is normalised to RGB first: grayscale and palette images
    become 2-D arrays under ``np.array`` and ``cv2.cvtColor`` rejects those
    for ``COLOR_RGB2BGR``.
    """
    rgb_pixels = np.array(image.convert("RGB"))
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)


def _render_sidebar() -> dict:
    """Draw the sidebar and return the chosen caption settings.

    Returns:
        A dict with ``"method"`` plus either ``position``/``font_size``/
        ``thickness`` (overlay mode) or ``bg_color``/``text_color``/
        ``margin``/``custom_font`` (background mode).
    """
    with st.sidebar:
        st.header("🔑 API Configuration")

        # Show API status
        for label, api_key in _provider_keys().values():
            if api_key:
                st.success(f"✅ {label} API Key loaded from .env")
            else:
                st.warning(f"⚠️ {label} API Key not found in .env")

        if st.button("Configure APIs"):
            try:
                st.session_state.caption_generator.configure_apis(
                    openai_key=openai_key,
                    gemini_key=gemini_key,
                    groq_key=groq_key,
                )
                st.success("APIs configured successfully!")
            except Exception as e:
                # UI boundary: report the failure instead of crashing the page.
                st.error(f"Error configuring APIs: {str(e)}")

        st.markdown("---")

        # Caption overlay settings
        st.header("🎨 Caption Settings")
        settings = {
            "method": st.selectbox(
                "Caption Method",
                [_METHOD_OVERLAY, _METHOD_BACKGROUND],
            )
        }

        if settings["method"] == _METHOD_OVERLAY:
            settings["position"] = st.selectbox(
                "Position", ["bottom", "top", "center"]
            )
            settings["font_size"] = st.slider("Font Size", 0.5, 3.0, 1.0, 0.1)
            settings["thickness"] = st.slider("Thickness", 1, 5, 2)
        else:
            settings["bg_color"] = st.color_picker("Background Color", "#000000")
            settings["text_color"] = st.color_picker("Text Color", "#FFFFFF")
            settings["margin"] = st.slider("Margin", 20, 100, 50)
            # Optional: Custom font path
            settings["custom_font"] = st.text_input(
                "Custom Font Path (optional)",
                placeholder="e.g., fonts/Poppins-Regular.ttf",
            )

        st.markdown("---")

        # History management
        st.header("📝 Caption History")
        if st.button("View History"):
            st.session_state.show_history = True
        if st.button("Hide History"):
            st.session_state.show_history = False
        if st.button("Clear History"):
            st.session_state.caption_history.clear_history()
            st.success("History cleared!")

    return settings


def _generate_caption(image, image_name: str, selected_model: str) -> None:
    """Generate a caption for ``image`` and store it in the session state.

    On success this sets ``current_caption``, ``current_image``,
    ``current_image_name`` and ``current_model`` in the session state and
    appends the interaction to the caption history. Problems are reported
    with ``st.error``; nothing is raised to the caller.
    """
    providers = _provider_keys()

    # NOTE(review): this only checks that keys exist in the environment, not
    # that "Configure APIs" was actually clicked -- confirm whether the
    # generator needs an explicit configure step before first use.
    if not any(api_key for _, api_key in providers.values()):
        st.error("Please add API keys to your .env file and click 'Configure APIs'")
        return

    model_key = _MODELS[selected_model]
    label, api_key = providers[model_key]
    if not api_key:
        st.error(f"{label} API key not available. Please add it to your .env file.")
        return

    generator = st.session_state.caption_generator
    try:
        backends = {
            "openai": generator.generate_caption_openai,
            "gemini": generator.generate_caption_gemini,
            "groq": generator.generate_caption_groq,
        }
        with st.spinner(f"Generating caption with {selected_model}..."):
            caption = backends[model_key](image)

        st.session_state.current_caption = caption
        st.session_state.current_image = image
        # Remember which file the caption belongs to so the download name
        # matches the previewed image even if the uploader changes later.
        st.session_state.current_image_name = image_name
        st.session_state.current_model = selected_model

        # Add to history
        st.session_state.caption_history.add_interaction(
            image_name,
            selected_model,
            caption,
        )
        st.success(f"Caption generated successfully with {selected_model}!")
    except Exception as e:
        # UI boundary: surface backend/network failures to the user.
        st.error(f"Error generating caption: {str(e)}")
        st.error("Please check your API keys and internet connection.")


def _render_upload_section():
    """Draw the upload column and return the uploaded file (or ``None``)."""
    st.header("📤 Upload Image")
    uploaded_file = st.file_uploader(
        "Choose an image...",
        type=['png', 'jpg', 'jpeg', 'bmp', 'tiff'],
    )
    if uploaded_file is None:
        return None

    try:
        image = Image.open(uploaded_file)
    except OSError as e:
        # The uploader filters by extension only; the content may still be
        # something Pillow cannot identify.
        st.error(f"Could not read image: {e}")
        return uploaded_file

    # Display original image
    st.image(image, caption="Original Image", use_container_width=True)

    # Model selection
    st.header("🤖 Select Model")
    selected_model = st.selectbox("Choose a model", list(_MODELS))

    # Show model-specific info
    st.info(_MODEL_INFO[selected_model])

    if st.button("Generate Caption", type="primary"):
        _generate_caption(image, uploaded_file.name, selected_model)

    return uploaded_file


def _render_captioned_image(settings: dict):
    """Return the current image with the current caption drawn on it (BGR)."""
    cv_image = _pil_to_bgr(st.session_state.current_image)
    caption = st.session_state.current_caption

    if settings["method"] == _METHOD_OVERLAY:
        return ImageCaptionOverlay.add_caption_overlay(
            cv_image,
            caption,
            position=settings["position"],
            font_size=settings["font_size"],
            thickness=settings["thickness"],
        )

    # Use custom font only if the given path actually exists.
    custom_font = settings["custom_font"]
    font_path = custom_font if custom_font and os.path.exists(custom_font) else None

    # NOTE(review): colours are passed as (r, g, b) while the image is BGR --
    # confirm add_caption_background expects RGB tuples.
    return ImageCaptionOverlay.add_caption_background(
        cv_image,
        caption,
        font_path=font_path,
        background_color=_hex_to_rgb(settings["bg_color"]),
        text_color=_hex_to_rgb(settings["text_color"]),
        margin=settings["margin"],
    )


def _render_preview(settings: dict, uploaded_file) -> None:
    """Draw the editable caption, the captioned preview and the download."""
    st.header("✨ Generated Caption & Preview")

    if "current_caption" not in st.session_state:
        st.info("👆 Upload an image and generate a caption to see the preview here")
        return

    # Editable caption
    edited_caption = st.text_area(
        "Generated Caption (editable)",
        st.session_state.current_caption,
        height=100,
        help="You can edit the caption before applying it to the image",
    )
    # Update the caption if edited
    if edited_caption != st.session_state.current_caption:
        st.session_state.current_caption = edited_caption

    if "current_image" not in st.session_state:
        return

    try:
        result_image = _render_captioned_image(settings)

        # Convert back to PIL for display
        result_pil = Image.fromarray(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
        st.image(result_pil, caption="Image with Caption", use_container_width=True)

        # Download button
        img_buffer = io.BytesIO()
        result_pil.save(img_buffer, format='PNG')

        # Prefer the name of the image the caption was generated for; drop
        # its extension so the download is "captioned_photo.png" rather than
        # "captioned_photo.jpg.png".
        fallback_name = uploaded_file.name if uploaded_file else "image"
        source_name = st.session_state.get("current_image_name") or fallback_name
        stem = os.path.splitext(source_name)[0]

        st.download_button(
            label="📥 Download Image with Caption",
            data=img_buffer.getvalue(),
            file_name=f"captioned_{stem}.png",
            mime="image/png",
        )
    except Exception as e:
        # UI boundary: show rendering problems instead of crashing the page.
        st.error(f"Error processing image: {str(e)}")


def _render_history() -> None:
    """Draw the searchable list of previously generated captions."""
    st.markdown("---")
    st.header("📋 Caption Generation History")

    history = st.session_state.caption_history.get_history()
    if not history:
        st.info("No caption history available.")
        return

    # Add search/filter functionality
    search_term = st.text_input(
        "🔍 Search history",
        placeholder="Search by image name or caption...",
    )

    matches = history
    if search_term:
        needle = search_term.lower()
        matches = [
            item
            for item in history
            if needle in item['image_name'].lower()
            or needle in item['caption'].lower()
            or needle in item['model'].lower()
        ]

    if not matches:
        st.info("No matching history found.")
        return

    # Newest first, limited to the most recent entries.
    for item in reversed(matches[-_HISTORY_DISPLAY_LIMIT:]):
        title = f"{item['timestamp'][:19]} - {item['image_name']} ({item['model']})"
        with st.expander(title):
            st.write(f"**Model:** {item['model']}")
            st.write(f"**Image:** {item['image_name']}")
            st.write(f"**Caption:** {item['caption']}")
            st.write(f"**Timestamp:** {item['timestamp']}")


def main():
    """Render the caption generator page for the current Streamlit run."""
    st.set_page_config(
        page_title="Multi-Model Image Caption Generator",
        page_icon="🖼️",
        layout="wide",
    )

    st.title("🖼️ Multi-Model Image Caption Generator")
    st.markdown("Generate captions using OpenAI GPT-4V, Google Gemini, and GROQ Vision models")

    # Initialize session state
    if 'caption_history' not in st.session_state:
        st.session_state.caption_history = CaptionHistory()
    if 'caption_generator' not in st.session_state:
        st.session_state.caption_generator = MultiModelCaptionGenerator()

    settings = _render_sidebar()

    # Main content area
    col1, col2 = st.columns([1, 1])
    with col1:
        uploaded_file = _render_upload_section()
    with col2:
        _render_preview(settings, uploaded_file)

    # History display
    if st.session_state.get('show_history', False):
        _render_history()

    # Footer
    st.markdown("---")
    # NOTE(review): the footer text contains no HTML as written; confirm
    # whether unsafe_allow_html is still required.
    st.markdown(_FOOTER_MARKDOWN, unsafe_allow_html=True)


if __name__ == "__main__":
    main()