# NOTE(review): the three lines that were here ("Spaces:" / "Sleeping" / "Sleeping")
# are Hugging Face Spaces page chrome captured by the scrape, not program text.
"""Module setup: imports and API-key loading for the caption-generator app."""

# Standard library
import io
import os

# Third-party
import cv2
import numpy as np
import streamlit as st
from dotenv import load_dotenv
from PIL import Image

# Project-local
from caption_generation import MultiModelCaptionGenerator
from caption_history import CaptionHistory
from caption_overlay import ImageCaptionOverlay

# Read the per-provider API keys from a local .env file.
# load_dotenv() must run before the os.getenv() calls below.
load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY_IC")
gemini_key = os.getenv("GEMINI_API_KEY_IC")
groq_key = os.getenv("GROQ_API_KEY_IC")
| def main(): | |
| st.set_page_config( | |
| page_title="Multi-Model Image Caption Generator", | |
| page_icon="πΌοΈ", | |
| layout="wide" | |
| ) | |
| st.title("πΌοΈ Multi-Model Image Caption Generator") | |
| st.markdown("Generate captions using OpenAI GPT-4V, Google Gemini, and GROQ Vision models") | |
| # Initialize session state | |
| if 'caption_history' not in st.session_state: | |
| st.session_state.caption_history = CaptionHistory() | |
| if 'caption_generator' not in st.session_state: | |
| st.session_state.caption_generator = MultiModelCaptionGenerator() | |
| # Sidebar for API configuration | |
| with st.sidebar: | |
| st.header("π API Configuration") | |
| # Show API status | |
| if openai_key: | |
| st.success("β OpenAI API Key loaded from .env") | |
| else: | |
| st.warning("β οΈ OpenAI API Key not found in .env") | |
| if gemini_key: | |
| st.success("β Gemini API Key loaded from .env") | |
| else: | |
| st.warning("β οΈ Gemini API Key not found in .env") | |
| if groq_key: | |
| st.success("β GROQ API Key loaded from .env") | |
| else: | |
| st.warning("β οΈ GROQ API Key not found in .env") | |
| if st.button("Configure APIs"): | |
| try: | |
| st.session_state.caption_generator.configure_apis( | |
| openai_key=openai_key, | |
| gemini_key=gemini_key, | |
| groq_key=groq_key | |
| ) | |
| st.success("APIs configured successfully!") | |
| except Exception as e: | |
| st.error(f"Error configuring APIs: {str(e)}") | |
| st.markdown("---") | |
| # Caption overlay settings | |
| st.header("π¨ Caption Settings") | |
| caption_method = st.selectbox( | |
| "Caption Method", | |
| ["Overlay on Image", "Background Behind Image"] | |
| ) | |
| if caption_method == "Overlay on Image": | |
| position = st.selectbox("Position", ["bottom", "top", "center"]) | |
| font_size = st.slider("Font Size", 0.5, 3.0, 1.0, 0.1) | |
| thickness = st.slider("Thickness", 1, 5, 2) | |
| else: | |
| bg_color = st.color_picker("Background Color", "#000000") | |
| text_color = st.color_picker("Text Color", "#FFFFFF") | |
| margin = st.slider("Margin", 20, 100, 50) | |
| # Optional: Custom font path | |
| custom_font = st.text_input( | |
| "Custom Font Path (optional)", | |
| placeholder="e.g., fonts/Poppins-Regular.ttf" | |
| ) | |
| st.markdown("---") | |
| # History management | |
| st.header("π Caption History") | |
| if st.button("View History"): | |
| st.session_state.show_history = True | |
| if st.button("Hide History"): | |
| st.session_state.show_history = False | |
| if st.button("Clear History"): | |
| st.session_state.caption_history.clear_history() | |
| st.success("History cleared!") | |
| # Main content area | |
| col1, col2 = st.columns([1, 1]) | |
| with col1: | |
| st.header("π€ Upload Image") | |
| uploaded_file = st.file_uploader( | |
| "Choose an image...", | |
| type=['png', 'jpg', 'jpeg', 'bmp', 'tiff'] | |
| ) | |
| if uploaded_file is not None: | |
| # Display original image | |
| image = Image.open(uploaded_file) | |
| st.image(image, caption="Original Image", use_container_width=True) | |
| # Model selection | |
| st.header("π€ Select Model") | |
| models = { | |
| "OpenAI GPT-4o": "openai", # Updated model name | |
| "Google Gemini": "gemini", | |
| "GROQ Vision": "groq" | |
| } | |
| selected_model = st.selectbox("Choose a model", list(models.keys())) | |
| # Show model-specific info | |
| model_info = { | |
| "OpenAI GPT-4o": "Uses GPT-4o vision model for detailed image analysis", | |
| "Google Gemini": "Uses Gemini-1.5-flash for fast and accurate captions", | |
| "GROQ Vision": "Uses Llama-3.2-11b-vision for high-speed processing" | |
| } | |
| st.info(model_info[selected_model]) | |
| if st.button("Generate Caption", type="primary"): | |
| # Check if APIs are configured | |
| if not any([openai_key, gemini_key, groq_key]): | |
| st.error("Please add API keys to your .env file and click 'Configure APIs'") | |
| return | |
| try: | |
| model_key = models[selected_model] | |
| # Check specific API availability | |
| if model_key == "openai" and not openai_key: | |
| st.error("OpenAI API key not available. Please add it to your .env file.") | |
| return | |
| elif model_key == "gemini" and not gemini_key: | |
| st.error("Gemini API key not available. Please add it to your .env file.") | |
| return | |
| elif model_key == "groq" and not groq_key: | |
| st.error("GROQ API key not available. Please add it to your .env file.") | |
| return | |
| with st.spinner(f"Generating caption with {selected_model}..."): | |
| if model_key == "openai": | |
| caption = st.session_state.caption_generator.generate_caption_openai(image) | |
| elif model_key == "gemini": | |
| caption = st.session_state.caption_generator.generate_caption_gemini(image) | |
| elif model_key == "groq": | |
| caption = st.session_state.caption_generator.generate_caption_groq(image) | |
| st.session_state.current_caption = caption | |
| st.session_state.current_image = image | |
| st.session_state.current_model = selected_model | |
| # Add to history | |
| st.session_state.caption_history.add_interaction( | |
| uploaded_file.name, | |
| selected_model, | |
| caption | |
| ) | |
| st.success(f"Caption generated successfully with {selected_model}!") | |
| except Exception as e: | |
| st.error(f"Error generating caption: {str(e)}") | |
| st.error("Please check your API keys and internet connection.") | |
| with col2: | |
| st.header("β¨ Generated Caption & Preview") | |
| if hasattr(st.session_state, 'current_caption'): | |
| # Editable caption | |
| edited_caption = st.text_area( | |
| "Generated Caption (editable)", | |
| st.session_state.current_caption, | |
| height=100, | |
| help="You can edit the caption before applying it to the image" | |
| ) | |
| # Update the caption if edited | |
| if edited_caption != st.session_state.current_caption: | |
| st.session_state.current_caption = edited_caption | |
| # Generate preview with caption | |
| if hasattr(st.session_state, 'current_image'): | |
| # Convert PIL to OpenCV format | |
| cv_image = cv2.cvtColor(np.array(st.session_state.current_image), cv2.COLOR_RGB2BGR) | |
| try: | |
| if caption_method == "Overlay on Image": | |
| result_image = ImageCaptionOverlay.add_caption_overlay( | |
| cv_image, | |
| st.session_state.current_caption, | |
| position=position, | |
| font_size=font_size, | |
| thickness=thickness | |
| ) | |
| else: | |
| # Convert hex colors to RGB tuples | |
| bg_rgb = tuple(int(bg_color[i:i+2], 16) for i in (1, 3, 5)) | |
| text_rgb = tuple(int(text_color[i:i+2], 16) for i in (1, 3, 5)) | |
| # Use custom font if provided | |
| font_path = custom_font if custom_font and os.path.exists(custom_font) else None | |
| result_image = ImageCaptionOverlay.add_caption_background( | |
| cv_image, | |
| st.session_state.current_caption, | |
| font_path=font_path, | |
| background_color=bg_rgb, | |
| text_color=text_rgb, | |
| margin=margin | |
| ) | |
| # Convert back to PIL for display | |
| result_pil = Image.fromarray(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB)) | |
| st.image(result_pil, caption="Image with Caption", use_container_width=True) | |
| # Download button | |
| img_buffer = io.BytesIO() | |
| result_pil.save(img_buffer, format='PNG') | |
| st.download_button( | |
| label="π₯ Download Image with Caption", | |
| data=img_buffer.getvalue(), | |
| file_name=f"captioned_{uploaded_file.name if uploaded_file else 'image'}.png", | |
| mime="image/png" | |
| ) | |
| except Exception as e: | |
| st.error(f"Error processing image: {str(e)}") | |
| else: | |
| st.info("π Upload an image and generate a caption to see the preview here") | |
| # History display | |
| if getattr(st.session_state, 'show_history', False): | |
| st.markdown("---") | |
| st.header("π Caption Generation History") | |
| history = st.session_state.caption_history.get_history() | |
| if history: | |
| # Add search/filter functionality | |
| search_term = st.text_input("π Search history", placeholder="Search by image name or caption...") | |
| filtered_history = history | |
| if search_term: | |
| filtered_history = [ | |
| item for item in history | |
| if search_term.lower() in item['image_name'].lower() | |
| or search_term.lower() in item['caption'].lower() | |
| or search_term.lower() in item['model'].lower() | |
| ] | |
| if filtered_history: | |
| for i, item in enumerate(reversed(filtered_history[-20:])): # Show last 20 items | |
| with st.expander(f"{item['timestamp'][:19]} - {item['image_name']} ({item['model']})"): | |
| st.write(f"**Model:** {item['model']}") | |
| st.write(f"**Image:** {item['image_name']}") | |
| st.write(f"**Caption:** {item['caption']}") | |
| st.write(f"**Timestamp:** {item['timestamp']}") | |
| else: | |
| st.info("No matching history found.") | |
| else: | |
| st.info("No caption history available.") | |
| # Footer | |
| st.markdown("---") | |
| st.markdown(""" | |
| <div style='text-align: center'> | |
| <p>Built with Streamlit, LangChain, OpenCV, and multi-model AI APIs</p> | |
| <p>Supports OpenAI GPT-4o, Google Gemini, and GROQ Vision models</p> | |
| <p><small>Make sure to add your API keys to the .env file</small></p> | |
| </div> | |
| """, unsafe_allow_html=True) | |
| if __name__ == "__main__": | |
| main() |