Spaces:

HarshitX
/

Multi_LLM_Image_Captioning

Sleeping

File size: 12,575 Bytes

8a8f3ed

from caption_history import CaptionHistory
from caption_generation import MultiModelCaptionGenerator
from caption_overlay import ImageCaptionOverlay

import io
import os

import cv2
import numpy as np
from PIL import Image
import streamlit as st
from dotenv import load_dotenv

# Populate the process environment from a .env file before the keys are read.
load_dotenv()

# Provider API keys; each name is bound to None when its variable is unset.
openai_key, gemini_key, groq_key = (
    os.environ.get(env_name)
    for env_name in ("OPENAI_API_KEY_IC", "GEMINI_API_KEY_IC", "GROQ_API_KEY_IC")
)

# Display label -> (backend id, blurb shown under the model selector).
_MODEL_OPTIONS = {
    "OpenAI GPT-4o": ("openai", "Uses GPT-4o vision model for detailed image analysis"),
    "Google Gemini": ("gemini", "Uses Gemini-1.5-flash for fast and accurate captions"),
    "GROQ Vision": ("groq", "Uses Llama-3.2-11b-vision for high-speed processing"),
}

# Only the most recent entries of the (filtered) history are rendered.
_HISTORY_DISPLAY_LIMIT = 20


def _provider_keys():
    """Return ``backend id -> (provider label, API key)`` from the module-level keys."""
    return {
        "openai": ("OpenAI", openai_key),
        "gemini": ("Gemini", gemini_key),
        "groq": ("GROQ", groq_key),
    }


def _hex_to_rgb(hex_color):
    """Convert a ``#RRGGBB`` string (as produced by ``st.color_picker``) to an int 3-tuple."""
    return tuple(int(hex_color[i:i + 2], 16) for i in (1, 3, 5))


def _render_sidebar():
    """Draw the sidebar and return the caption-rendering settings chosen there.

    Returns:
        dict: always contains ``caption_method``; additionally ``position``,
        ``font_size`` and ``thickness`` for the overlay method, or ``bg_color``,
        ``text_color``, ``margin`` and ``custom_font`` for the background method.
    """
    settings = {}

    with st.sidebar:
        st.header("🔑 API Configuration")

        # One status line per provider, in a fixed order.
        for provider, api_key in _provider_keys().values():
            if api_key:
                st.success(f"✅ {provider} API Key loaded from .env")
            else:
                st.warning(f"⚠️ {provider} API Key not found in .env")

        # NOTE(review): generation below does not verify that this button was
        # ever pressed; whether the generator works unconfigured depends on
        # MultiModelCaptionGenerator, which is not visible here.
        if st.button("Configure APIs"):
            try:
                st.session_state.caption_generator.configure_apis(
                    openai_key=openai_key,
                    gemini_key=gemini_key,
                    groq_key=groq_key,
                )
                st.success("APIs configured successfully!")
            except Exception as e:  # UI boundary: report instead of crashing the page
                st.error(f"Error configuring APIs: {e}")

        st.markdown("---")

        st.header("🎨 Caption Settings")
        settings["caption_method"] = st.selectbox(
            "Caption Method",
            ["Overlay on Image", "Background Behind Image"],
        )

        if settings["caption_method"] == "Overlay on Image":
            settings["position"] = st.selectbox("Position", ["bottom", "top", "center"])
            settings["font_size"] = st.slider("Font Size", 0.5, 3.0, 1.0, 0.1)
            settings["thickness"] = st.slider("Thickness", 1, 5, 2)
        else:
            settings["bg_color"] = st.color_picker("Background Color", "#000000")
            settings["text_color"] = st.color_picker("Text Color", "#FFFFFF")
            settings["margin"] = st.slider("Margin", 20, 100, 50)
            settings["custom_font"] = st.text_input(
                "Custom Font Path (optional)",
                placeholder="e.g., fonts/Poppins-Regular.ttf",
            )

        st.markdown("---")

        st.header("📝 Caption History")
        if st.button("View History"):
            st.session_state.show_history = True

        if st.button("Hide History"):
            st.session_state.show_history = False

        if st.button("Clear History"):
            st.session_state.caption_history.clear_history()
            st.success("History cleared!")

    return settings


def _generate_caption(image, image_name, selected_model):
    """Generate a caption for ``image`` and store the result in session state.

    Reports problems with ``st.error`` and returns early instead of raising, so
    the caller can keep rendering the rest of the page.

    Args:
        image: PIL image to caption.
        image_name: name of the uploaded file, recorded in the history.
        selected_model: display label, a key of ``_MODEL_OPTIONS``.
    """
    providers = _provider_keys()

    if not any(api_key for _, api_key in providers.values()):
        st.error("Please add API keys to your .env file and click 'Configure APIs'")
        return

    backend, _ = _MODEL_OPTIONS[selected_model]
    provider, api_key = providers[backend]
    if not api_key:
        st.error(f"{provider} API key not available. Please add it to your .env file.")
        return

    try:
        # Resolves to generate_caption_openai / _gemini / _groq on the generator.
        generate = getattr(st.session_state.caption_generator, f"generate_caption_{backend}")
        with st.spinner(f"Generating caption with {selected_model}..."):
            caption = generate(image)

        st.session_state.current_caption = caption
        st.session_state.current_image = image
        st.session_state.current_model = selected_model
        # Remembered so the download name matches the captioned image even if
        # the uploader later holds a different file (or none).
        st.session_state.current_image_name = image_name

        st.session_state.caption_history.add_interaction(
            image_name,
            selected_model,
            caption,
        )

        st.success(f"Caption generated successfully with {selected_model}!")
    except Exception as e:  # UI boundary: report instead of crashing the page
        st.error(f"Error generating caption: {e}")
        st.error("Please check your API keys and internet connection.")


def _render_upload_column():
    """Draw the uploader, model selector and generate button.

    Returns:
        The uploaded file object from ``st.file_uploader``, or ``None``.
    """
    st.header("📤 Upload Image")
    uploaded_file = st.file_uploader(
        "Choose an image...",
        type=['png', 'jpg', 'jpeg', 'bmp', 'tiff'],
    )
    if uploaded_file is None:
        return None

    image = Image.open(uploaded_file)
    st.image(image, caption="Original Image", use_container_width=True)

    st.header("🤖 Select Model")
    selected_model = st.selectbox("Choose a model", list(_MODEL_OPTIONS))
    st.info(_MODEL_OPTIONS[selected_model][1])

    if st.button("Generate Caption", type="primary"):
        _generate_caption(image, uploaded_file.name, selected_model)

    return uploaded_file


def _render_preview_column(settings, uploaded_file):
    """Draw the editable caption, the captioned preview and the download button.

    Args:
        settings: dict returned by ``_render_sidebar``.
        uploaded_file: current upload (or ``None``); used only as a fallback
            source for the download file name.
    """
    st.header("✨ Generated Caption & Preview")

    if "current_caption" not in st.session_state:
        st.info("👆 Upload an image and generate a caption to see the preview here")
        return

    edited_caption = st.text_area(
        "Generated Caption (editable)",
        st.session_state.current_caption,
        height=100,
        help="You can edit the caption before applying it to the image",
    )
    if edited_caption != st.session_state.current_caption:
        st.session_state.current_caption = edited_caption

    if "current_image" not in st.session_state:
        return

    try:
        # Normalise to 3-channel RGB first: grayscale / palette / other modes
        # do not produce the (H, W, 3) array that COLOR_RGB2BGR requires.
        rgb_pixels = np.array(st.session_state.current_image.convert("RGB"))
        cv_image = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

        if settings["caption_method"] == "Overlay on Image":
            result_image = ImageCaptionOverlay.add_caption_overlay(
                cv_image,
                st.session_state.current_caption,
                position=settings["position"],
                font_size=settings["font_size"],
                thickness=settings["thickness"],
            )
        else:
            custom_font = settings["custom_font"]
            # Ignore the custom font unless the path actually exists.
            font_path = custom_font if custom_font and os.path.exists(custom_font) else None

            result_image = ImageCaptionOverlay.add_caption_background(
                cv_image,
                st.session_state.current_caption,
                font_path=font_path,
                background_color=_hex_to_rgb(settings["bg_color"]),
                text_color=_hex_to_rgb(settings["text_color"]),
                margin=settings["margin"],
            )

        # Back to RGB / PIL for display and PNG encoding.
        result_pil = Image.fromarray(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
        st.image(result_pil, caption="Image with Caption", use_container_width=True)

        img_buffer = io.BytesIO()
        result_pil.save(img_buffer, format='PNG')

        source_name = st.session_state.get("current_image_name") or (
            uploaded_file.name if uploaded_file else "image"
        )
        # Drop the original extension so the result is not "photo.jpg.png".
        stem = os.path.splitext(source_name)[0] or "image"

        st.download_button(
            label="📥 Download Image with Caption",
            data=img_buffer.getvalue(),
            file_name=f"captioned_{stem}.png",
            mime="image/png",
        )
    except Exception as e:  # UI boundary: report instead of crashing the page
        st.error(f"Error processing image: {e}")


def _render_history():
    """Draw the searchable caption history when the user has asked to see it."""
    if not st.session_state.get("show_history", False):
        return

    st.markdown("---")
    st.header("📋 Caption Generation History")

    history = st.session_state.caption_history.get_history()
    if not history:
        st.info("No caption history available.")
        return

    search_term = st.text_input("🔍 Search history", placeholder="Search by image name or caption...")

    filtered_history = history
    if search_term:
        needle = search_term.lower()
        filtered_history = [
            item for item in history
            if needle in item['image_name'].lower()
            or needle in item['caption'].lower()
            or needle in item['model'].lower()
        ]

    if not filtered_history:
        st.info("No matching history found.")
        return

    # Newest first, limited to the most recent entries.
    for item in reversed(filtered_history[-_HISTORY_DISPLAY_LIMIT:]):
        with st.expander(f"{item['timestamp'][:19]} - {item['image_name']} ({item['model']})"):
            st.write(f"**Model:** {item['model']}")
            st.write(f"**Image:** {item['image_name']}")
            st.write(f"**Caption:** {item['caption']}")
            st.write(f"**Timestamp:** {item['timestamp']}")


def _render_footer():
    """Draw the centred footer."""
    st.markdown("---")
    # Kept as one compact HTML block: blank lines followed by indented lines
    # would be parsed by Markdown as an indented code block and shown verbatim.
    st.markdown(
        "<div style='text-align: center'>"
        "<p>Built with Streamlit, LangChain, OpenCV, and multi-model AI APIs</p>"
        "<p>Supports OpenAI GPT-4o, Google Gemini, and GROQ Vision models</p>"
        "<p><small>Make sure to add your API keys to the .env file</small></p>"
        "</div>",
        unsafe_allow_html=True,
    )


def main():
    """Render the Multi-Model Image Caption Generator Streamlit page.

    Sets up the page, creates the per-session ``CaptionHistory`` and
    ``MultiModelCaptionGenerator`` on first run, then draws the sidebar, the
    upload/generation column, the preview column, the optional history section
    and the footer. Every section is rendered on every run; errors inside a
    section are reported in place rather than aborting the page.
    """
    st.set_page_config(
        page_title="Multi-Model Image Caption Generator",
        page_icon="🖼️",
        layout="wide",
    )

    st.title("🖼️ Multi-Model Image Caption Generator")
    st.markdown("Generate captions using OpenAI GPT-4V, Google Gemini, and GROQ Vision models")

    # Objects that must survive Streamlit's script reruns live in session state.
    if 'caption_history' not in st.session_state:
        st.session_state.caption_history = CaptionHistory()

    if 'caption_generator' not in st.session_state:
        st.session_state.caption_generator = MultiModelCaptionGenerator()

    settings = _render_sidebar()

    col1, col2 = st.columns([1, 1])

    with col1:
        uploaded_file = _render_upload_column()

    with col2:
        _render_preview_column(settings, uploaded_file)

    _render_history()
    _render_footer()

# Build the page only when this file is executed as the script entry point,
# not when it is imported as a module.
if __name__ == "__main__":
    main()