Spaces:
Sleeping
Sleeping
| """ | |
| VISUAL CONVERSATIONAL INTELLIGENCE ENGINE | |
| ========================================== | |
| A pluggable, image-grounded multi-turn conversational system. | |
| Architecture: | |
| - Session-based image memory (stored once, queried multiple times) | |
| - Vision-Language Model (BLIP) for image-question answering | |
| - REST-style core logic (pure functions) | |
| - Gradio UI for demonstration | |
| Academic Purpose: | |
| Demonstrates AI system design for visual question answering with | |
| conversational context, suitable for research evaluation. | |
| """ | |
| import gradio as gr | |
| from PIL import Image | |
| from transformers import BlipProcessor, BlipForQuestionAnswering | |
| import torch | |
| from typing import Optional, Tuple, List | |
| import uuid | |
| import re | |
| # ============================================================================ | |
| # SESSION MEMORY MANAGEMENT | |
| # ============================================================================ | |
class SessionMemory:
    """
    In-memory store of per-session state for image-grounded conversations.

    Each session record is a dict with two keys:
    - 'uploaded_image': the stored image, or None when nothing is loaded
    - 'conversation_history': list of (question, answer) tuples, oldest first

    Records are keyed by a UUID4 string. Methods that receive an unknown
    session ID do nothing (or return an empty value) instead of raising.
    """

    def __init__(self):
        # Maps session_id -> session record.
        self.sessions = {}

    @staticmethod
    def _blank_record() -> dict:
        """Build a fresh record with no image and an empty history."""
        return {
            'uploaded_image': None,
            'conversation_history': []
        }

    def create_session(self) -> str:
        """Register a new, empty session and return its generated ID."""
        new_id = str(uuid.uuid4())
        self.sessions[new_id] = self._blank_record()
        return new_id

    def store_image(self, session_id: str, image: Image.Image) -> None:
        """Attach an image to an existing session; unknown IDs are ignored."""
        record = self.sessions.get(session_id)
        if record is not None:
            record['uploaded_image'] = image

    def get_image(self, session_id: str) -> Optional[Image.Image]:
        """Return the session's stored image, or None if absent/unknown."""
        record = self.sessions.get(session_id)
        if record is None:
            return None
        return record['uploaded_image']

    def add_to_history(self, session_id: str, question: str, answer: str) -> None:
        """Append one (question, answer) pair to an existing session."""
        record = self.sessions.get(session_id)
        if record is not None:
            record['conversation_history'].append((question, answer))

    def get_history(self, session_id: str) -> List[Tuple[str, str]]:
        """Return the session's history list, or [] for an unknown ID."""
        record = self.sessions.get(session_id)
        if record is None:
            return []
        # This is the stored list itself, not a copy.
        return record['conversation_history']

    def reset_session(self, session_id: str) -> None:
        """Replace an existing session's record with a blank one."""
        if session_id in self.sessions:
            self.sessions[session_id] = self._blank_record()
| # ============================================================================ | |
| # VISION-LANGUAGE MODEL INITIALIZATION | |
| # ============================================================================ | |
class VisualQAEngine:
    """
    Inference wrapper around a pretrained BLIP visual question answering model.

    The processor and model are loaded once at construction and used as-is
    (no fine-tuning happens in this class).
    """

    def __init__(self, model_name: str = "Salesforce/blip-vqa-base"):
        """
        Load the BLIP processor and model and move the model to a device.

        Args:
            model_name: HuggingFace model identifier
        """
        print(f"Loading model: {model_name}")
        self.processor = BlipProcessor.from_pretrained(model_name)
        self.model = BlipForQuestionAnswering.from_pretrained(model_name)

        # Run on the GPU when one is visible to torch, otherwise on the CPU.
        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"
        self.model.to(self.device)
        print(f"Model loaded on device: {self.device}")

    def answer_question(self, image: Image.Image, question: str) -> str:
        """
        Answer a single question about a single image.

        Takes the image and question in and returns the decoded answer;
        no attribute of the engine is written.

        Args:
            image: PIL Image object
            question: User's question about the image

        Returns:
            Answer text decoded from the model's first generated sequence
        """
        # Encode both modalities as PyTorch tensors on the model's device.
        encoded = self.processor(image, question, return_tensors="pt")
        encoded = encoded.to(self.device)

        # Generation only; gradients are not needed.
        with torch.no_grad():
            generated = self.model.generate(**encoded, max_length=50)

        return self.processor.decode(generated[0], skip_special_tokens=True)
| # ============================================================================ | |
| # APPLICATION LOGIC (REST-STYLE PURE FUNCTIONS) | |
| # ============================================================================ | |
def validate_question(question: str, image: Optional[Image.Image]) -> Tuple[bool, str]:
    """
    Check the preconditions for answering a question.

    The checks run in this order, and the first failure is reported:
    1. An image must be present
    2. The question must contain non-whitespace text

    Args:
        question: User's input question
        image: Stored image (or None)

    Returns:
        (is_valid, error_message); the message is "" when valid
    """
    if image is None:
        return False, "⚠️ Please upload an image first before asking questions."

    has_text = bool(question) and question.strip() != ""
    if not has_text:
        return False, "⚠️ Please enter a question."

    return True, ""
def process_question(
    vqa_engine: VisualQAEngine,
    session_memory: SessionMemory,
    session_id: str,
    question: str
) -> Tuple[str, List[Tuple[str, str]]]:
    """
    Answer one user question against the session's stored image.

    Flow:
    1. Fetch the session's image
    2. Validate the image/question pair
    3. On success, ask the model and record the (question, answer) pair
    4. Return the reply together with the session's current history

    When validation fails the reply is the validation message and the
    history is left as it was.

    Args:
        vqa_engine: Visual QA inference engine
        session_memory: Session storage
        session_id: Current session identifier
        question: User's question

    Returns:
        (answer_or_error_message, conversation_history)
    """
    stored_image = session_memory.get_image(session_id)
    ok, problem = validate_question(question, stored_image)

    if ok:
        reply = vqa_engine.answer_question(stored_image, question)
        session_memory.add_to_history(session_id, question, reply)
    else:
        reply = problem

    return reply, session_memory.get_history(session_id)
def handle_image_upload(
    session_memory: SessionMemory,
    session_id: str,
    image: Image.Image
) -> str:
    """
    Store an uploaded image in the session and report the outcome.

    Args:
        session_memory: Session storage
        session_id: Current session identifier
        image: Uploaded PIL Image (None when nothing was provided)

    Returns:
        Status message for the UI
    """
    if image is not None:
        session_memory.store_image(session_id, image)
        return "✅ Image uploaded successfully! You can now ask questions about this image."

    # Nothing to store; session memory is not touched.
    return "⚠️ No image uploaded."
def reset_conversation(
    session_memory: SessionMemory,
    session_id: str
) -> Tuple[str, List, None]:
    """
    Wipe the session's image and conversation history.

    Args:
        session_memory: Session storage
        session_id: Current session identifier

    Returns:
        (status_message, empty_history, None_for_image)
    """
    session_memory.reset_session(session_id)

    status_message = "🔄 Conversation reset. Please upload a new image."
    empty_history: List = []
    return status_message, empty_history, None
| # ============================================================================ | |
| # GRADIO UI INTERFACE | |
| # ============================================================================ | |
def format_history_for_chatbot(history: List[Tuple[str, str]]) -> List[dict]:
    """
    Flatten (question, answer) tuples into role/content message dicts.

    Each tuple yields a "user" message followed by an "assistant" message,
    in the same order as the input history.
    """
    return [
        {"role": speaker, "content": text}
        for asked, replied in history
        for speaker, text in (("user", asked), ("assistant", replied))
    ]
def generate_visual_topic_suggestions(
    vqa_engine: VisualQAEngine,
    image: Image.Image
) -> List[str]:
    """
    Generate guided visual topic suggestions using the SAME BLIP VQA model.

    IMPORTANT:
    - This is GUIDANCE ONLY, not object detection
    - Uses a small, fixed set of internal prompts
    - Keeps at most one word per prompt (the first usable one)
    - Does NOT claim to list all objects

    A prompt whose inference or parsing fails is skipped (best effort) and
    the failure is printed; interpreter-level signals such as
    KeyboardInterrupt are not swallowed.

    Args:
        vqa_engine: Visual QA inference engine
        image: Uploaded PIL Image (None yields no suggestions)

    Returns:
        List of 0-4 unique, lower-cased single-word suggestions, in prompt order
    """
    if image is None:
        return []

    # Fixed set of internal prompts for guidance
    internal_prompts = (
        "What is the main object in the image?",
        "Is there a furniture item?",
        "Is there an electronic device?",
        "Is there a plant?",
    )
    # Words that carry no topic information (yes/no answers, articles, ...).
    stop_words = frozenset(
        {'yes', 'no', 'the', 'a', 'an', 'is', 'are', 'there', 'not'}
    )
    word_pattern = re.compile(r'\b[a-zA-Z]+\b')

    suggestions = []
    for prompt in internal_prompts:
        try:
            answer = vqa_engine.answer_question(image, prompt)
            words = word_pattern.findall(answer.lower())
        except Exception as exc:
            # Suggestions are optional; report the failure and move on.
            print(f"Topic suggestion skipped for prompt {prompt!r}: {exc}")
            continue

        # Keep the first word that is longer than two letters and not a stop word.
        for word in words:
            if len(word) > 2 and word not in stop_words:
                suggestions.append(word)
                break

    # De-duplicate while preserving first-seen order; cap at four.
    return list(dict.fromkeys(suggestions))[:4]
def clear_chat_only(
    session_memory: SessionMemory,
    session_id: str
) -> Tuple[str, List]:
    """
    Drop the session's conversation history while keeping its image.

    An unknown session ID leaves storage untouched; the same status is
    returned either way.

    Args:
        session_memory: Session storage
        session_id: Current session identifier

    Returns:
        (status_message, empty_history)
    """
    record = session_memory.sessions.get(session_id)
    if record is not None:
        # Rebind to a new list rather than emptying the existing one in place.
        record['conversation_history'] = []

    status_message = "💬 Chat cleared. Image retained."
    return status_message, []
def remove_image_only(
    session_memory: SessionMemory,
    session_id: str
) -> Tuple[str, None]:
    """
    Discard the session's image while keeping its conversation history.

    An unknown session ID leaves storage untouched; the same status is
    returned either way.

    Args:
        session_memory: Session storage
        session_id: Current session identifier

    Returns:
        (status_message, None_for_image)
    """
    record = session_memory.sessions.get(session_id)
    if record is not None:
        record['uploaded_image'] = None

    status_message = "🖼️ Image removed. Chat history retained."
    return status_message, None
def get_session_metadata(
    session_memory: SessionMemory,
    session_id: str
) -> str:
    """
    Summarise a session for the Advanced Mode metadata panel.

    Args:
        session_memory: Session storage
        session_id: Current session identifier

    Returns:
        Three-line summary: truncated session ID, whether an image is
        loaded, and the number of stored conversation turns. An unknown
        session ID yields a fixed plain-text placeholder.
    """
    record = session_memory.sessions.get(session_id)
    if record is None:
        return "Session ID: Unknown\nImage Loaded: No\nConversation Turns: 0"

    has_image = record['uploaded_image'] is not None
    summary_lines = (
        # Only the first 8 characters of the ID are displayed.
        f"**Session ID:** `{session_id[:8]}...`",
        f"**Image Loaded:** {'Yes' if has_image else 'No'}",
        f"**Conversation Turns:** {len(record['conversation_history'])}",
    )
    return "\n".join(summary_lines)
def create_gradio_interface(vqa_engine: VisualQAEngine, session_memory: SessionMemory) -> gr.Blocks:
    """
    Create the Gradio UI for the Visual Conversational Intelligence Engine.

    UI Components:
    - Mode selector (Basic / Advanced)
    - Image upload with guided topic suggestions
    - Question input with type selector (Advanced Mode)
    - Chat history display
    - Advanced controls and metadata (Advanced Mode only)

    Session handling:
    - A new session is created in ``session_memory`` every time the page
      loads, so each visitor gets a separate image and conversation.

    Args:
        vqa_engine: Visual QA inference engine used by the handlers
        session_memory: Session storage shared by all handlers

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` application
    """
    # Custom CSS for visual polish and theming
    custom_css = """
    .mode-selector {font-size: 16px; font-weight: bold;}
    .topic-chip {margin: 4px; padding: 8px 16px; border-radius: 16px; background: #e3f2fd; cursor: pointer;}
    .capability-box {background: #f5f5f5; padding: 16px; border-radius: 8px; margin: 8px 0;}
    .metadata-box {background: #fafafa; padding: 12px; border-radius: 6px; font-family: monospace;}
    """
    with gr.Blocks(title="Visual Conversational Intelligence Engine", css=custom_css) as demo:
        # Session state (hidden). It starts empty and is filled by the
        # demo.load event below: creating the session here, at build time,
        # would hand the same session ID to every visitor.
        session_id = gr.State(value=None)

        # Header
        gr.Markdown("""
        # 🔍 Visual Conversational Intelligence Engine
        **An image-grounded multi-turn conversational system for academic demonstration**
        """)

        # MODE SELECTOR (TOP OF UI)
        with gr.Row():
            mode_selector = gr.Radio(
                choices=["Basic Mode", "Advanced Mode"],
                value="Basic Mode",
                label="Interface Mode",
                info="Basic Mode: Student-friendly interface | Advanced Mode: Research/admin view with additional controls",
                elem_classes="mode-selector"
            )

        # BASIC MODE INSTRUCTIONS (shown only in Basic Mode)
        basic_instructions = gr.Markdown("""
        ### 🎓 How to use (Student View):
        1. **Upload an image** 📤
        2. **Ask questions** about the image 💬
        3. **Continue the conversation** - ask follow-up questions without re-uploading
        4. **Reset** to start over with a new image 🔄
        **Note:** All answers are strictly grounded in the uploaded image.
        """, visible=True)

        # MAIN LAYOUT (TWO COLUMNS)
        with gr.Row():
            # LEFT COLUMN: IMAGE UPLOAD SECTION
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 📤 Upload Image")
                    image_input = gr.Image(
                        type="pil",
                        label="Drag and drop or click to upload",
                        height=400
                    )
                    upload_status = gr.Textbox(
                        label="Status",
                        interactive=False,
                        lines=1
                    )
                    upload_btn = gr.Button("📥 Upload Image", variant="primary", size="lg")

                # GUIDED VISUAL TOPIC SUGGESTIONS (shown after upload)
                gr.Markdown("#### 💡 Suggested Visual Topics (Guidance Only)")
                gr.Markdown("*Click a topic to prefill your question. These are suggestions, not exhaustive object lists.*")
                with gr.Row():
                    topic_btn_1 = gr.Button("", visible=False, size="sm")
                    topic_btn_2 = gr.Button("", visible=False, size="sm")
                    topic_btn_3 = gr.Button("", visible=False, size="sm")
                    topic_btn_4 = gr.Button("", visible=False, size="sm")
                topic_buttons = [topic_btn_1, topic_btn_2, topic_btn_3, topic_btn_4]

            # RIGHT COLUMN: CHAT / CONVERSATION SECTION
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 💬 Ask Questions")
                    # NOTE(review): the handlers send role/content dicts to this
                    # component; depending on the installed Gradio version the
                    # Chatbot may need type="messages" - confirm against the
                    # version pinned for this app.
                    chatbot = gr.Chatbot(
                        label="Conversation History",
                        height=400
                    )
                    question_input = gr.Textbox(
                        label="Your Question",
                        placeholder="Ask a question about the uploaded image...",
                        lines=2
                    )
                    with gr.Row():
                        submit_btn = gr.Button("🚀 Ask Question", variant="primary", size="lg")
                        reset_btn_basic = gr.Button("🔄 Reset All", variant="secondary", size="lg")

        # ADVANCED MODE PANEL (shown only in Advanced Mode)
        with gr.Group(visible=False) as advanced_panel:
            gr.Markdown("## 🔬 Advanced Controls & Metadata")
            with gr.Row():
                # QUESTION TYPE SELECTOR (GUIDANCE ONLY)
                with gr.Column(scale=1):
                    gr.Markdown("### Question Type Selector (Guidance)")
                    question_type = gr.Dropdown(
                        choices=[
                            "Object Presence",
                            "Object Attribute (Color / Shape)",
                            "Scene Understanding",
                            "Yes / No Verification"
                        ],
                        label="Select Question Type",
                        info="This is for guidance only. It does not change AI logic.",
                        value="Object Presence"
                    )
                # SESSION METADATA PANEL (READ-ONLY)
                with gr.Column(scale=1):
                    gr.Markdown("### Session Metadata")
                    session_metadata = gr.Markdown(
                        "**Session ID:** Not initialized\n**Image Loaded:** No\n**Conversation Turns:** 0"
                    )
                    refresh_metadata_btn = gr.Button("🔄 Refresh Metadata", size="sm")

            # CAPABILITY / SCOPE INDICATOR (STATIC INFO BOX)
            with gr.Row():
                gr.Markdown("""
                ### ⚙️ System Capabilities & Limitations
                **What this system CAN do:**
                - ✅ Image-grounded Question Answering
                - ✅ Single-image Conversational Memory
                - ✅ Multi-turn dialogue about the same image
                **What this system CANNOT do:**
                - ❌ Exhaustive Object Listing (not object detection)
                - ❌ Multi-image Reasoning
                - ❌ Precise Counting (VQA models have known limitations)
                - ❌ Open-domain knowledge questions unrelated to the image
                *This is an academic prototype demonstrating AI system design, not a production object detection system.*
                """)

            # ADVANCED RESET CONTROLS
            with gr.Row():
                gr.Markdown("### Reset Controls")
            with gr.Row():
                clear_chat_btn = gr.Button("💬 Clear Chat Only", variant="secondary")
                remove_image_btn = gr.Button("🖼️ Remove Image Only", variant="secondary")
                full_reset_btn = gr.Button("🔄 Full Reset (Image + Chat)", variant="stop")

        # Footer
        gr.Markdown("""
        ---
        **Academic Prototype** | Demonstrates AI system design for visual question answering
        **Tech Stack:** Python • HuggingFace BLIP • Gradio • Session-based Memory
        """)

        # ====================================================================
        # EVENT HANDLERS
        # ====================================================================
        def hidden_topic_updates():
            """Build one 'blank and hide' update per topic chip."""
            return [gr.update(value="", visible=False) for _ in topic_buttons]

        def toggle_mode(mode_choice):
            """
            Toggle between Basic and Advanced Mode.
            Mode toggle does NOT reset session or image.
            """
            is_advanced = (mode_choice == "Advanced Mode")
            return {
                advanced_panel: gr.update(visible=is_advanced),
                basic_instructions: gr.update(visible=not is_advanced),
                reset_btn_basic: gr.update(visible=not is_advanced)
            }

        def upload_image_handler(image, session_id):
            """
            Handle image upload event.
            Stores image and generates guided topic suggestions.
            """
            status = handle_image_upload(session_memory, session_id, image)
            # Generate guided topic suggestions
            suggestions = generate_visual_topic_suggestions(vqa_engine, image)
            # One update per chip: show the suggestions, hide the leftovers.
            updates = [
                gr.update(value=topic, visible=True)
                for topic in suggestions[:len(topic_buttons)]
            ]
            updates.extend(hidden_topic_updates()[len(updates):])
            return [status] + updates

        def topic_click_handler(topic_text):
            """
            Handle topic chip click.
            Prefills question input with suggested topic.
            User can edit before submitting.
            """
            return f"What is the {topic_text} in the image?"

        def ask_question_handler(question, session_id):
            """
            Handle question submission.

            A rejected submission (no image, empty question) is reported as a
            transient assistant message that is NOT written to the session
            history, and the typed question is kept so it can be resubmitted.
            """
            is_valid, error_msg = validate_question(
                question, session_memory.get_image(session_id)
            )
            if not is_valid:
                shown = format_history_for_chatbot(
                    session_memory.get_history(session_id)
                )
                shown.append({"role": "assistant", "content": error_msg})
                return shown, question

            _, history = process_question(
                vqa_engine, session_memory, session_id, question
            )
            return format_history_for_chatbot(history), ""

        def question_type_change_handler(question_type):
            """
            Handle question type selector change.
            Optionally prefills question input with example.
            This is GUIDANCE ONLY.
            """
            examples = {
                "Object Presence": "Is there a [object] in the image?",
                "Object Attribute (Color / Shape)": "What color is the [object]?",
                "Scene Understanding": "What is happening in the image?",
                "Yes / No Verification": "Is the [object] [attribute]?"
            }
            return examples.get(question_type, "")

        def refresh_metadata_handler(session_id):
            """
            Refresh session metadata display.
            """
            return get_session_metadata(session_memory, session_id)

        def clear_chat_handler(session_id):
            """
            Clear chat only (Advanced Mode).
            """
            status, _ = clear_chat_only(session_memory, session_id)
            return status, []

        def remove_image_handler(session_id):
            """
            Remove image only (Advanced Mode).
            The topic chips describe the removed image, so they are hidden too.
            """
            status, image = remove_image_only(session_memory, session_id)
            return [status, image] + hidden_topic_updates()

        def reset_all_handler(session_id):
            """
            Reset image, chat and topic chips (both Basic and Advanced resets).
            """
            status, _, image = reset_conversation(session_memory, session_id)
            return [status, [], image] + hidden_topic_updates()

        # ====================================================================
        # WIRE UP EVENTS
        # ====================================================================
        # Per-visitor session: runs each time the page loads in a browser.
        demo.load(
            fn=session_memory.create_session,
            inputs=None,
            outputs=[session_id]
        )

        # Mode toggle
        mode_selector.change(
            fn=toggle_mode,
            inputs=[mode_selector],
            outputs=[advanced_panel, basic_instructions, reset_btn_basic]
        )

        # Image upload
        upload_btn.click(
            fn=upload_image_handler,
            inputs=[image_input, session_id],
            outputs=[upload_status] + topic_buttons
        )

        # Topic chip clicks (each chip passes its own label to the handler)
        for topic_btn in topic_buttons:
            topic_btn.click(
                fn=topic_click_handler,
                inputs=[topic_btn],
                outputs=[question_input]
            )

        # Question submission
        submit_btn.click(
            fn=ask_question_handler,
            inputs=[question_input, session_id],
            outputs=[chatbot, question_input]
        )
        question_input.submit(
            fn=ask_question_handler,
            inputs=[question_input, session_id],
            outputs=[chatbot, question_input]
        )

        # Question type selector (Advanced Mode)
        question_type.change(
            fn=question_type_change_handler,
            inputs=[question_type],
            outputs=[question_input]
        )

        # Metadata refresh (Advanced Mode)
        refresh_metadata_btn.click(
            fn=refresh_metadata_handler,
            inputs=[session_id],
            outputs=[session_metadata]
        )

        # Advanced reset controls
        clear_chat_btn.click(
            fn=clear_chat_handler,
            inputs=[session_id],
            outputs=[upload_status, chatbot]
        )
        remove_image_btn.click(
            fn=remove_image_handler,
            inputs=[session_id],
            outputs=[upload_status, image_input] + topic_buttons
        )
        full_reset_btn.click(
            fn=reset_all_handler,
            inputs=[session_id],
            outputs=[upload_status, chatbot, image_input] + topic_buttons
        )

        # Basic mode reset
        reset_btn_basic.click(
            fn=reset_all_handler,
            inputs=[session_id],
            outputs=[upload_status, chatbot, image_input] + topic_buttons
        )

    return demo
| # ============================================================================ | |
| # MAIN APPLICATION ENTRY POINT | |
| # ============================================================================ | |
def main():
    """
    Build the engine, session store and UI, then launch the web app.

    Progress is reported on stdout in three numbered steps.
    """
    rule = "=" * 60
    print(rule)
    print("VISUAL CONVERSATIONAL INTELLIGENCE ENGINE")
    print(rule)

    # Initialize core components
    print("\n[1/3] Initializing Vision-Language Model...")
    engine = VisualQAEngine(model_name="Salesforce/blip-vqa-base")

    print("\n[2/3] Setting up session memory...")
    memory = SessionMemory()

    print("\n[3/3] Creating Gradio interface...")
    app = create_gradio_interface(engine, memory)

    print("\n" + rule)
    print("🚀 Launching application...")
    print(rule)

    launch_options = {
        "share": True,             # request a public share link
        "server_name": "0.0.0.0",  # listen on all interfaces
        "server_port": 7860,       # standard Gradio port
    }
    app.launch(**launch_options)


if __name__ == "__main__":
    main()