Clone77 committed on
Commit
98a50c4
·
verified ·
1 Parent(s): c0452f2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ from PIL import Image
6
+ from transformers import pipeline
7
+ from langchain import LLMChain, PromptTemplate
8
+ from langchain_huggingface import ChatHuggingFace
9
+ from pydantic import BaseModel, validator
10
+ from typing import Optional
11
+
12
+
13
# Read the Hugging Face API token from the 'hf' environment variable (the
# Space secret name) and propagate it under the names the downstream
# libraries look for.
hf = os.getenv('hf')
if hf is not None:
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = hf
    os.environ['HF_TOKEN'] = hf
# FIX: the original assigned unconditionally, so a missing 'hf' secret made
# `os.environ[...] = None` raise an opaque TypeError at import time. With the
# guard, startup succeeds and token-requiring calls fail later with their own
# clearer errors.
16
+
17
+
18
# Pydantic models for input/output validation
class UserInput(BaseModel):
    """Validated wrapper around the user's free-text question."""

    question: str

    @validator('question')
    def check_question(cls, v):
        # Accept only questions with at least one non-whitespace character.
        if v.strip():
            return v
        raise ValueError('Question cannot be empty')
27
+
28
class ChatResponse(BaseModel):
    """Validated wrapper around the chatbot's answer."""

    answer: str
    # Fixed placeholder value — no real confidence score is computed anywhere
    # in this app.
    confidence: Optional[float] = 0.95

    @validator('answer')
    def check_answer(cls, v):
        # Accept only answers with at least one non-whitespace character.
        if v.strip():
            return v
        raise ValueError('Answer cannot be empty')
37
+
38
# Image preprocessing with OpenCV
def preprocess_image(image):
    """Convert a PIL image to a 224x224 BGR OpenCV array.

    NOTE(review): the return value of this helper is never consumed by the
    app — the raw PIL image is what actually gets sent to the VQA pipeline.
    Confirm whether this preprocessing step is still needed.
    """
    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    return cv2.resize(bgr, (224, 224))
44
+
45
# HuggingFace pipeline for visual question answering
vqa_pipeline = pipeline("visual-question-answering", model="Salesforce/blip-vqa-base")


def get_image_context(image, question):
    """Run the BLIP VQA pipeline and return its single best answer string."""
    top_hit = vqa_pipeline(image, question, top_k=1)[0]
    return top_hit['answer']
51
+
52
# LangChain setup for conversational response
# NOTE(review): "meta-llama/Llama-3-7b-chat-hf" does not match a published
# Hugging Face repo id (Llama 2 ships 7b-chat; Llama 3 ships 8B-Instruct) —
# confirm this model id actually resolves before deploying.
llm = ChatHuggingFace(model_id="meta-llama/Llama-3-7b-chat-hf", huggingfacehub_api_token=hf)

prompt = PromptTemplate(
    template="Based on the image context: {image_context}, answer the question: {question}",
    input_variables=["image_context", "question"],
)

chain = LLMChain(llm=llm, prompt=prompt)


def generate_response(image_context, question):
    """Fill the prompt with the VQA context and run it through the LLM chain."""
    return chain.run(image_context=image_context, question=question)
62
+
63
# Streamlit app: upload an image, ask a question, answer via BLIP VQA + LLM.
st.title("Intelligent Multimodal Chatbot")
st.write("Upload an image and ask a question about it.")

# Image upload and question input widgets.
uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
question = st.text_input("Ask a question about the image")

if uploaded_image and question:
    try:
        # Validate input using Pydantic (raises ValueError on blank questions;
        # caught below and shown via st.error).
        user_input = UserInput(question=question)

        # Display the uploaded image.
        image = Image.open(uploaded_image)
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # FIX: the original also called preprocess_image(image) here, but its
        # result was never used — the raw PIL image is what gets passed to the
        # VQA pipeline — so the dead computation is removed.

        # Ask the VQA model about the image, then expand its short answer into
        # a conversational response with the LLM chain.
        image_context = get_image_context(image, question)
        response = generate_response(image_context, question)

        # Validate the model output using Pydantic before rendering.
        chat_response = ChatResponse(answer=response)

        # Display the result (confidence is a fixed placeholder, see ChatResponse).
        st.write("**Answer**: ", chat_response.answer)
        st.write("**Confidence**: ", chat_response.confidence)

    except Exception as e:
        # Top-level UI boundary: surface any failure (validation, model, I/O)
        # to the user instead of crashing the app.
        st.error(f"Error: {str(e)}")
else:
    st.write("Please upload an image and enter a question.")