Clone77 committed on
Commit
98a50c4
·
verified ·
1 Parent(s): c0452f2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ from PIL import Image
6
+ from transformers import pipeline
7
+ from langchain import LLMChain, PromptTemplate
8
+ from langchain_huggingface import ChatHuggingFace
9
+ from pydantic import BaseModel, validator
10
+ from typing import Optional
11
+
12
+
13
# Read the Hugging Face API token from the 'hf' environment variable (the
# Space secret name) and propagate it under the names the downstream
# libraries look for.
hf = os.getenv('hf')
if hf is not None:
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = hf
    os.environ['HF_TOKEN'] = hf
# FIX: the original assigned unconditionally, so a missing 'hf' secret made
# `os.environ[...] = None` raise an opaque TypeError at import time. With the
# guard, startup succeeds and token-requiring calls fail later with their own
# clearer errors.
16
+
17
+
18
# Pydantic models for input/output validation
class UserInput(BaseModel):
    """Validated wrapper around the user's free-text question."""

    question: str

    @validator('question')
    def check_question(cls, v):
        # Accept only questions with at least one non-whitespace character.
        if v.strip():
            return v
        raise ValueError('Question cannot be empty')
27
+
28
class ChatResponse(BaseModel):
    """Validated wrapper around the chatbot's answer."""

    answer: str
    # Fixed placeholder value — no real confidence score is computed anywhere
    # in this app.
    confidence: Optional[float] = 0.95

    @validator('answer')
    def check_answer(cls, v):
        # Accept only answers with at least one non-whitespace character.
        if v.strip():
            return v
        raise ValueError('Answer cannot be empty')
37
+
38
# Image preprocessing with OpenCV
def preprocess_image(image):
    """Convert a PIL image to a 224x224 BGR OpenCV array.

    NOTE(review): the return value of this helper is never consumed by the
    app — the raw PIL image is what actually gets sent to the VQA pipeline.
    Confirm whether this preprocessing step is still needed.
    """
    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    return cv2.resize(bgr, (224, 224))
44
+
45
# HuggingFace pipeline for visual question answering
vqa_pipeline = pipeline("visual-question-answering", model="Salesforce/blip-vqa-base")


def get_image_context(image, question):
    """Run the BLIP VQA pipeline and return its single best answer string."""
    top_hit = vqa_pipeline(image, question, top_k=1)[0]
    return top_hit['answer']
51
+
52
# LangChain setup for conversational response
# NOTE(review): "meta-llama/Llama-3-7b-chat-hf" does not match a published
# Hugging Face repo id (Llama 2 ships 7b-chat; Llama 3 ships 8B-Instruct) —
# confirm this model id actually resolves before deploying.
llm = ChatHuggingFace(model_id="meta-llama/Llama-3-7b-chat-hf", huggingfacehub_api_token=hf)

prompt = PromptTemplate(
    template="Based on the image context: {image_context}, answer the question: {question}",
    input_variables=["image_context", "question"],
)

chain = LLMChain(llm=llm, prompt=prompt)


def generate_response(image_context, question):
    """Fill the prompt with the VQA context and run it through the LLM chain."""
    return chain.run(image_context=image_context, question=question)
62
+
63
# Streamlit app: upload an image, ask a question, answer via BLIP VQA + LLM.
st.title("Intelligent Multimodal Chatbot")
st.write("Upload an image and ask a question about it.")

# Image upload and question input widgets.
uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
question = st.text_input("Ask a question about the image")

if uploaded_image and question:
    try:
        # Validate input using Pydantic (raises ValueError on blank questions;
        # caught below and shown via st.error).
        user_input = UserInput(question=question)

        # Display the uploaded image.
        image = Image.open(uploaded_image)
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # FIX: the original also called preprocess_image(image) here, but its
        # result was never used — the raw PIL image is what gets passed to the
        # VQA pipeline — so the dead computation is removed.

        # Ask the VQA model about the image, then expand its short answer into
        # a conversational response with the LLM chain.
        image_context = get_image_context(image, question)
        response = generate_response(image_context, question)

        # Validate the model output using Pydantic before rendering.
        chat_response = ChatResponse(answer=response)

        # Display the result (confidence is a fixed placeholder, see ChatResponse).
        st.write("**Answer**: ", chat_response.answer)
        st.write("**Confidence**: ", chat_response.confidence)

    except Exception as e:
        # Top-level UI boundary: surface any failure (validation, model, I/O)
        # to the user instead of crashing the app.
        st.error(f"Error: {str(e)}")
else:
    st.write("Please upload an image and enter a question.")