Yordann's picture
Update app.py
1209c1c verified
raw
history blame
22 kB
# Refined Chatbot Code with Multimodal Capabilities
import gradio as gr
import openai
import os
import base64
import requests
from PIL import Image
import cv2
import torch
from urllib.parse import urlparse
import mimetypes
from bs4 import BeautifulSoup
import io
# --- Configuration and Initialization ---
# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Translation pipelines used for the Bulgarian <-> English round trip.
from transformers import pipeline


def _load_translator(model_name):
    """Create a Hugging Face translation pipeline for the given model name."""
    return pipeline("translation", model=model_name)


bg_to_en = _load_translator("Helsinki-NLP/opus-mt-bg-en")
en_to_bg = _load_translator("Helsinki-NLP/opus-mt-en-bg")
# --- Utility Functions ---
def local_image_to_data_url(image_path):
    """Read a file from disk and return its bytes as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when no type can be guessed,
    ``application/octet-stream`` is used instead.
    """
    guessed_type = mimetypes.guess_type(image_path)[0]
    mime_type = 'application/octet-stream' if guessed_type is None else guessed_type

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    payload = base64.b64encode(raw_bytes).decode('utf-8')
    return "data:" + mime_type + ";base64," + payload
def pil_image_to_data_url(pil_image):
    """Serialize an image object to PNG in memory and wrap it in a data URL.

    The object only needs a ``save(file_obj, format=...)`` method, as PIL
    images have.
    """
    png_buffer = io.BytesIO()
    pil_image.save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue())
    return "data:image/png;base64," + encoded.decode('utf-8')
# --- Advanced Content Processing ---
def process_video_for_analysis(video_path):
    """Sample up to nine frames from a video and tile them into a 3x3 grid.

    Frames are read sequentially and every ``step``-th frame is kept, where
    ``step`` is derived from the frame count reported by OpenCV so that the
    samples are spread over the whole clip. Audio is not processed.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        ``(grid_image, None)`` on success, where ``grid_image`` is an RGB PIL
        image three frames wide and three frames high, or
        ``(None, error_message)`` when no frame could be read.
    """
    grid_size = 3
    max_frames = grid_size * grid_size

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # The reported frame count can be 0 or negative for some sources;
        # clamp the stride to 1 so the first frames are still sampled.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        step = max(1, total_frames // max_frames)

        # frame_index counts every frame read; the loop stops on the number
        # of frames *kept*, not read, so sampling reaches beyond the first
        # few frames of the clip.
        frame_index = 0
        while cap.isOpened() and len(frames) < max_frames:
            success, frame = cap.read()
            if not success:
                break
            if frame_index % step == 0:
                # OpenCV delivers BGR; PIL expects RGB.
                frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
            frame_index += 1
    finally:
        # Release the capture handle even if decoding raises.
        cap.release()

    if not frames:
        return None, "Could not extract frames from video."

    # Tile the frames row by row; unused cells stay black when fewer than
    # nine frames were collected.
    cell_width, cell_height = frames[0].size
    grid_img = Image.new('RGB', (cell_width * grid_size, cell_height * grid_size))
    for i, frame in enumerate(frames):
        row, col = divmod(i, grid_size)
        grid_img.paste(frame, (col * cell_width, row * cell_height))

    return grid_img, None
def detect_content_type(url):
    """Classify a URL by the Content-Type header of a HEAD request.

    Returns one of ``'webpage'``, ``'pdf'``, ``'image'``, ``'video'`` or
    ``'unknown'``; ``'error'`` is returned when the request itself fails.
    """
    try:
        head = requests.head(url, timeout=5, allow_redirects=True)
    except requests.RequestException:
        return 'error'

    content_type = head.headers.get('content-type', '').lower()

    # Substring checks come first, then prefix checks, in the same order of
    # precedence for every URL.
    if 'text/html' in content_type:
        return 'webpage'
    if 'application/pdf' in content_type:
        return 'pdf'
    for prefix, label in (('image/', 'image'), ('video/', 'video')):
        if content_type.startswith(prefix):
            return label
    return 'unknown'
def extract_webpage_content(url, max_chars=4000):
    """Fetch a web page and return its visible text, truncated.

    Args:
        url: Address of the page to download.
        max_chars: Maximum number of characters of text to return.

    Returns:
        The page text with ``<script>`` and ``<style>`` elements removed and
        whitespace-separated, cut to ``max_chars`` characters. On any failure
        (network error, HTTP error status, parse error) a string starting
        with ``"Failed to fetch or parse URL:"`` is returned instead of
        raising.
    """
    try:
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx responses as failures; otherwise the text of an
        # error page would be passed on as if it were the page content.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop elements whose text is code or styling, not readable content.
        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text(separator=' ', strip=True)
        return text[:max_chars]  # Limit content length
    except Exception as e:
        return f"Failed to fetch or parse URL: {str(e)}"
# --- Core AI Interaction ---
def generate_chatgpt_response(messages, temperature=0.7, top_p=1.0):
    """Send a chat completion request and return the reply text.

    Args:
        messages: List of chat messages in the OpenAI chat format.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.

    Returns:
        The text of the first choice (an empty string when the API returns no
        text), or a string starting with ``"Error generating response:"``
        when the request raises.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            temperature=temperature,
            max_tokens=1024,
            top_p=top_p,
            frequency_penalty=0.0,
            presence_penalty=0.0,
        )
        # The message content may be None; callers expect a string.
        return response.choices[0].message.content or ""
    except Exception as e:
        return f"Error generating response: {str(e)}"


def _detect_language(text):
    """Return ``'bg'`` when most letters in ``text`` are Cyrillic, else ``'en'``.

    This is a script-based heuristic: the app only supports English and
    Bulgarian, so any predominantly Cyrillic input is treated as Bulgarian.
    """
    letters = [ch for ch in text if ch.isalpha()]
    if not letters:
        return 'en'
    cyrillic_count = sum(1 for ch in letters if '\u0400' <= ch <= '\u04ff')
    return 'bg' if cyrillic_count * 2 > len(letters) else 'en'


# --- Main Gradio Application Logic ---
def generate_response(user_input, top_p, temperature, chat_counter, chatbot, history, image=None, video=None, url=None, request: gr.Request = None):
    """Build a multimodal prompt from the UI inputs and return the new chat state.

    Args:
        user_input: Text typed by the user (English or Bulgarian).
        top_p: Nucleus-sampling value from the UI, forwarded to the model.
        temperature: Sampling temperature from the UI, forwarded to the model.
        chat_counter: Running count of processed turns.
        chatbot: Current chatbot value (not used here).
        history: List of ``{"role": ..., "content": ...}`` dicts from earlier
            turns; the new turn is appended to it in place.
        image: Optional PIL image to attach.
        video: Optional path to a video file; a grid of sampled frames is
            attached in its place.
        url: Optional URL; web pages are attached as text, images by URL.
        request: Gradio request object (not used here).

    Returns:
        A 6-tuple matching the event outputs: chatbot messages, stored
        history, turn counter, status text, an update for the input textbox
        and an update for the submit button.
    """
    history = [] if history is None else history
    user_input = user_input or ""
    url = url.strip() if url else url

    # Translate Bulgarian input to English before sending it to the model.
    lang = _detect_language(user_input)
    if lang == "bg":
        user_input_translated = bg_to_en(user_input)[0]["translation_text"]
    else:
        user_input_translated = user_input

    # Prepare multimodal content
    multimodal_content = []
    if user_input_translated.strip():
        multimodal_content.append({"type": "text", "text": user_input_translated})

    if image is not None:
        try:
            data_url = pil_image_to_data_url(image)
            multimodal_content.append({"type": "image_url", "image_url": {"url": data_url}})
        except Exception as e:
            multimodal_content.append({"type": "text", "text": f"[Image Error: {str(e)}]"})

    if video is not None:
        try:
            grid_img, error = process_video_for_analysis(video)
            if error:
                multimodal_content.append({"type": "text", "text": f"[Video Error: {error}]"})
            else:
                data_url = pil_image_to_data_url(grid_img)
                multimodal_content.append({"type": "image_url", "image_url": {"url": data_url}})
                multimodal_content.append({"type": "text", "text": "[The user has uploaded a video. The image above is a grid of frames from the video. Please analyze the sequence of frames to understand what is happening in the video.]"})
        except Exception as e:
            multimodal_content.append({"type": "text", "text": f"[Video Processing Error: {str(e)}]"})

    if url:
        content_type = detect_content_type(url)
        if content_type == 'webpage':
            web_content = extract_webpage_content(url)
            multimodal_content.append({"type": "text", "text": f"[Content from URL ({url})]:\n{web_content}"})
        elif content_type == 'image':
            multimodal_content.append({"type": "image_url", "image_url": {"url": url}})
        else:
            multimodal_content.append({"type": "text", "text": f"[URL content of type '{content_type}' is not supported for direct analysis. Please describe the content if you can access it.]"})

    # Nothing to send: do not call the API with an empty user message.
    if not multimodal_content:
        return (
            history,
            history,
            chat_counter,
            "Please enter a message or attach an image, video, or URL.",
            gr.update(interactive=True),
            gr.update(interactive=True),
        )

    # System prompt, then earlier turns, then the new user message.
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant. Provide clear, friendly, and informative responses to user questions. Analyze any provided images, videos, or web content to inform your response."}
    ]
    messages.extend(history)
    messages.append({"role": "user", "content": multimodal_content})

    # Generate response
    response_text = generate_chatgpt_response(messages, temperature, top_p)

    # Translate the reply back when the user wrote in Bulgarian.
    if lang == "bg":
        response_text = en_to_bg(response_text)[0]["translation_text"]

    # Update history with the original (untranslated) user text.
    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": response_text})
    chat_counter += 1

    return history, history, chat_counter, "✅ Success", gr.update(value="", interactive=True), gr.update(interactive=True)
# --- Gradio UI ---
# (The user's original CSS and UI structure can be pasted here, as it does not need to be changed)
# ==== Launch ====
# if __name__ == "__main__":
# demo.queue(max_size=10).launch(server_name="0.0.0.0", server_port=7860)
def reset_textbox():
    """Return a Gradio update that sets the target component's value to an empty string."""
    return gr.update(value='')
# ==== Enhanced Custom CSS with Masterbrand Styling ====
# Plain (non-f) string: the stylesheet has no placeholders, so CSS braces are
# written directly and quotes need no escaping.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap');
* {
    font-family: 'Inter', sans-serif !important;
}
.gradio-container {
    background: linear-gradient(135deg, #0a0a0a 0%, #1a0a0a 50%, #2a1a1a 100%) !important;
    min-height: 100vh !important;
}
.main-header {
    background: linear-gradient(135deg, #0a0a0a 0%, #1a0a0a 50%, #2a1a1a 100%) !important;
    padding: 2rem 0 3rem 0 !important;
    text-align: center !important;
    border-bottom: 3px solid #ff0000 !important;
    margin-bottom: 2rem !important;
    position: relative !important;
    overflow: hidden !important;
}
.main-header::before {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background: radial-gradient(circle at 50% 50%, rgba(255, 0, 0, 0.1) 0%, transparent 70%);
    pointer-events: none;
}
.logo-container {
    display: flex !important;
    justify-content: center !important;
    align-items: center !important;
    margin-bottom: 1.5rem !important;
    gap: 1rem !important;
}
.logo-image {
    width: 50px !important;
    height: 50px !important;
    border-radius: 15px !important;
    box-shadow: 0 0 30px rgba(255, 0, 0, 0.5) !important;
    animation: pulse-glow 2s ease-in-out infinite alternate !important;
    transition: transform 0.3s ease !important;
}
.logo-image:hover {
    transform: scale(1.1) !important;
}
.brand-text {
    background: linear-gradient(135deg, #00ff41 0%, #ffffff 50%, #ff0000 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    font-size: 2.5rem !important;
    font-weight: 900 !important;
    letter-spacing: 2px !important;
    text-shadow: 0 0 20px rgba(0, 255, 65, 0.3) !important;
}
.main-header h1 {
    color: #ffffff !important;
    font-size: 3.5rem !important;
    font-weight: 900 !important;
    text-shadow: 0 0 30px rgba(255, 0, 0, 0.5) !important;
    background: linear-gradient(135deg, #ffffff 0%, #ff0000 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    margin: 0 !important;
}
.main-header p {
    color: #cccccc !important;
    font-size: 1.3rem !important;
    font-weight: 500 !important;
    margin-top: 0.5rem !important;
}
.chatbot-container {
    background: rgba(26, 26, 26, 0.95) !important;
    border: 2px solid #ff0000 !important;
    border-radius: 25px !important;
    box-shadow: 0 15px 40px rgba(255, 0, 0, 0.2), inset 0 1px 0 rgba(255, 255, 255, 0.1) !important;
    margin-bottom: 2rem !important;
    backdrop-filter: blur(10px) !important;
}
.input-section {
    background: rgba(26, 26, 26, 0.8) !important;
    border: 2px solid #333333 !important;
    border-radius: 20px !important;
    padding: 2rem !important;
    margin: 1rem 0 !important;
    box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3) !important;
    backdrop-filter: blur(10px) !important;
    transition: all 0.3s ease !important;
}
.input-section:hover {
    border-color: #ff0000 !important;
    box-shadow: 0 15px 40px rgba(255, 0, 0, 0.2) !important;
}
.input-group {
    margin-bottom: 1.5rem !important;
}
.input-group:last-child {
    margin-bottom: 0 !important;
}
.input-label {
    color: #ffffff !important;
    font-size: 1.1rem !important;
    font-weight: 600 !important;
    margin-bottom: 0.8rem !important;
    display: flex !important;
    align-items: center !important;
    gap: 0.5rem !important;
}
.input-icon {
    font-size: 1.2rem !important;
    color: #ff0000 !important;
}
.enhanced-textbox textarea {
    background: linear-gradient(135deg, #1a1a1a 0%, #2a2a1a 100%) !important;
    border: 2px solid #444444 !important;
    border-radius: 15px !important;
    color: #ffffff !important;
    font-size: 1.1rem !important;
    padding: 1.2rem !important;
    resize: none !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 5px 15px rgba(0, 0, 0, 0.3) !important;
}
.enhanced-textbox textarea:focus {
    border-color: #ff0000 !important;
    box-shadow: 0 0 25px rgba(255, 0, 0, 0.4), 0 5px 15px rgba(0, 0, 0, 0.3) !important;
    outline: none !important;
    transform: translateY(-2px) !important;
}
.enhanced-textbox textarea::placeholder {
    color: #888888 !important;
    font-style: italic !important;
}
.enhanced-image-input, .enhanced-video-input {
    background: rgba(42, 42, 42, 0.8) !important;
    border: 2px dashed #444444 !important;
    border-radius: 15px !important;
    padding: 2rem !important;
    text-align: center !important;
    transition: all 0.3s ease !important;
    min-height: 120px !important;
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
}
.enhanced-image-input:hover, .enhanced-video-input:hover {
    border-color: #ff0000 !important;
    background: rgba(255, 0, 0, 0.05) !important;
}
.enhanced-url-input input {
    background: linear-gradient(135deg, #1a1a1a 0%, #2a2a1a 100%) !important;
    border: 2px solid #444444 !important;
    border-radius: 15px !important;
    color: #ffffff !important;
    font-size: 1.1rem !important;
    padding: 1.2rem !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 5px 15px rgba(0, 0, 0, 0.3) !important;
}
.enhanced-url-input input:focus {
    border-color: #ff0000 !important;
    box-shadow: 0 0 25px rgba(255, 0, 0, 0.4), 0 5px 15px rgba(0, 0, 0, 0.3) !important;
    outline: none !important;
    transform: translateY(-2px) !important;
}
.enhanced-url-input input::placeholder {
    color: #888888 !important;
    font-style: italic !important;
}
.submit-button {
    background: linear-gradient(135deg, #ff0000 0%, #cc0000 100%) !important;
    border: none !important;
    border-radius: 15px !important;
    color: #ffffff !important;
    font-size: 1.2rem !important;
    font-weight: 700 !important;
    padding: 1.2rem 2.5rem !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 8px 20px rgba(255, 0, 0, 0.3) !important;
    text-transform: uppercase !important;
    letter-spacing: 1px !important;
    width: 100% !important;
    margin-top: 1rem !important;
}
.submit-button:hover {
    background: linear-gradient(135deg, #cc0000 0%, #aa0000 100%) !important;
    transform: translateY(-3px) !important;
    box-shadow: 0 12px 30px rgba(255, 0, 0, 0.4) !important;
}
.submit-button:active {
    transform: translateY(-1px) !important;
}
.accordion-container {
    background: rgba(26, 26, 26, 0.8) !important;
    border: 1px solid #333333 !important;
    border-radius: 15px !important;
    margin-top: 2rem !important;
}
.slider-container label {
    color: #ffffff !important;
    font-weight: 600 !important;
    font-size: 1rem !important;
}
.slider-container input[type="range"] {
    accent-color: #ff0000 !important;
}
.status-container textarea {
    background: transparent !important;
    border: none !important;
    color: #ff0000 !important;
    font-weight: 600 !important;
    text-align: center !important;
}
.footer {
    display: none !important;
}
/* Enhanced chat styling */
.message.user {
    background: linear-gradient(135deg, #2a2a2a 0%, #3a3a2a 100%) !important;
    border-left: 4px solid #ff0000 !important;
    margin-left: 2rem !important;
}
.message.bot {
    background: linear-gradient(135deg, #1a2a1a 0%, #2a3a2a 100%) !important;
    border-left: 4px solid #cc0000 !important;
    margin-right: 2rem !important;
}
/* Responsive design */
@media (max-width: 768px) {
    .main-header h1 {
        font-size: 2.5rem !important;
    }
    .main-header p {
        font-size: 1.1rem !important;
    }
    .logo-image {
        width: 60px !important;
        height: 60px !important;
    }
    .brand-text {
        font-size: 2rem !important;
    }
}
/* Animations */
@keyframes pulse-glow {
    0% {
        box-shadow: 0 0 20px rgba(255, 0, 0, 0.3);
        transform: scale(1);
    }
    50% {
        box-shadow: 0 0 40px rgba(255, 0, 0, 0.6);
        transform: scale(1.02);
    }
    100% {
        box-shadow: 0 0 20px rgba(255, 0, 0, 0.3);
        transform: scale(1);
    }
}
@keyframes float {
    0%, 100% { transform: translateY(0px); }
    50% { transform: translateY(-10px); }
}
.floating {
    animation: float 3s ease-in-out infinite;
}
"""
# ==== Enhanced Theme ====
# Theme variable overrides applied on top of the red-hued Base theme.
_THEME_OVERRIDES = {
    "body_background_fill": "#0a0a0a",
    "body_text_color": "#ffffff",
    "border_color_accent": "#ff0000",
    "button_primary_background_fill": "#ff0000",
    "button_primary_background_fill_hover": "#cc0000",
    "button_primary_text_color": "#ffffff",
    "block_background_fill": "#1a1a1a",
    "block_border_color": "#333333",
}
masterbrand_theme = gr.themes.Base(primary_hue="red").set(**_THEME_OVERRIDES)
# ==== Enhanced UI ====
with gr.Blocks(theme=masterbrand_theme, css=custom_css, title="MasterBrand Assistant") as demo:
    # Enhanced header with logo
    logo_html = '''
    <div class="main-header">
        <div class="logo-container">
            <img src="https://huggingface.co/spaces/Yordann/MasterBrand_ChatBot/resolve/main/logo.png" alt="MasterBrand Logo" class="logo-image floating" style="height:100px;">
            <div class="brand-text">MASTERBRAND</div>
        </div>
        <h1>AI ASSISTANT</h1>
        <p>Your Personal AI Assistant - Powered by ChatGPT - Available in English & Bulgarian</p>
        <p style="font-size: 1rem; color: #ff0000; font-weight: 600; margin-top: 1rem;">
            🚀 Powered by ChatGPT • 🌍 Multilingual Support • 💼 General AI Assistance
        </p>
    </div>
    '''
    gr.HTML(logo_html)

    with gr.Row():
        with gr.Column(scale=1):
            # The response handler returns a list of {"role", "content"}
            # dicts, so the chatbot is declared with the "messages" format.
            # NOTE(review): requires a Gradio release that supports
            # type="messages" -- confirm against the pinned version.
            chatbot = gr.Chatbot(
                label="💬 Chat with MasterBrand AI",
                height=450,
                elem_classes=["chatbot-container"],
                show_label=True,
                container=True,
                show_copy_button=True,
                type="messages",
            )

            # Enhanced Input Section
            with gr.Column(elem_classes=["input-section"]):
                gr.HTML('<div class="input-label"><span class="input-icon">💬</span>Ask Your Question</div>')
                inputs = gr.Textbox(
                    placeholder="Ask me anything... 💡",
                    label="",
                    lines=3,
                    elem_classes=["enhanced-textbox"],
                    show_label=False
                )
                with gr.Row():
                    with gr.Column():
                        gr.HTML('<div class="input-label"><span class="input-icon">🖼️</span>Upload Image (Optional)</div>')
                        image_input = gr.Image(
                            label="",
                            type="pil",
                            elem_classes=["enhanced-image-input"],
                            show_label=False
                        )
                    with gr.Column():
                        gr.HTML('<div class="input-label"><span class="input-icon">🎥</span>Upload Video (Optional)</div>')
                        video_input = gr.Video(
                            label="",
                            elem_classes=["enhanced-video-input"],
                            show_label=False
                        )
                gr.HTML('<div class="input-label"><span class="input-icon">🔗</span>Paste URL (Optional)</div>')
                url_input = gr.Textbox(
                    label="",
                    placeholder="https://example.com - Paste any URL for analysis",
                    elem_classes=["enhanced-url-input"],
                    show_label=False
                )
                submit_btn = gr.Button(
                    "🚀 Get AI Response",
                    variant="primary",
                    size="lg",
                    elem_classes=["submit-button"]
                )

            with gr.Row():
                status_box = gr.Textbox(
                    label="Status",
                    interactive=False,
                    elem_classes=["status-container"],
                    show_label=False
                )

            with gr.Accordion("⚙️ Advanced Settings", open=False, elem_classes=["accordion-container"]):
                with gr.Row():
                    top_p = gr.Slider(
                        0.1, 1.0,
                        value=0.9,
                        step=0.05,
                        label="🎯 Creativity (Top-p)",
                        elem_classes=["slider-container"],
                        info="Higher values make responses more creative"
                    )
                    temperature = gr.Slider(
                        0.1, 2.0,
                        value=0.7,
                        step=0.1,
                        label="🌡️ Temperature",
                        elem_classes=["slider-container"],
                        info="Controls randomness in responses"
                    )

    # Enhanced footer
    gr.HTML('''
    <div style="text-align: center; padding: 2rem; color: #666666; border-top: 1px solid #333333; margin-top: 3rem;">
        <p style="margin: 0; font-size: 0.9rem;">
            © 2025 MasterBrand AI Assistant • Powered by ChatGPT
        </p>
        <p style="margin: 0.5rem 0 0 0; font-size: 0.8rem; color: #888888;">
            Your intelligent AI companion for any task
        </p>
    </div>
    ''')

    # State
    state = gr.State([])
    chat_counter = gr.Number(value=0, visible=False)

    # Event Handlers
    # Submitting from the textbox and clicking the button run the same
    # handler; the handler's own outputs clear the textbox afterwards, so a
    # submitted message is no longer discarded without being sent.
    handler_inputs = [inputs, top_p, temperature, chat_counter, chatbot, state, image_input, video_input, url_input]
    handler_outputs = [chatbot, state, chat_counter, status_box, inputs, submit_btn]
    inputs.submit(generate_response, handler_inputs, handler_outputs)
    submit_btn.click(generate_response, handler_inputs, handler_outputs)
# ==== Launch ====
if __name__ == "__main__":
    # Enable the request queue (at most 10 queued requests), then start the server.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860, "share": True}
    demo.queue(max_size=10).launch(**launch_options)