# Source: keefereuther's Hugging Face Space — "Update app.py" (commit 7a2f3bd, verified).
# Standard library
import base64
import mimetypes
import os
import secrets

# Third-party
import gradio as gr
import litellm
import pypdf
# ==========================================
# CONFIGURATION & SYSTEM INSTRUCTIONS
# ==========================================
# The "openai/" prefix forces LiteLLM to use the OpenAI-compatible provider,
# which reads the OPENAI_API_BASE / OPENAI_API_KEY secrets instead of
# inferring a provider from the model name.
MODEL_NAME = "openai/gemini-3-pro-preview"

# Hidden system prompt injected server-side as the first message of every
# conversation in predict(); end users never see or edit it.
SYSTEM_PROMPT = """You are a brilliant, highly capable AI assistant.
You carefully analyze any images or text documents provided to you.
If a user uploads code or text files, read the contents and assist them accordingly."""
# ==========================================
# HELPER FUNCTIONS
# ==========================================
def encode_file_to_base64(file_path):
    """Read an image file and return it as a base64 data URL.

    Vision-capable chat APIs accept inline images in the form
    ``data:<mime>;base64,<payload>``. Files whose MIME type cannot be
    guessed from the extension fall back to ``image/jpeg``.
    """
    guessed, _ = mimetypes.guess_type(file_path)
    mime_type = guessed or "image/jpeg"
    with open(file_path, "rb") as fh:
        payload = base64.b64encode(fh.read()).decode("utf-8")
    return f"data:{mime_type};base64,{payload}"
# ==========================================
# CORE CHAT LOGIC
# ==========================================
def predict(message, history):
    """Stream a chat completion for one multimodal Gradio turn.

    Args:
        message: dict from gr.MultimodalTextbox with "text" (str) and
            "files" (list of local file paths) keys.
        history: prior turns in Gradio "messages" format — dicts with
            "role" and "content" keys.

    Yields:
        The progressively accumulated assistant reply (Gradio renders each
        yield as the current partial message).
    """
    # 1. The hidden system prompt always goes first.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # 2. Replay history, keeping only plain-text turns to save tokens
    #    (file attachments from earlier turns are deliberately dropped).
    for msg in history:
        if isinstance(msg["content"], str):
            messages.append({"role": msg["role"], "content": msg["content"]})

    # 3. Build the multimodal content list for the current user turn.
    current_content = []
    user_text = message.get("text", "")
    if user_text:
        current_content.append({"type": "text", "text": user_text})
    for file_path in message.get("files", []):
        current_content.append(_file_to_content_part(file_path))
    messages.append({"role": "user", "content": current_content})

    # 4. Stream the response from LiteLLM.
    try:
        response = litellm.completion(
            model=MODEL_NAME,
            messages=messages,
            stream=True
        )
        partial_message = ""
        for chunk in response:
            # Some providers emit keep-alive/usage chunks with an empty
            # choices list; indexing [0] unguarded would raise IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                partial_message += delta
                yield partial_message
    except Exception as e:
        yield f"**An error occurred:** {str(e)}\n\n*Check your API keys and LiteLLM model name.*"


def _file_to_content_part(file_path):
    """Convert one uploaded file into an OpenAI-style content-part dict."""
    mime_type, _ = mimetypes.guess_type(file_path)
    # --- A. Images: inline as base64 data URLs for vision models ---
    if mime_type and mime_type.startswith("image/"):
        return {
            "type": "image_url",
            "image_url": {"url": encode_file_to_base64(file_path)}
        }
    # --- B. PDFs: extract text per page with pypdf ---
    if file_path.lower().endswith(".pdf"):
        try:
            reader = pypdf.PdfReader(file_path)
            pdf_text = ""
            for i, page in enumerate(reader.pages):
                extracted = page.extract_text()
                if extracted:
                    pdf_text += f"--- Page {i+1} ---\n{extracted}\n\n"
            # Package it nicely for the LLM prompt.
            doc_text = f"\n\n--- PDF Document: {os.path.basename(file_path)} ---\n{pdf_text}\n--- End PDF ---\n"
            return {"type": "text", "text": doc_text}
        except Exception as e:
            error_msg = f"\n[System: Could not read PDF {os.path.basename(file_path)}. Error: {str(e)}]\n"
            return {"type": "text", "text": error_msg}
    # --- C. Everything else: treat as UTF-8 text/code ---
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            file_text = f.read()
        doc_text = f"\n\n--- Document: {os.path.basename(file_path)} ---\n{file_text}\n--- End Document ---\n"
        return {"type": "text", "text": doc_text}
    except Exception:
        return {"type": "text", "text": f"\n[System: Could not read file: {os.path.basename(file_path)}]\n"}
# ==========================================
# USER INTERFACE & SECURITY
# ==========================================
# Multimodal chat UI: predict() receives {"text": ..., "files": [...]} dicts
# because multimodal=True is set.
demo = gr.ChatInterface(
    fn=predict,
    multimodal=True,
    textbox=gr.MultimodalTextbox(
        placeholder="Type a message, or drop an image/document here...",
        # Accepted uploads: any image, plus the text/PDF types predict() can parse.
        file_types=["image", ".txt", ".csv", ".py", ".md", ".json", ".pdf"],
        scale=7  # Make the textbox wide relative to sibling components.
    ),
    title="OmniChat",
    description="A minimal, multimodal chat interface routed through LiteLLM.",
    fill_height=True,  # Stretch the chat area to fill the viewport.
)
def verify_login(username, password):
    """Authenticate a Gradio login attempt against the ALLOWED_USERS secret.

    ALLOWED_USERS format: "user1:pass1,user2:pass2". Each pair is split at
    the FIRST colon only, so passwords may themselves contain ':'.
    (The previous unbounded split raised ValueError on such passwords,
    locking every user out.)

    Returns:
        True only on an exact username/password match; False for missing
        config, malformed entries, unknown users, or wrong passwords.
    """
    users_env = os.environ.get("ALLOWED_USERS", "")
    if not users_env:
        # No allow-list configured: deny everyone rather than open the app.
        return False
    valid_users = {}
    for pair in users_env.split(","):
        user, sep, pwd = pair.partition(":")
        if not sep:
            # Malformed entry (no colon) — treat the whole config as invalid.
            return False
        valid_users[user] = pwd
    expected = valid_users.get(username)
    if expected is None:
        return False
    # Constant-time comparison so response timing leaks nothing about the
    # password's length or matching prefix.
    return secrets.compare_digest(expected, password)


if __name__ == "__main__":
    # Custom HTML rendered above the Gradio login form.
    custom_login_html = """
    <div style="text-align: center; padding: 10px;">
        <h2 style="color: #4F46E5; margin-bottom: 5px; font-family: sans-serif;">🌌 OmniChat Secure Portal</h2>
        <p style="color: #6B7280; font-size: 14px; font-family: sans-serif;">Authorized access only. Please provide your credentials to wake up the LLMs.</p>
    </div>
    """
    # Pass the custom HTML into the auth_message parameter.
    demo.launch(auth=verify_login, auth_message=custom_login_html)