"""OmniChat: a minimal multimodal Gradio chat UI routed through LiteLLM.

Supports image uploads (sent as base64 data URLs), PDF uploads (text
extracted per page with pypdf), and generic text/code files (inlined into
the prompt). Access is gated by a simple username:password login read
from the ALLOWED_USERS environment variable.
"""

import base64
import mimetypes
import os
import secrets

import gradio as gr
import litellm
import pypdf

# ==========================================
# CONFIGURATION & SYSTEM INSTRUCTIONS
# ==========================================

# The "openai/" prefix forces LiteLLM to use your OPENAI_API_BASE secret
MODEL_NAME = "openai/gemini-3-pro-preview"

# Hardcoded backend system instructions
SYSTEM_PROMPT = """You are a brilliant, highly capable AI assistant. You carefully analyze any images or text documents provided to you. If a user uploads code or text files, read the contents and assist them accordingly."""


# ==========================================
# HELPER FUNCTIONS
# ==========================================

def encode_file_to_base64(file_path):
    """Convert an image file to the base64 data URL expected by Vision LLMs.

    Falls back to ``image/jpeg`` when the MIME type cannot be guessed from
    the file extension.
    """
    mime_type, _ = mimetypes.guess_type(file_path)
    if not mime_type:
        mime_type = "image/jpeg"
    with open(file_path, "rb") as file:
        encoded = base64.b64encode(file.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"


def _pdf_to_text(file_path):
    """Extract text from every page of a PDF, labelled by 1-based page number.

    Pages that yield no extractable text are skipped. May raise any pypdf
    exception for unreadable/corrupt files; the caller handles that.
    """
    reader = pypdf.PdfReader(file_path)
    pages = []
    for i, page in enumerate(reader.pages):
        extracted = page.extract_text()
        if extracted:
            pages.append(f"--- Page {i+1} ---\n{extracted}\n\n")
    return "".join(pages)


def _file_to_content_part(file_path):
    """Convert one uploaded file into a single LLM message content part.

    Images become ``image_url`` parts; PDFs and text/code files become
    ``text`` parts wrapped in delimiters. Read failures are reported back
    to the model as bracketed [System: ...] notes rather than crashing.
    """
    mime_type, _ = mimetypes.guess_type(file_path)

    # --- A. IMAGE HANDLING ---
    if mime_type and mime_type.startswith("image/"):
        return {
            "type": "image_url",
            "image_url": {"url": encode_file_to_base64(file_path)},
        }

    # --- B. PDF HANDLING ---
    if file_path.lower().endswith(".pdf"):
        try:
            pdf_text = _pdf_to_text(file_path)
            # Package it nicely for the LLM prompt
            doc_text = f"\n\n--- PDF Document: {os.path.basename(file_path)} ---\n{pdf_text}\n--- End PDF ---\n"
            return {"type": "text", "text": doc_text}
        except Exception as e:
            error_msg = f"\n[System: Could not read PDF {os.path.basename(file_path)}. Error: {str(e)}]\n"
            return {"type": "text", "text": error_msg}

    # --- C. GENERIC TEXT/CODE HANDLING ---
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            file_text = f.read()
        doc_text = f"\n\n--- Document: {os.path.basename(file_path)} ---\n{file_text}\n--- End Document ---\n"
        return {"type": "text", "text": doc_text}
    except (OSError, UnicodeDecodeError):
        return {
            "type": "text",
            "text": f"\n[System: Could not read file: {os.path.basename(file_path)}]\n",
        }


# ==========================================
# CORE CHAT LOGIC
# ==========================================

def predict(message, history):
    """Stream the assistant reply for one multimodal chat turn.

    Args:
        message: Gradio MultimodalTextbox dict with "text" and "files" keys.
        history: prior turns in Gradio "messages" format (role/content dicts).

    Yields:
        The partial assistant response, growing as chunks stream in.
    """
    # 1. Initialize messages with the hidden System Prompt
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # 2. Append conversation history.
    # To save tokens, we only retain the text portions of previous turns
    for msg in history:
        if isinstance(msg["content"], str):
            messages.append({"role": msg["role"], "content": msg["content"]})

    # 3. Process the current user input (Multimodal Dictionary)
    current_content = []

    user_text = message.get("text", "")
    if user_text:
        current_content.append({"type": "text", "text": user_text})

    for file_path in message.get("files", []):
        current_content.append(_file_to_content_part(file_path))

    messages.append({"role": "user", "content": current_content})

    # 4. Stream the response from LiteLLM
    try:
        response = litellm.completion(
            model=MODEL_NAME,
            messages=messages,
            stream=True,
        )
        partial_message = ""
        for chunk in response:
            # Safely extract streamed text chunks (delta may be None)
            delta = chunk.choices[0].delta.content
            if delta:
                partial_message += delta
                yield partial_message
    except Exception as e:
        yield f"**An error occurred:** {str(e)}\n\n*Check your API keys and LiteLLM model name.*"


# ==========================================
# USER INTERFACE & SECURITY
# ==========================================

demo = gr.ChatInterface(
    fn=predict,
    multimodal=True,
    textbox=gr.MultimodalTextbox(
        placeholder="Type a message, or drop an image/document here...",
        file_types=["image", ".txt", ".csv", ".py", ".md", ".json", ".pdf"],
        scale=7,
    ),
    title="OmniChat",
    description="A minimal, multimodal chat interface routed through LiteLLM.",
    fill_height=True,
)


if __name__ == "__main__":
    # Custom HTML shown above the login form. NOTE(review): the original
    # markup was stripped in transit; tags reconstructed around the
    # surviving text — confirm against the intended design.
    custom_login_html = """
    <div style="text-align: center;">
        <h2>🌌 OmniChat Secure Portal</h2>
        <p>Authorized access only. Please provide your credentials to wake up the LLMs.</p>
    </div>
    """

    def verify_login(username, password):
        """Validate a login against the ALLOWED_USERS environment variable.

        ALLOWED_USERS has the form "user1:pass1,user2:pass2". Returns False
        when the variable is unset, malformed, or the credentials don't match.
        """
        users_env = os.environ.get("ALLOWED_USERS", "")
        if not users_env:
            return False
        try:
            # maxsplit=1 lets passwords themselves contain ":" — a plain
            # split(":") would raise ValueError and lock those users out.
            valid_users = dict(pair.split(":", 1) for pair in users_env.split(","))
        except ValueError:
            return False
        expected = valid_users.get(username)
        if expected is None:
            return False
        # Constant-time comparison to avoid timing side channels.
        return secrets.compare_digest(expected, password)

    # Pass the custom HTML into the auth_message parameter
    demo.launch(auth=verify_login, auth_message=custom_login_html)