"""HuggingGPT - Multi-Modal AI Playground.

A Gradio app offering chat, image generation, and text-to-video, all backed by
the Hugging Face Inference API.
"""

import base64
import io
import os
import tempfile

import gradio as gr
import requests
from PIL import Image

# ============================================================
# HuggingGPT - Multi-Modal AI Playground
# A polished Gradio app for Chat, Image Gen, and Text-to-Video
# ============================================================

HF_API_TOKEN = os.environ.get("HF_TOKEN", "")
HF_API = "https://api-inference.huggingface.co/models/"
# Default auth headers built from the HF_TOKEN environment variable (empty if unset).
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

# --- Model Registry ---
MODELS = {
    "qwen": "Qwen/Qwen2.5-7B-Instruct",
    "kimi": "moonshotai/Kimi-K2-Instruct",
    "small": "Qwen/Qwen2.5-0.5B-Instruct",
}

IMG_MODELS = {
    "flash": "black-forest-labs/FLUX.1-schnell",
    "full": "black-forest-labs/FLUX.1-dev",
}

VIDEO_MODELS = {
    "fast": "tencent/HunyuanVideo",
    "quality": "Wan-AI/Wan2.1-T2V-14B",
}

# Personality labels offered in both the onboarding and sidebar dropdowns.
PERSONALITY_CHOICES = [
    "Professional & Concise",
    "Friendly & Creative",
    "Sarcastic & Witty",
    "Expert Coder",
    "Research Assistant",
]


# --- Personality Prompts ---
def get_system_prompt(personality):
    """Return the system prompt for a personality label.

    Args:
        personality: One of the labels in the personality dropdown.

    Returns:
        The matching system prompt, or the "Professional & Concise" prompt
        when the label is unknown.
    """
    prompts = {
        "Professional & Concise": "You are a professional, efficient assistant. Be concise and accurate. Provide clear, actionable responses.",
        "Friendly & Creative": "You are a warm, creative assistant. Be enthusiastic, imaginative, and use expressive language. Make conversations enjoyable!",
        "Sarcastic & Witty": "You are witty and sarcastic but still genuinely helpful. Use clever humor and dry remarks while providing accurate information.",
        "Expert Coder": "You are an expert software engineer with deep knowledge of multiple programming languages and frameworks. Focus on clean, efficient, well-documented code. Explain your reasoning.",
        "Research Assistant": "You are a thorough research assistant. Provide well-structured, detailed responses with citations and examples when possible.",
    }
    return prompts.get(personality, prompts["Professional & Concise"])


# --- API Call Utilities ---
def _auth_headers(api_key=None):
    """Return auth headers for a user-supplied token, else the env-based HEADERS."""
    token = api_key.strip() if isinstance(api_key, str) else ""
    return {"Authorization": f"Bearer {token}"} if token else HEADERS


def _new_temp_path(suffix):
    """Create an empty, uniquely named temp file and return its path."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def hf_api_call(model_id, payload, timeout=60, headers=None):
    """Make a call to the HuggingFace Inference API with proper error handling.

    Args:
        model_id: Model repository id appended to ``HF_API``.
        payload: JSON-serialisable request body.
        timeout: Request timeout in seconds.
        headers: Optional request headers; defaults to the module ``HEADERS``.

    Returns:
        ``{"success": True, "data": <requests.Response>}`` on HTTP 200,
        otherwise ``{"success": False, "error": <message>}``.
    """
    url = f"{HF_API}{model_id}"
    request_headers = HEADERS if headers is None else headers
    try:
        res = requests.post(url, headers=request_headers, json=payload, timeout=timeout)
    except requests.exceptions.Timeout:
        return {"success": False, "error": "Request timed out. The model may be busy."}
    except Exception as e:  # UI boundary: report any transport failure as text
        return {"success": False, "error": f"Connection error: {str(e)}"}

    if res.status_code == 200:
        return {"success": True, "data": res}
    if res.status_code == 503:
        return {"success": False, "error": "Model is loading. Please try again in a few moments."}
    if res.status_code == 429:
        return {"success": False, "error": "Rate limit reached. Please wait a moment."}
    return {"success": False, "error": f"API Error {res.status_code}: {res.text[:200]}"}


# --- Chat Function ---
def call_llm(message, history, mode, personality, custom_url, api_key=None):
    """Send a chat turn to the selected language model and return its reply.

    Args:
        message: The new user message. It is appended to the prompt here, so
            ``history`` must not already contain it.
        history: Prior turns as ``{"role", "content"}`` dicts. Non-dict entries
            and entries whose content is not a string (e.g. image file dicts)
            are skipped.
        mode: Key into ``MODELS``, or ``"custom"`` to use ``custom_url``.
        personality: Personality label passed to ``get_system_prompt``.
        custom_url: Model id used when ``mode == "custom"`` and it is non-blank.
        api_key: Optional user token; falls back to the env-based headers.

    Returns:
        The generated text, or a string starting with ``"Error: "`` on failure.
    """
    use_custom = mode == "custom" and custom_url and custom_url.strip()
    model_id = custom_url.strip() if use_custom else MODELS.get(mode, MODELS["qwen"])

    # Build conversation in chat format
    messages = [{"role": "system", "content": get_system_prompt(personality)}]
    for turn in history or []:
        if not isinstance(turn, dict):
            continue
        content = turn.get("content", "")
        # Only plain-text turns can be forwarded to the text model.
        if isinstance(content, str):
            messages.append({"role": turn.get("role", "user"), "content": content})
    messages.append({"role": "user", "content": message})

    # NOTE(review): this sends a list of chat messages as `inputs`; confirm the
    # target endpoint accepts that shape (vs. a prompt string or a
    # chat-completions route).
    result = hf_api_call(
        model_id,
        {
            "inputs": messages,
            "parameters": {"max_new_tokens": 1024, "temperature": 0.7, "return_full_text": False},
        },
        timeout=60,
        headers=_auth_headers(api_key),
    )
    if not result["success"]:
        return f"Error: {result['error']}"

    try:
        data = result["data"].json()
    except ValueError:
        return "Received unexpected response format."

    if isinstance(data, list) and data:
        first = data[0]
        generated = first.get("generated_text", "") if isinstance(first, dict) else first
        if isinstance(generated, str):
            return generated.strip()
        return str(generated)
    if isinstance(data, dict):
        # Always return a string so callers can treat the reply as text.
        return str(data.get("generated_text", data))
    return "Received unexpected response format."


# --- Image Generation ---
def generate_image(prompt, img_mode, api_key_input):
    """Generate an image from a text prompt.

    Args:
        prompt: Image description.
        img_mode: Key into ``IMG_MODELS``; unknown keys fall back to "flash".
        api_key_input: Optional user token; falls back to the env-based headers.

    Returns:
        ``(PIL.Image or None, status message)``.
    """
    if not prompt or not prompt.strip():
        return None, "Please enter an image description."

    model = IMG_MODELS.get(img_mode, IMG_MODELS["flash"])
    headers = _auth_headers(api_key_input)
    gr.Info(f"Generating image with {img_mode} model... This may take a moment.")

    try:
        res = requests.post(f"{HF_API}{model}", headers=headers, json={"inputs": prompt}, timeout=120)
        if res.status_code == 200:
            image = Image.open(io.BytesIO(res.content))
            return image, "Image generated successfully!"
        if res.status_code == 503:
            return None, "Model is loading. Please try again in a few seconds."
        return None, f"Error {res.status_code}: {res.text[:200]}"
    except requests.exceptions.Timeout:
        return None, "Generation timed out. The model may be busy."
    except Exception as e:  # UI boundary: surface the failure as a status message
        return None, f"Error: {str(e)}"


# --- Text-to-Video ---
def generate_video(prompt, video_mode, api_key_input):
    """Generate a video from a text prompt.

    Args:
        prompt: Video description.
        video_mode: Key into ``VIDEO_MODELS``; unknown keys fall back to "fast".
        api_key_input: Optional user token; falls back to the env-based headers.

    Returns:
        ``(path to an .mp4 file or None, status message)``.
    """
    if not prompt or not prompt.strip():
        return None, "Please enter a video description."

    model = VIDEO_MODELS.get(video_mode, VIDEO_MODELS["fast"])
    headers = _auth_headers(api_key_input)
    gr.Info(f"Generating video with {video_mode} model... This can take 1-3 minutes.")

    try:
        res = requests.post(f"{HF_API}{model}", headers=headers, json={"inputs": prompt}, timeout=300)
        if res.status_code == 200:
            # Save to a unique temporary file so concurrent requests cannot
            # overwrite each other's output.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
                f.write(res.content)
                video_path = f.name
            return video_path, "Video generated successfully!"
        if res.status_code == 503:
            return None, "Model is loading. Please try again later."
        return None, f"Error {res.status_code}: {res.text[:200]}"
    except requests.exceptions.Timeout:
        return None, "Generation timed out (5 min). Video models are very resource-intensive. Try again later."
    except Exception as e:  # UI boundary: surface the failure as a status message
        return None, f"Error: {str(e)}"


# --- Chat Handler ---
def chat(message, history, mode, personality, custom_url, gen_img, img_prompt, api_key):
    """Handle a chat submission, optionally generating an image alongside.

    Args:
        message: The user's message; blank messages are ignored.
        history: Chatbot history in "messages" format.
        mode, personality, custom_url: Forwarded to ``call_llm``.
        gen_img: Whether to also generate an image.
        img_prompt: Prompt for the optional image.
        api_key: Optional user token used for both the image and the LLM call.

    Returns:
        ``(updated history, "")`` - the empty string clears the input box.
    """
    if not message or not message.strip():
        return history, ""

    history = list(history or [])
    # Snapshot the conversation before adding the new turn: call_llm appends
    # the user message itself, so passing the updated history would send it twice.
    prior_turns = list(history)
    history.append({"role": "user", "content": message})

    # Handle image generation alongside chat
    if gen_img and img_prompt and img_prompt.strip():
        img, status = generate_image(img_prompt, "flash", api_key)
        if img is not None:
            # Add image message in chat, backed by a per-request file.
            img_path = _new_temp_path(".png")
            img_b64 = pil_to_base64(img, save_path=img_path)
            history.append({"role": "assistant", "content": f'Here is the generated image for "{img_prompt}":'})
            history.append({"role": "assistant", "content": {"path": img_path, "url": f"data:image/png;base64,{img_b64}"}})
        else:
            # Surface the failure instead of silently dropping it.
            history.append({"role": "assistant", "content": f"Image generation failed: {status}"})

    # Always show the LLM reply, including error text, so the user gets feedback.
    response = call_llm(message, prior_turns, mode, personality, custom_url, api_key)
    history.append({"role": "assistant", "content": response})
    return history, ""


def pil_to_base64(img, save_path="/tmp/gen_img_chat.png"):
    """Encode a PIL image as base64 PNG and also write the PNG to disk.

    Args:
        img: A PIL image.
        save_path: Where to write the PNG; pass a falsy value to skip writing.

    Returns:
        The PNG bytes as a base64-encoded UTF-8 string.
    """
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    png_bytes = buf.getvalue()
    if save_path:
        with open(save_path, "wb") as f:
            f.write(png_bytes)
    return base64.b64encode(png_bytes).decode("utf-8")


# --- Onboarding ---
def finish_onboarding(name, personality):
    """Hide the onboarding card, show the main UI, and seed the user profile.

    Args:
        name: Name typed by the user; blank falls back to "Explorer".
        personality: Personality label chosen during onboarding.

    Returns:
        A 7-tuple matching the outputs wired to ``start_btn.click``.
    """
    display_name = name.strip() if name and name.strip() else "Explorer"
    greeting = f"## Good to see you, {display_name}.\n\nWhat would you like to create today?"
    return (
        gr.update(visible=False),  # onboarding
        gr.update(visible=True),  # main_ui
        # This output is the "messages"-type chatbot, which needs a list of
        # role/content dicts rather than a bare string.
        gr.update(value=[{"role": "assistant", "content": greeting}]),  # chatbot greeting
        gr.update(value=f"**{display_name}**"),  # user_name
        gr.update(value=personality),  # user_pers
        display_name,  # name_state
        personality,  # personality_state
    )


# --- Tab Switching ---
def switch_tab(tab_name):
    """Return visibility updates for the (chat, image, video) panels.

    Unknown tab names fall back to showing the chat panel.
    """
    updates = {
        "chat": (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)),
        "image": (gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)),
        "video": (gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)),
    }
    return updates.get(tab_name, updates["chat"])


def _nav_updates(tab_name):
    """Return updates for the three content tabs, three settings panels, and tab state."""
    return (*switch_tab(tab_name), *switch_tab(tab_name), tab_name)


# --- CSS Styling ---
# The stylesheet text is kept exactly as found in the source.
css = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); :root { --bg-primary: #0a0a0a; --bg-secondary: #141414; --bg-tertiary: #1c1c1c; --border: #2a2a2a; --accent: #6366f1; --accent-hover: #818cf8; --text-primary: #e5e5e5; --text-secondary: #a3a3a3; --success: #22c55e; --error: #ef4444; } body { font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important; background: var(--bg-primary) !important; color: var(--text-primary) !important; } /* Sidebar */ .sidebar { background: var(--bg-secondary) !important; border-right: 1px solid var(--border) !important; padding: 1.5rem !important; height: 100vh !important; } .sidebar-title { font-size: 1.25rem !important; font-weight: 700 !important; color: var(--text-primary) !important; margin-bottom: 1.5rem !important; } .sidebar-section { margin-top: 1.5rem !important; padding-top: 1rem !important; border-top: 1px solid var(--border) !important; } .sidebar-label { font-size: 0.7rem !important; font-weight: 600 !important; text-transform: uppercase !important; letter-spacing: 0.08em !important; color: var(--text-secondary) !important; margin-bottom: 0.5rem !important; } /* Navigation Buttons */ .nav-btn { width: 100% !important; margin-bottom: 0.5rem !important; justify-content: flex-start 
!important; padding: 0.625rem 0.875rem !important; border-radius: 0.625rem !important; font-weight: 500 !important; transition: all 0.15s ease !important; } .nav-btn:hover { background: var(--bg-tertiary) !important; } /* Main Content */ .main-content { background: var(--bg-primary) !important; padding: 2rem !important; min-height: 100vh !important; } /* Onboarding Card */ .onboarding-card { max-width: 480px; margin: 8vh auto 0; padding: 2.5rem; background: var(--bg-secondary) !important; border: 1px solid var(--border) !important; border-radius: 1rem !important; } .onboarding-title { font-size: 1.75rem !important; font-weight: 700 !important; color: var(--text-primary) !important; margin-bottom: 0.5rem !important; } .onboarding-subtitle { color: var(--text-secondary) !important; font-size: 0.875rem !important; margin-bottom: 2rem !important; } /* Greeting */ .greeting-container { text-align: center; margin-bottom: 2rem; } .greeting-title { font-size: 2rem !important; font-weight: 700 !important; color: var(--text-primary) !important; margin-bottom: 0.5rem !important; } .greeting-subtitle { color: var(--text-secondary) !important; font-size: 0.9rem !important; } /* Input Box */ .input-box { background: var(--bg-secondary) !important; border: 1px solid var(--border) !important; border-radius: 0.75rem !important; padding: 0.75rem !important; } .input-box:focus-within { border-color: var(--accent) !important; box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.1) !important; } /* Buttons */ .btn-primary { background: var(--accent) !important; color: white !important; font-weight: 600 !important; border-radius: 0.625rem !important; padding: 0.625rem 1.25rem !important; border: none !important; transition: all 0.15s ease !important; } .btn-primary:hover { background: var(--accent-hover) !important; transform: translateY(-1px) !important; } .btn-secondary { background: var(--bg-tertiary) !important; color: var(--text-primary) !important; border: 1px solid var(--border) 
!important; border-radius: 0.625rem !important; padding: 0.625rem 1.25rem !important; } /* Chat Messages */ .chatbot-container { background: var(--bg-secondary) !important; border: 1px solid var(--border) !important; border-radius: 0.75rem !important; overflow: hidden !important; } .message.user { background: var(--accent) !important; color: white !important; border-radius: 1rem 1rem 0.25rem 1rem !important; } .message.bot { background: var(--bg-tertiary) !important; color: var(--text-primary) !important; border-radius: 1rem 1rem 1rem 0.25rem !important; } /* User Profile */ .user-profile { display: flex; align-items: center; gap: 0.75rem; padding: 0.75rem; background: var(--bg-tertiary) !important; border-radius: 0.75rem !important; margin-top: auto; } .user-avatar { width: 2rem; height: 2rem; background: var(--accent); color: white; border-radius: 0.5rem; display: flex; align-items: center; justify-content: center; font-weight: 700; font-size: 0.875rem; flex-shrink: 0; } /* Radio & Dropdown */ .radio-group label { color: var(--text-secondary) !important; font-size: 0.8125rem !important; } .dropdown-select input { background: var(--bg-tertiary) !important; border-color: var(--border) !important; color: var(--text-primary) !important; border-radius: 0.5rem !important; } /* Generation Cards */ .gen-card { background: var(--bg-secondary) !important; border: 1px solid var(--border) !important; border-radius: 1rem !important; padding: 2rem !important; max-width: 800px; margin: 0 auto; } .gen-title { font-size: 1.25rem !important; font-weight: 600 !important; margin-bottom: 1.5rem !important; } /* Footer */ .footer { text-align: center; padding: 1rem; color: var(--text-secondary); font-size: 0.75rem; margin-top: 2rem; } /* Status Badge */ .status-badge { display: inline-flex; align-items: center; gap: 0.375rem; padding: 0.25rem 0.625rem; border-radius: 9999px; font-size: 0.75rem; font-weight: 500; } .status-online { background: rgba(34, 197, 94, 0.15); color: 
var(--success); } /* Animations */ @keyframes fadeIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } } .fade-in { animation: fadeIn 0.4s ease-out; } /* API Key Input */ .api-key-input input { font-family: monospace !important; font-size: 0.75rem !important; } /* Scrollbar */ ::-webkit-scrollbar { width: 6px; } ::-webkit-scrollbar-track { background: transparent; } ::-webkit-scrollbar-thumb { background: #333; border-radius: 3px; } ::-webkit-scrollbar-thumb:hover { background: #444; } """

# ============================================================
# BUILD THE UI
# ============================================================
# NOTE(review): several gr.Markdown("") / gr.HTML(...) calls below have empty or
# near-empty content; their original markup looks like it was stripped before
# this file was captured. The strings are kept as found - restore from the
# original source if available.
with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
    # --- States ---
    name_state = gr.State("Explorer")
    personality_state = gr.State("Professional & Concise")
    current_tab = gr.State("chat")

    # ============================================================
    # ONBOARDING SCREEN
    # ============================================================
    with gr.Column(visible=True, elem_classes="onboarding-card fade-in") as onboarding:
        gr.Markdown("# HuggingGPT", elem_classes="onboarding-title")
        gr.Markdown(
            "Your multi-modal AI playground. Chat with powerful language models, generate stunning images, "
            "and create videos — all in one place.",
            elem_classes="onboarding-subtitle",
        )
        name_input = gr.Textbox(
            label="What should I call you?",
            placeholder="e.g. Alex",
            elem_classes="dropdown-select",
        )
        personality_input = gr.Dropdown(
            label="AI Personality",
            choices=PERSONALITY_CHOICES,
            value="Professional & Concise",
            elem_classes="dropdown-select",
        )
        api_key_onboard = gr.Textbox(
            label="HF API Token (optional)",
            placeholder="hf_...",
            type="password",
            elem_classes="api-key-input dropdown-select",
            info="Required for some models. Get one at huggingface.co/settings/tokens",
        )
        start_btn = gr.Button("Get Started", variant="primary", elem_classes="btn-primary")
        gr.Markdown("", elem_classes="footer")

    # ============================================================
    # MAIN APP INTERFACE
    # ============================================================
    with gr.Row(visible=False, elem_id="main-ui") as main_ui:
        # --- SIDEBAR ---
        with gr.Column(scale=1, elem_classes="sidebar"):
            gr.Markdown("### HuggingGPT", elem_classes="sidebar-title")

            # Navigation
            gr.Markdown("")
            chat_nav_btn = gr.Button("Chat", elem_classes="nav-btn btn-secondary")
            img_nav_btn = gr.Button("Image Gen", elem_classes="nav-btn btn-secondary")
            video_nav_btn = gr.Button("Text-to-Video", elem_classes="nav-btn btn-secondary")

            # Chat Settings
            with gr.Column(visible=True) as chat_settings:
                gr.Markdown("")
                mode_radio = gr.Radio(
                    choices=[("Qwen 2.5 (7B)", "qwen"), ("Kimi K2", "kimi"), ("Qwen Mini", "small"), ("Custom", "custom")],
                    value="qwen",
                    label="",
                    elem_classes="radio-group",
                )
                custom_url = gr.Textbox(
                    placeholder="organization/model-name",
                    visible=False,
                    label="Custom Model ID",
                    elem_classes="dropdown-select",
                )
                gr.Markdown("")
                pers_display = gr.Dropdown(
                    choices=PERSONALITY_CHOICES,
                    value="Professional & Concise",
                    label="",
                    interactive=True,
                    elem_classes="dropdown-select",
                )

            # Image Settings
            # NOTE(review): img_mode / video_mode are not wired to any handler;
            # generation uses the radios inside the Image and Video tabs.
            with gr.Column(visible=False) as image_settings:
                gr.Markdown("")
                img_mode = gr.Radio(
                    choices=[("FLUX Schnell (Fast)", "flash"), ("FLUX Dev (Quality)", "full")],
                    value="flash",
                    label="",
                    elem_classes="radio-group",
                )

            # Video Settings
            with gr.Column(visible=False) as video_settings:
                gr.Markdown("")
                video_mode = gr.Radio(
                    choices=[("Hunyuan (Fast)", "fast"), ("Wan 2.1 (Quality)", "quality")],
                    value="fast",
                    label="",
                    elem_classes="radio-group",
                )

            # API Key
            gr.Markdown("")
            api_key_main = gr.Textbox(
                placeholder="hf_...",
                type="password",
                label="",
                elem_classes="api-key-input dropdown-select",
                info="hf_ token for private models",
            )

            # User Profile
            with gr.Column(elem_classes="user-profile"):
                # NOTE(review): presumably this wrapped the "H" in an element
                # using the .user-avatar CSS class - confirm and restore.
                gr.HTML("\nH\n")
                with gr.Column(scale=1, min_width=0):
                    user_name = gr.Markdown("**Explorer**")
                    user_pers = gr.Markdown("Professional & Concise", elem_classes="footer")

        # --- CONTENT AREA ---
        with gr.Column(scale=4, elem_classes="main-content"):
            # ---- CHAT TAB ----
            with gr.Column(visible=True) as chat_tab:
                gr.Markdown("## Chat", elem_classes="greeting-title")
                chatbot = gr.Chatbot(
                    height=520,
                    show_label=False,
                    type="messages",
                    elem_classes="chatbot-container",
                )
                with gr.Row(elem_classes="input-box"):
                    msg_input = gr.Textbox(
                        placeholder="Ask anything, generate code, brainstorm ideas...",
                        show_label=False,
                        scale=8,
                        elem_classes="dropdown-select",
                    )
                    send_btn = gr.Button("Send", variant="primary", scale=1, elem_classes="btn-primary")
                with gr.Row():
                    with gr.Column(scale=1):
                        gen_img_cb = gr.Checkbox(label="Generate Image alongside")
                    with gr.Column(scale=3):
                        img_prompt_input = gr.Textbox(
                            placeholder="Describe the image to generate...",
                            show_label=False,
                            visible=False,
                            elem_classes="dropdown-select",
                        )

            # ---- IMAGE GEN TAB ----
            with gr.Column(visible=False) as image_tab:
                with gr.Column(elem_classes="gen-card"):
                    gr.Markdown("## Image Generation", elem_classes="gen-title")
                    gr.Markdown(
                        "Describe the image you want to create in detail. The AI will generate it using FLUX models."
                    )
                    img_prompt_main = gr.Textbox(
                        label="Prompt",
                        placeholder="A serene Japanese garden with cherry blossoms, soft morning light, watercolor style...",
                        lines=4,
                        elem_classes="dropdown-select",
                    )
                    img_model_display = gr.Radio(
                        choices=[("FLUX Schnell (Fast ~2s)", "flash"), ("FLUX Dev (Quality ~10s)", "full")],
                        value="flash",
                        label="Model",
                        elem_classes="radio-group",
                    )
                    gen_img_btn = gr.Button("Generate Image", variant="primary", elem_classes="btn-primary")
                    img_output = gr.Image(
                        label="Generated Image",
                        show_label=False,
                        elem_classes="chatbot-container",
                    )
                    img_status = gr.Markdown("")

            # ---- VIDEO GEN TAB ----
            with gr.Column(visible=False) as video_tab:
                with gr.Column(elem_classes="gen-card"):
                    gr.Markdown("## Text-to-Video", elem_classes="gen-title")
                    gr.Markdown(
                        "Describe the video scene you want to generate. Note: Video generation can take 1-5 minutes."
                    )
                    video_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="A futuristic cityscape at sunset, flying cars, neon lights reflecting on wet streets...",
                        lines=4,
                        elem_classes="dropdown-select",
                    )
                    video_model_display = gr.Radio(
                        choices=[("HunyuanVideo (Fast)", "fast"), ("Wan 2.1 (Quality)", "quality")],
                        value="fast",
                        label="Model",
                        elem_classes="radio-group",
                    )
                    gen_video_btn = gr.Button("Generate Video", variant="primary", elem_classes="btn-primary")
                    video_output = gr.Video(
                        label="Generated Video",
                        show_label=False,
                        elem_classes="chatbot-container",
                    )
                    video_status = gr.Markdown("")

            # Footer
            gr.Markdown("", elem_classes="footer")

    # ============================================================
    # EVENT HANDLERS
    # ============================================================

    # Show/hide custom URL field
    mode_radio.change(
        lambda x: gr.update(visible=x == "custom"),
        inputs=mode_radio,
        outputs=custom_url,
    )

    # Show/hide image prompt field
    gen_img_cb.change(
        lambda x: gr.update(visible=x),
        inputs=gen_img_cb,
        outputs=img_prompt_input,
    )

    # Tab switching: each button shows its content tab and settings panel and
    # records the active tab name.
    nav_outputs = [chat_tab, image_tab, video_tab, chat_settings, image_settings, video_settings, current_tab]
    chat_nav_btn.click(lambda: _nav_updates("chat"), outputs=nav_outputs)
    img_nav_btn.click(lambda: _nav_updates("image"), outputs=nav_outputs)
    video_nav_btn.click(lambda: _nav_updates("video"), outputs=nav_outputs)

    # Onboarding completion
    start_btn.click(
        finish_onboarding,
        inputs=[name_input, personality_input],
        outputs=[onboarding, main_ui, chatbot, user_name, user_pers, name_state, personality_state],
    )

    # Chat reads its personality from pers_display, so carry the onboarding
    # choice over and keep the profile label in step with the sidebar dropdown.
    personality_input.change(
        lambda x: x,
        inputs=personality_input,
        outputs=pers_display,
    )
    pers_display.change(
        lambda x: x,
        inputs=pers_display,
        outputs=user_pers,
    )

    # Chat submission
    chat_inputs = [msg_input, chatbot, mode_radio, pers_display, custom_url, gen_img_cb, img_prompt_input, api_key_main]
    send_btn.click(chat, chat_inputs, [chatbot, msg_input])
    msg_input.submit(chat, chat_inputs, [chatbot, msg_input])

    # Image generation from Image tab
    gen_img_btn.click(
        generate_image,
        inputs=[img_prompt_main, img_model_display, api_key_main],
        outputs=[img_output, img_status],
    )

    # Video generation
    gen_video_btn.click(
        generate_video,
        inputs=[video_prompt, video_model_display, api_key_main],
        outputs=[video_output, video_status],
    )

    # Sync API key between onboarding and main
    api_key_onboard.change(
        lambda x: x,
        inputs=api_key_onboard,
        outputs=api_key_main,
    )

# ============================================================
# LAUNCH
# ============================================================
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )