import base64
import io
import json
import os
import tempfile

import gradio as gr
import numpy as np
import requests
from PIL import Image

HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Only send an Authorization header when a token is configured; an empty
# "Bearer " value is a malformed credential rather than an anonymous request.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# ── Model IDs ──────────────────────────────────────────────────────────────────
TEXT_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"  # tiny chat LLM
IMAGE_MODEL = "black-forest-labs/FLUX.1-schnell"  # fast image gen
AUDIO_MODEL = "facebook/musicgen-small"  # audio/music gen
VIDEO_MODEL = "ali-vilab/text-to-video-ms-1.7b"  # text-to-video
MULTIMODAL_MODEL = "Salesforce/blip2-opt-2.7b"  # image+text (VQA / caption)

_API_BASE = "https://api-inference.huggingface.co/models"


# ── HF Inference API helpers ───────────────────────────────────────────────────
def _error_message(response) -> str:
    """Format a non-success HTTP response as a short user-facing string."""
    return f"⚠️ Error {response.status_code}: {response.text[:300]}"


def _save_temp(data: bytes, suffix: str) -> str:
    """Write *data* to a uniquely named temp file and return its path.

    A unique name per call keeps concurrent requests from overwriting each
    other's output. The file is not deleted here; the caller hands the path
    on to the UI component.
    """
    with tempfile.NamedTemporaryFile(
        prefix="generated_", suffix=suffix, delete=False
    ) as handle:
        handle.write(data)
        return handle.name


def query_text(messages: list, system: str = "") -> str:
    """Chat via HF serverless inference (text generation).

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        system: Optional system prompt. It is prepended only when *messages*
            does not already start with a system message.

    Returns:
        The model reply, or a ``⚠️``-prefixed error string on failure.
    """
    has_system = bool(messages) and messages[0].get("role") == "system"
    if system and not has_system:
        messages = [{"role": "system", "content": system}, *messages]

    url = f"{_API_BASE}/{TEXT_MODEL}/v1/chat/completions"
    payload = {
        "model": TEXT_MODEL,
        "messages": messages,
        "max_tokens": 1024,
        "temperature": 0.7,
    }
    try:
        r = requests.post(url, headers=HEADERS, json=payload, timeout=60)
    except requests.RequestException as exc:
        return f"⚠️ Request failed: {exc}"

    if r.status_code == 200:
        try:
            return r.json()["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError):
            return f"⚠️ Unexpected response: {r.text[:300]}"

    # fallback plain text-generation endpoint
    url2 = f"{_API_BASE}/{TEXT_MODEL}"
    prompt = "\n".join(m["content"] for m in messages)
    try:
        r2 = requests.post(
            url2,
            headers=HEADERS,
            json={"inputs": prompt, "parameters": {"max_new_tokens": 512}},
            timeout=60,
        )
    except requests.RequestException:
        # The fallback could not be reached; report the primary failure.
        return _error_message(r)

    if r2.status_code == 200:
        try:
            result = r2.json()
        except ValueError:
            return _error_message(r)
        if isinstance(result, list) and result and isinstance(result[0], dict):
            return result[0].get("generated_text", str(result))
        return str(result)
    return _error_message(r)


def query_image(prompt: str) -> Image.Image | str:
    """Generate image via HF inference.

    Returns:
        A PIL image on success, otherwise a ``⚠️``-prefixed error string.
    """
    url = f"{_API_BASE}/{IMAGE_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
    except requests.RequestException as exc:
        return f"⚠️ Request failed: {exc}"
    if r.status_code != 200:
        return _error_message(r)
    try:
        return Image.open(io.BytesIO(r.content))
    except OSError as exc:
        # Raised by PIL when the payload is not a decodable image.
        return f"⚠️ Could not decode image: {exc}"


def query_audio(prompt: str) -> str | None:
    """Generate audio via HF inference, returns a temp file path.

    Returns ``None`` when the request fails or the API answers with a
    non-200 status.
    """
    url = f"{_API_BASE}/{AUDIO_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
    except requests.RequestException:
        return None
    if r.status_code == 200:
        return _save_temp(r.content, ".wav")
    return None


def query_video(prompt: str) -> str | None:
    """Generate short video via HF inference, returns a temp file path.

    Returns ``None`` when the request fails or the API answers with a
    non-200 status.
    """
    url = f"{_API_BASE}/{VIDEO_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=180)
    except requests.RequestException:
        return None
    if r.status_code == 200:
        return _save_temp(r.content, ".mp4")
    return None


def query_multimodal(
    image: Image.Image | None, text: str
) -> tuple[str, Image.Image | None]:
    """VQA / image captioning with BLIP-2. Also returns the original image.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        text: The question; a generic "describe" prompt is used when empty.

    Returns:
        ``(answer_or_error_message, image)``.
    """
    if image is None:
        # No image → nothing to analyse; ask the user for one.
        return "Please upload an image for multimodal analysis.", None

    # Encode image to base64
    buf = io.BytesIO()
    image.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    url = f"{_API_BASE}/{MULTIMODAL_MODEL}"
    payload = {
        "inputs": {
            "image": b64,
            "question": text or "Describe this image in detail.",
        }
    }
    try:
        r = requests.post(url, headers=HEADERS, json=payload, timeout=90)
    except requests.RequestException as exc:
        return f"⚠️ Request failed: {exc}", image
    if r.status_code != 200:
        return _error_message(r), image

    try:
        result = r.json()
    except ValueError:
        return f"⚠️ Unexpected response: {r.text[:300]}", image

    # A list response carries the answer in its first element.
    first = result[0] if isinstance(result, list) and result else result
    if isinstance(first, dict):
        answer = first.get("answer", str(first))
    else:
        answer = str(first)
    return answer, image


# ── Chat state helper ──────────────────────────────────────────────────────────
def chat_respond(user_msg: str, history: list) -> tuple[str, list]:
    """Send the conversation plus *user_msg* to the chat model.

    Args:
        user_msg: The text the user just submitted.
        history: Prior ``(user, bot)`` turns.

    Returns:
        ``("", updated_history)`` — the empty string clears the input box.
        Blank input leaves *history* untouched.
    """
    if not user_msg.strip():
        return "", history

    messages = [
        {
            "role": "system",
            "content": (
                "You are NEXUS, an advanced AI assistant. "
                "Be helpful, thorough, and thoughtful."
            ),
        }
    ]
    for human, bot in history or []:
        # Skip missing halves of a turn: a None content is not a valid message.
        if human is not None:
            messages.append({"role": "user", "content": human})
        if bot is not None:
            messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": user_msg})

    reply = query_text(messages)
    # Build a new list instead of mutating the caller's history in place.
    return "", [*(history or []), (user_msg, reply)]


def gen_image(prompt: str, progress=gr.Progress()):
    """Generate an image for *prompt*; returns ``(image_or_None, status)``."""
    if not prompt or not prompt.strip():
        return None, "⚠️ Please enter a prompt."
    progress(0.2, desc="Connecting to FLUX…")
    result = query_image(prompt)
    progress(1.0, desc="Done")
    if isinstance(result, str):  # error string
        return None, result
    return result, "✅ Image generated successfully!"


def gen_audio(prompt: str, progress=gr.Progress()):
    """Generate audio for *prompt*; returns ``(file_path_or_None, status)``."""
    if not prompt or not prompt.strip():
        return None, "⚠️ Please enter a prompt."
    progress(0.2, desc="Composing music…")
    path = query_audio(prompt)
    progress(1.0, desc="Done")
    if path:
        return path, "✅ Audio generated!"
    return (
        None,
        "⚠️ Audio generation failed. The model may be loading – try again in a moment.",
    )


def gen_video(prompt: str, progress=gr.Progress()):
    """Generate a video for *prompt*; returns ``(file_path_or_None, status)``."""
    if not prompt or not prompt.strip():
        return None, "⚠️ Please enter a prompt."
    progress(0.2, desc="Rendering frames…")
    path = query_video(prompt)
    progress(1.0, desc="Done")
    if path:
        return path, "✅ Video generated!"
    return (
        None,
        "⚠️ Video generation failed. The model may be loading – try again in a moment.",
    )
def gen_multimodal(image, question: str, progress=gr.Progress()):
    """Run image question-answering and report progress to the UI.

    Args:
        image: The uploaded image, passed straight to ``query_multimodal``.
        question: The user's question about the image.
        progress: Gradio progress tracker.

    Returns:
        ``(answer, image)`` exactly as returned by ``query_multimodal``.
    """
    progress(0.3, desc="Analyzing image…")
    answer, img_out = query_multimodal(image, question)
    progress(1.0, desc="Done")
    return answer, img_out


# ── CSS ────────────────────────────────────────────────────────────────────────
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:ital,wght@0,300;0,500;0,700;1,300&display=swap');

:root {
    --bg: #0a0a0f;
    --panel: #111118;
    --border: #1e1e2e;
    --accent: #7c3aed;
    --accent2: #06b6d4;
    --accent3: #f59e0b;
    --text: #e2e8f0;
    --muted: #64748b;
    --success: #10b981;
    --danger: #ef4444;
}

body, .gradio-container {
    background: var(--bg) !important;
    font-family: 'DM Sans', sans-serif;
    color: var(--text);
}

/* Header */
.nexus-header {
    text-align: center;
    padding: 2.5rem 1rem 1rem;
    background: linear-gradient(135deg, #0a0a0f 0%, #130d22 50%, #0a0a0f 100%);
    border-bottom: 1px solid var(--border);
    margin-bottom: 1.5rem;
}
.nexus-title {
    font-family: 'Space Mono', monospace;
    font-size: clamp(2rem, 6vw, 3.5rem);
    font-weight: 700;
    background: linear-gradient(90deg, var(--accent) 0%, var(--accent2) 50%, var(--accent3) 100%);
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
    letter-spacing: -0.02em;
    margin: 0;
}
.nexus-sub {
    color: var(--muted);
    font-size: 0.95rem;
    margin-top: 0.5rem;
    letter-spacing: 0.08em;
    text-transform: uppercase;
}
.badge-row {
    display: flex;
    justify-content: center;
    gap: 0.5rem;
    flex-wrap: wrap;
    margin-top: 1rem;
}
.badge {
    font-family: 'Space Mono', monospace;
    font-size: 0.65rem;
    padding: 0.25rem 0.75rem;
    border-radius: 999px;
    border: 1px solid;
    letter-spacing: 0.05em;
}
.badge-chat { border-color: var(--accent); color: var(--accent); }
.badge-img { border-color: var(--accent2); color: var(--accent2); }
.badge-audio { border-color: var(--accent3); color: var(--accent3); }
.badge-video { border-color: var(--success); color: var(--success); }
.badge-mm { border-color: var(--danger); color: var(--danger); }

/* Tabs */
.tab-nav { border-bottom: 1px solid var(--border) !important; }
.tab-nav button {
    font-family: 'Space Mono', monospace !important;
    font-size: 0.8rem !important;
    letter-spacing: 0.05em !important;
    color: var(--muted) !important;
    padding: 0.75rem 1.25rem !important;
    border-bottom: 2px solid transparent !important;
    transition: all 0.2s !important;
}
.tab-nav button.selected {
    color: var(--accent2) !important;
    border-bottom-color: var(--accent2) !important;
}

/* Chatbot */
.chatbot .message.user {
    background: rgba(124,58,237,0.15) !important;
    border-left: 3px solid var(--accent) !important;
}
.chatbot .message.bot {
    background: rgba(6,182,212,0.08) !important;
    border-left: 3px solid var(--accent2) !important;
}

/* Inputs */
textarea, input[type=text] {
    background: var(--panel) !important;
    border: 1px solid var(--border) !important;
    color: var(--text) !important;
    border-radius: 8px !important;
    font-family: 'DM Sans', sans-serif !important;
}
textarea:focus, input:focus {
    border-color: var(--accent2) !important;
    box-shadow: 0 0 0 2px rgba(6,182,212,0.15) !important;
}

/* Buttons */
.btn-primary {
    background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%) !important;
    color: #fff !important;
    font-family: 'Space Mono', monospace !important;
    font-size: 0.8rem !important;
    letter-spacing: 0.05em !important;
    border: none !important;
    border-radius: 8px !important;
    padding: 0.65rem 1.5rem !important;
    cursor: pointer !important;
    transition: opacity 0.2s !important;
}
.btn-primary:hover { opacity: 0.85 !important; }

/* Status box */
.status-box {
    background: var(--panel);
    border: 1px solid var(--border);
    border-radius: 8px;
    padding: 0.75rem 1rem;
    font-family: 'Space Mono', monospace;
    font-size: 0.75rem;
    color: var(--muted);
    min-height: 2.5rem;
}

/* Section labels */
.section-label {
    font-family: 'Space Mono', monospace;
    font-size: 0.7rem;
    letter-spacing: 0.1em;
    color: var(--muted);
    text-transform: uppercase;
    margin-bottom: 0.4rem;
}
"""
#
── Build UI ─────────────────────────────────────────────────────────────────── with gr.Blocks(css=CSS, title="NEXUS AI Studio", theme=gr.themes.Base()) as demo: # Header gr.HTML("""
Multi-Modal Intelligence Platform · Powered by Hugging Face
Conversational AI · Qwen 2.5
') chatbot = gr.Chatbot( label="", height=460, bubble_full_width=False, elem_classes="chatbot", show_label=False, avatar_images=(None, "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"), ) with gr.Row(): chat_in = gr.Textbox( placeholder="Ask me anything — code, math, science, creative writing…", show_label=False, scale=8, lines=1, ) send_btn = gr.Button("SEND →", elem_classes="btn-primary", scale=1) clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm") send_btn.click(chat_respond, [chat_in, chatbot], [chat_in, chatbot]) chat_in.submit(chat_respond, [chat_in, chatbot], [chat_in, chatbot]) clear_btn.click(lambda: ([], ""), None, [chatbot, chat_in]) # ── TAB 2: Image Generation ──────────────────────────────────────────── with gr.Tab("🖼 Image Gen"): gr.HTML('Text → Image · FLUX.1-schnell
') with gr.Row(): with gr.Column(scale=1): img_prompt = gr.Textbox( label="Prompt", placeholder="A neon-lit cyberpunk city at midnight, rain reflections, ultra-detailed…", lines=4, ) img_btn = gr.Button("✦ GENERATE IMAGE", elem_classes="btn-primary") img_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False) with gr.Column(scale=1): img_out = gr.Image(label="Generated Image", show_label=False) img_btn.click(gen_image, [img_prompt], [img_out, img_status]) # ── TAB 3: Audio Generation ──────────────────────────────────────────── with gr.Tab("🎵 Audio Gen"): gr.HTML('Text → Music · MusicGen Small
') with gr.Row(): with gr.Column(scale=1): audio_prompt = gr.Textbox( label="Describe the music", placeholder="Lo-fi hip hop beat, warm piano chords, gentle rain ambiance, 80 BPM…", lines=4, ) audio_btn = gr.Button("♪ GENERATE AUDIO", elem_classes="btn-primary") audio_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False) with gr.Column(scale=1): audio_out = gr.Audio(label="Generated Audio", show_label=True) audio_btn.click(gen_audio, [audio_prompt], [audio_out, audio_status]) # ── TAB 4: Video Generation ──────────────────────────────────────────── with gr.Tab("🎬 Video Gen"): gr.HTML('Text → Video · ModelScope 1.7B
') with gr.Row(): with gr.Column(scale=1): video_prompt = gr.Textbox( label="Describe the video", placeholder="A lone astronaut walking on Mars at sunset, dust swirling around boots…", lines=4, ) video_btn = gr.Button("▶ GENERATE VIDEO", elem_classes="btn-primary") video_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False) with gr.Column(scale=1): video_out = gr.Video(label="Generated Video", show_label=True) video_btn.click(gen_video, [video_prompt], [video_out, video_status]) # ── TAB 5: Multimodal ───────────────────────────────────────────────── with gr.Tab("🔮 Multimodal"): gr.HTML('Image + Text → Answer · BLIP-2
') with gr.Row(): with gr.Column(scale=1): mm_image = gr.Image( label="Upload an Image", type="pil", show_label=True, ) mm_question = gr.Textbox( label="Your question about the image", placeholder="What is happening in this image? What objects do you see?", lines=3, ) mm_btn = gr.Button("🔮 ANALYZE", elem_classes="btn-primary") with gr.Column(scale=1): mm_answer = gr.Textbox(label="AI Answer", lines=6, show_label=True) mm_img_out = gr.Image(label="Processed Image", show_label=True) mm_btn.click(gen_multimodal, [mm_image, mm_question], [mm_answer, mm_img_out]) # Footer gr.HTML("""