Spaces:

hari7261
/

Nexus-AI-Studio

Runtime error

File size: 17,645 Bytes

fd4d7fa

import gradio as gr
import requests
import os
import base64
import io
import json
from PIL import Image
import numpy as np

# Access token for the Hugging Face Inference API, read once at import time.
# Surrounding whitespace is stripped so a secret pasted with a trailing
# newline cannot end up inside the header value.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()

# Headers sent with every API request. When no token is configured the
# Authorization header is omitted entirely instead of being sent as a bare
# "Bearer " with no credentials after it.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# ── Model IDs ──────────────────────────────────────────────────────────────────
TEXT_MODEL        = "Qwen/Qwen2.5-0.5B-Instruct"          # tiny chat LLM
IMAGE_MODEL       = "black-forest-labs/FLUX.1-schnell"     # fast image gen
AUDIO_MODEL       = "facebook/musicgen-small"              # audio/music gen
VIDEO_MODEL       = "ali-vilab/text-to-video-ms-1.7b"     # text-to-video
MULTIMODAL_MODEL  = "Salesforce/blip2-opt-2.7b"           # image+text (VQA / caption)

# ── HF Inference API helpers ───────────────────────────────────────────────────

def query_text(messages: list, system: str = "") -> str:
    """Chat with TEXT_MODEL via HF serverless inference (text generation).

    The OpenAI-style chat-completions route is tried first. If it does not
    answer with HTTP 200, or its body does not have the expected shape, the
    plain text-generation route is used as a fallback.

    Args:
        messages: Conversation as ``{"role": ..., "content": ...}`` dicts.
        system: Optional system prompt. It is prepended only when non-empty
            and ``messages`` does not already start with a system message.

    Returns:
        The assistant reply, or a ``⚠️``-prefixed string describing the
        failure. Network errors are reported the same way instead of being
        raised into the UI callback.
    """
    if system and not (messages and messages[0].get("role") == "system"):
        # Build a new list so the caller's list is left untouched.
        messages = [{"role": "system", "content": system}, *messages]

    base_url = f"https://api-inference.huggingface.co/models/{TEXT_MODEL}"
    payload = {
        "model": TEXT_MODEL,
        "messages": messages,
        "max_tokens": 1024,
        "temperature": 0.7,
    }
    try:
        r = requests.post(
            f"{base_url}/v1/chat/completions",
            headers=HEADERS,
            json=payload,
            timeout=60,
        )
    except requests.RequestException as exc:
        return f"⚠️ Request failed: {exc}"

    if r.status_code == 200:
        try:
            return r.json()["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError):
            # 200 with an unexpected body: deliberately fall through to the
            # plain text-generation endpoint below.
            pass

    # fallback plain text-generation endpoint
    prompt = "\n".join(str(m.get("content") or "") for m in messages)
    fallback_payload = {
        "inputs": prompt,
        # return_full_text=False asks for the completion only, so the reply
        # does not start with an echo of the whole prompt.
        "parameters": {"max_new_tokens": 512, "return_full_text": False},
    }
    try:
        r2 = requests.post(base_url, headers=HEADERS, json=fallback_payload, timeout=60)
    except requests.RequestException:
        r2 = None

    if r2 is not None and r2.status_code == 200:
        try:
            result = r2.json()
        except ValueError:
            return r2.text
        if isinstance(result, list) and result and isinstance(result[0], dict):
            return result[0].get("generated_text", str(result))
        return str(result)

    # Both routes failed: report the primary (chat) endpoint's response.
    return f"⚠️ Error {r.status_code}: {r.text[:300]}"


def query_image(prompt: str) -> Image.Image | str:
    """Generate an image from ``prompt`` via HF inference.

    Args:
        prompt: Text description sent as the model input.

    Returns:
        A PIL image on success. On any failure (network error, non-200
        status, or a body that is not a decodable image) a ``⚠️``-prefixed
        error string is returned instead, so callers can tell the two apart
        with ``isinstance(result, str)``.
    """
    url = f"https://api-inference.huggingface.co/models/{IMAGE_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
    except requests.RequestException as exc:
        return f"⚠️ Request failed: {exc}"

    if r.status_code != 200:
        return f"⚠️ Error {r.status_code}: {r.text[:300]}"

    try:
        return Image.open(io.BytesIO(r.content))
    except OSError as exc:
        # Pillow's UnidentifiedImageError is an OSError subclass; it is raised
        # when a 200 response carries something other than image bytes.
        return f"⚠️ Could not decode image: {exc}"


def query_audio(prompt: str) -> str | None:
    """Generate audio via HF inference and save it to a temp file.

    Args:
        prompt: Text description sent as the model input.

    Returns:
        Path of a newly created ``.wav``-suffixed temp file holding the
        response bytes, or ``None`` when the request fails, the status is not
        200, or the body is empty. The file is not deleted by this function.
    """
    import tempfile  # local import so this block does not depend on file-level imports

    url = f"https://api-inference.huggingface.co/models/{AUDIO_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
    except requests.RequestException:
        return None

    if r.status_code != 200 or not r.content:
        return None

    # A unique file per call: a single fixed path would let concurrent
    # requests overwrite each other's output while it is still being served.
    with tempfile.NamedTemporaryFile(
        prefix="generated_audio_", suffix=".wav", delete=False
    ) as f:
        f.write(r.content)
        path = f.name
    return path


def query_video(prompt: str) -> str | None:
    """Generate a short video via HF inference and save it to a temp file.

    Args:
        prompt: Text description sent as the model input.

    Returns:
        Path of a newly created ``.mp4``-suffixed temp file holding the
        response bytes, or ``None`` when the request fails, the status is not
        200, or the body is empty. The file is not deleted by this function.
    """
    import tempfile  # local import so this block does not depend on file-level imports

    url = f"https://api-inference.huggingface.co/models/{VIDEO_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=180)
    except requests.RequestException:
        return None

    if r.status_code != 200 or not r.content:
        return None

    # A unique file per call: a single fixed path would let concurrent
    # requests overwrite each other's output while it is still being served.
    with tempfile.NamedTemporaryFile(
        prefix="generated_video_", suffix=".mp4", delete=False
    ) as f:
        f.write(r.content)
        path = f.name
    return path


def query_multimodal(image: Image.Image | None, text: str) -> tuple[str, Image.Image | None]:
    """Ask MULTIMODAL_MODEL a question about an image (VQA / captioning).

    Args:
        image: PIL image to analyse, or ``None`` when nothing was uploaded.
        text: Question about the image. When empty or whitespace-only, a
            generic "describe" prompt is sent instead.

    Returns:
        ``(answer, image)``. ``answer`` is the model's reply or a message
        describing what went wrong; ``image`` is the input image passed back
        unchanged (``None`` when no image was given).
    """
    if image is None:
        return "Please upload an image for multimodal analysis.", None

    # The image is sent as base64-encoded PNG bytes inside the JSON payload.
    buf = io.BytesIO()
    image.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    question = (text or "").strip() or "Describe this image in detail."
    url = f"https://api-inference.huggingface.co/models/{MULTIMODAL_MODEL}"
    payload = {"inputs": {"image": b64, "question": question}}
    try:
        r = requests.post(url, headers=HEADERS, json=payload, timeout=90)
    except requests.RequestException as exc:
        return f"⚠️ Request failed: {exc}", image

    if r.status_code != 200:
        return f"⚠️ Error {r.status_code}: {r.text[:300]}", image

    try:
        result = r.json()
    except ValueError:
        return f"⚠️ Unexpected response: {r.text[:300]}", image

    # Accept either a bare object or a non-empty list whose first item is the
    # answer object; anything else is shown verbatim.
    first = result[0] if isinstance(result, list) and result else result
    if isinstance(first, dict):
        if "answer" in first:
            answer = first["answer"]
        elif "generated_text" in first:
            # NOTE(review): presumably the key used by caption-style
            # responses — confirm against the deployed model's output.
            answer = first["generated_text"]
        else:
            answer = str(first)
    else:
        answer = str(first)
    return answer, image


# ── Chat state helper ──────────────────────────────────────────────────────────

def chat_respond(user_msg: str, history: list) -> tuple[str, list]:
    """Handle one chat turn: send the conversation to the model and record the reply.

    Args:
        user_msg: Text typed by the user. ``None``, empty, or whitespace-only
            input is ignored.
        history: Prior turns as ``(user_text, bot_text)`` pairs. ``None`` is
            treated as an empty conversation.

    Returns:
        ``("", history)``: an empty string to clear the input box, and the
        history list, extended in place with the new ``(user_msg, reply)``
        pair when a message was actually sent.
    """
    if history is None:
        history = []
    if not user_msg or not user_msg.strip():
        return "", history

    messages = [{"role": "system", "content": "You are NEXUS, an advanced AI assistant. Be helpful, thorough, and thoughtful."}]
    for human, bot in history:
        # Skip missing or non-text halves of a turn so the API never receives
        # a message whose content is None.
        if isinstance(human, str) and human:
            messages.append({"role": "user", "content": human})
        if isinstance(bot, str) and bot:
            messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": user_msg})

    reply = query_text(messages)
    history.append((user_msg, reply))
    return "", history


def gen_image(prompt: str, progress=gr.Progress()):
    """UI callback: generate an image and a status line for ``prompt``.

    Returns ``(image, status)``; ``image`` is ``None`` when generation failed,
    in which case ``status`` carries the error text from ``query_image``.
    """
    progress(0.2, desc="Connecting to FLUX…")
    outcome = query_image(prompt)
    progress(1.0, desc="Done")

    # query_image signals failure by handing back the error text itself.
    failed = isinstance(outcome, str)
    picture = None if failed else outcome
    status = outcome if failed else "✅ Image generated successfully!"
    return picture, status


def gen_audio(prompt: str, progress=gr.Progress()):
    """UI callback: generate audio for ``prompt``.

    Returns ``(path, status)``; ``path`` is ``None`` when ``query_audio``
    produced no file.
    """
    progress(0.2, desc="Composing music…")
    audio_file = query_audio(prompt)
    progress(1.0, desc="Done")

    # Guard clause for the failure case; success falls through.
    if not audio_file:
        return None, "⚠️ Audio generation failed. The model may be loading – try again in a moment."
    return audio_file, "✅ Audio generated!"


def gen_video(prompt: str, progress=gr.Progress()):
    """UI callback: generate a video for ``prompt``.

    Returns ``(path, status)``; ``path`` is ``None`` when ``query_video``
    produced no file.
    """
    progress(0.2, desc="Rendering frames…")
    video_file = query_video(prompt)
    progress(1.0, desc="Done")

    # Guard clause for the failure case; success falls through.
    if not video_file:
        return None, "⚠️ Video generation failed. The model may be loading – try again in a moment."
    return video_file, "✅ Video generated!"


def gen_multimodal(image, question: str, progress=gr.Progress()):
    """UI callback: answer ``question`` about ``image``.

    Thin wrapper around ``query_multimodal`` that reports progress; returns
    its ``(answer, image)`` pair unchanged.
    """
    progress(0.3, desc="Analyzing image…")
    reply_text, shown_image = query_multimodal(image, question)
    progress(1.0, desc="Done")
    return reply_text, shown_image


# ── CSS ────────────────────────────────────────────────────────────────────────
# Custom stylesheet passed to gr.Blocks(css=...). It defines the colour
# palette as CSS variables on :root and styles the classes used by the UI
# below: the nexus-* header, badges, tab-nav, chatbot, btn-primary,
# status-box and section-label. The @import pulls the two fonts from Google
# Fonts at page load.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:ital,wght@0,300;0,500;0,700;1,300&display=swap');

:root {
    --bg:        #0a0a0f;
    --panel:     #111118;
    --border:    #1e1e2e;
    --accent:    #7c3aed;
    --accent2:   #06b6d4;
    --accent3:   #f59e0b;
    --text:      #e2e8f0;
    --muted:     #64748b;
    --success:   #10b981;
    --danger:    #ef4444;
}

body, .gradio-container { background: var(--bg) !important; font-family: 'DM Sans', sans-serif; color: var(--text); }

/* Header */
.nexus-header {
    text-align: center;
    padding: 2.5rem 1rem 1rem;
    background: linear-gradient(135deg, #0a0a0f 0%, #130d22 50%, #0a0a0f 100%);
    border-bottom: 1px solid var(--border);
    margin-bottom: 1.5rem;
}
.nexus-title {
    font-family: 'Space Mono', monospace;
    font-size: clamp(2rem, 6vw, 3.5rem);
    font-weight: 700;
    background: linear-gradient(90deg, var(--accent) 0%, var(--accent2) 50%, var(--accent3) 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    letter-spacing: -0.02em;
    margin: 0;
}
.nexus-sub {
    color: var(--muted);
    font-size: 0.95rem;
    margin-top: 0.5rem;
    letter-spacing: 0.08em;
    text-transform: uppercase;
}
.badge-row { display: flex; justify-content: center; gap: 0.5rem; flex-wrap: wrap; margin-top: 1rem; }
.badge {
    font-family: 'Space Mono', monospace;
    font-size: 0.65rem;
    padding: 0.25rem 0.75rem;
    border-radius: 999px;
    border: 1px solid;
    letter-spacing: 0.05em;
}
.badge-chat   { border-color: var(--accent);  color: var(--accent);  }
.badge-img    { border-color: var(--accent2); color: var(--accent2); }
.badge-audio  { border-color: var(--accent3); color: var(--accent3); }
.badge-video  { border-color: var(--success); color: var(--success); }
.badge-mm     { border-color: var(--danger);  color: var(--danger);  }

/* Tabs */
.tab-nav { border-bottom: 1px solid var(--border) !important; }
.tab-nav button {
    font-family: 'Space Mono', monospace !important;
    font-size: 0.8rem !important;
    letter-spacing: 0.05em !important;
    color: var(--muted) !important;
    padding: 0.75rem 1.25rem !important;
    border-bottom: 2px solid transparent !important;
    transition: all 0.2s !important;
}
.tab-nav button.selected {
    color: var(--accent2) !important;
    border-bottom-color: var(--accent2) !important;
}

/* Chatbot */
.chatbot .message.user { background: rgba(124,58,237,0.15) !important; border-left: 3px solid var(--accent) !important; }
.chatbot .message.bot  { background: rgba(6,182,212,0.08)  !important; border-left: 3px solid var(--accent2) !important; }

/* Inputs */
textarea, input[type=text] {
    background: var(--panel) !important;
    border: 1px solid var(--border) !important;
    color: var(--text) !important;
    border-radius: 8px !important;
    font-family: 'DM Sans', sans-serif !important;
}
textarea:focus, input:focus {
    border-color: var(--accent2) !important;
    box-shadow: 0 0 0 2px rgba(6,182,212,0.15) !important;
}

/* Buttons */
.btn-primary {
    background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%) !important;
    color: #fff !important;
    font-family: 'Space Mono', monospace !important;
    font-size: 0.8rem !important;
    letter-spacing: 0.05em !important;
    border: none !important;
    border-radius: 8px !important;
    padding: 0.65rem 1.5rem !important;
    cursor: pointer !important;
    transition: opacity 0.2s !important;
}
.btn-primary:hover { opacity: 0.85 !important; }

/* Status box */
.status-box {
    background: var(--panel);
    border: 1px solid var(--border);
    border-radius: 8px;
    padding: 0.75rem 1rem;
    font-family: 'Space Mono', monospace;
    font-size: 0.75rem;
    color: var(--muted);
    min-height: 2.5rem;
}

/* Section labels */
.section-label {
    font-family: 'Space Mono', monospace;
    font-size: 0.7rem;
    letter-spacing: 0.1em;
    color: var(--muted);
    text-transform: uppercase;
    margin-bottom: 0.4rem;
}
"""

# ── Build UI ───────────────────────────────────────────────────────────────────

# Assemble the whole interface: a static HTML header, five tabs (chat, image,
# audio, video, multimodal) each wired to its gen_*/chat_respond callback, and
# a static HTML footer.
# NOTE(review): the page hosting this file reports "Runtime error". Which
# Gradio version is installed is not visible here — confirm that the keyword
# arguments used below (css/theme on Blocks, bubble_full_width on Chatbot) and
# the (user, bot) tuple history used by chat_respond are accepted by it.
with gr.Blocks(css=CSS, title="NEXUS AI Studio", theme=gr.themes.Base()) as demo:

    # Header
    gr.HTML("""
    <div class="nexus-header">
        <h1 class="nexus-title">⬡ NEXUS AI STUDIO</h1>
        <p class="nexus-sub">Multi-Modal Intelligence Platform · Powered by Hugging Face</p>
        <div class="badge-row">
            <span class="badge badge-chat">💬 CHAT</span>
            <span class="badge badge-img">🖼 IMAGE GEN</span>
            <span class="badge badge-audio">🎵 AUDIO GEN</span>
            <span class="badge badge-video">🎬 VIDEO GEN</span>
            <span class="badge badge-mm">🔮 MULTIMODAL</span>
        </div>
    </div>
    """)

    with gr.Tabs(elem_classes="tab-nav"):

        # ── TAB 1: Chat ────────────────────────────────────────────────────────
        with gr.Tab("💬  Chat"):
            gr.HTML('<p class="section-label">Conversational AI · Qwen 2.5</p>')
            chatbot = gr.Chatbot(
                label="",
                height=460,
                bubble_full_width=False,
                elem_classes="chatbot",
                show_label=False,
                # (user avatar, bot avatar): no user avatar, HF logo for the bot.
                avatar_images=(None, "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"),
            )
            with gr.Row():
                chat_in = gr.Textbox(
                    placeholder="Ask me anything — code, math, science, creative writing…",
                    show_label=False,
                    scale=8,
                    lines=1,
                )
                send_btn = gr.Button("SEND →", elem_classes="btn-primary", scale=1)
            clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm")

            # Clicking SEND and submitting the textbox run the same handler;
            # its two return values clear the textbox and update the chatbot.
            send_btn.click(chat_respond, [chat_in, chatbot], [chat_in, chatbot])
            chat_in.submit(chat_respond, [chat_in, chatbot], [chat_in, chatbot])
            # Reset both the conversation and the input box.
            clear_btn.click(lambda: ([], ""), None, [chatbot, chat_in])

        # ── TAB 2: Image Generation ────────────────────────────────────────────
        with gr.Tab("🖼  Image Gen"):
            gr.HTML('<p class="section-label">Text → Image · FLUX.1-schnell</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    img_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="A neon-lit cyberpunk city at midnight, rain reflections, ultra-detailed…",
                        lines=4,
                    )
                    img_btn = gr.Button("✦ GENERATE IMAGE", elem_classes="btn-primary")
                    img_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    img_out = gr.Image(label="Generated Image", show_label=False)

            # gen_image returns (image or None, status text).
            img_btn.click(gen_image, [img_prompt], [img_out, img_status])

        # ── TAB 3: Audio Generation ────────────────────────────────────────────
        with gr.Tab("🎵  Audio Gen"):
            gr.HTML('<p class="section-label">Text → Music · MusicGen Small</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    audio_prompt = gr.Textbox(
                        label="Describe the music",
                        placeholder="Lo-fi hip hop beat, warm piano chords, gentle rain ambiance, 80 BPM…",
                        lines=4,
                    )
                    audio_btn = gr.Button("♪ GENERATE AUDIO", elem_classes="btn-primary")
                    audio_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    audio_out = gr.Audio(label="Generated Audio", show_label=True)

            # gen_audio returns (file path or None, status text).
            audio_btn.click(gen_audio, [audio_prompt], [audio_out, audio_status])

        # ── TAB 4: Video Generation ────────────────────────────────────────────
        with gr.Tab("🎬  Video Gen"):
            gr.HTML('<p class="section-label">Text → Video · ModelScope 1.7B</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    video_prompt = gr.Textbox(
                        label="Describe the video",
                        placeholder="A lone astronaut walking on Mars at sunset, dust swirling around boots…",
                        lines=4,
                    )
                    video_btn = gr.Button("▶ GENERATE VIDEO", elem_classes="btn-primary")
                    video_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    video_out = gr.Video(label="Generated Video", show_label=True)

            # gen_video returns (file path or None, status text).
            video_btn.click(gen_video, [video_prompt], [video_out, video_status])

        # ── TAB 5: Multimodal ─────────────────────────────────────────────────
        with gr.Tab("🔮  Multimodal"):
            gr.HTML('<p class="section-label">Image + Text → Answer · BLIP-2</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    mm_image = gr.Image(
                        label="Upload an Image",
                        # "pil" so the callback receives a PIL image, which
                        # query_multimodal saves to PNG before upload.
                        type="pil",
                        show_label=True,
                    )
                    mm_question = gr.Textbox(
                        label="Your question about the image",
                        placeholder="What is happening in this image? What objects do you see?",
                        lines=3,
                    )
                    mm_btn = gr.Button("🔮 ANALYZE", elem_classes="btn-primary")
                with gr.Column(scale=1):
                    mm_answer = gr.Textbox(label="AI Answer", lines=6, show_label=True)
                    mm_img_out = gr.Image(label="Processed Image", show_label=True)

            # gen_multimodal returns (answer text, the input image echoed back).
            mm_btn.click(gen_multimodal, [mm_image, mm_question], [mm_answer, mm_img_out])

    # Footer
    gr.HTML("""
    <div style="text-align:center; padding: 1.5rem; border-top: 1px solid #1e1e2e; margin-top: 1.5rem; color: #475569; font-size: 0.75rem; font-family: 'Space Mono', monospace; letter-spacing: 0.05em;">
        NEXUS AI STUDIO · Built with ❤ on Hugging Face Spaces · Models: Qwen2.5 · FLUX.1-schnell · MusicGen · ModelScope · BLIP-2
    </div>
    """)

# Start the app when the module is executed (runs at import time; there is no
# __main__ guard).
demo.launch()