# Nexus-AI-Studio / app.py
# hari7261's picture
# Create app.py
# fd4d7fa verified
import gradio as gr
import requests
import os
import base64
import io
import json
from PIL import Image
import numpy as np
# Hugging Face access token, read once at import time. Surrounding whitespace
# (e.g. a trailing newline pasted into a secret) is stripped so it cannot end
# up inside the HTTP header value.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()

# Request headers shared by every inference call. When no token is configured
# the Authorization header is omitted entirely instead of sending a malformed
# "Bearer " value with nothing after it.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# ── Model IDs ─────────────────────────────────────────────────────────────────
TEXT_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"  # tiny chat LLM
IMAGE_MODEL = "black-forest-labs/FLUX.1-schnell"  # fast image gen
AUDIO_MODEL = "facebook/musicgen-small"  # audio/music gen
VIDEO_MODEL = "ali-vilab/text-to-video-ms-1.7b"  # text-to-video
MULTIMODAL_MODEL = "Salesforce/blip2-opt-2.7b"  # image+text (VQA / caption)
# ── HF Inference API helpers ──────────────────────────────────────────────────
def query_text(messages: list, system: str = "") -> str:
    """Chat via HF serverless inference (text generation).

    The chat-completions route is tried first. If it does not produce a
    usable reply, the plain text-generation route is tried with the message
    contents joined by newlines.

    Args:
        messages: Chat messages as dicts with "role" and "content" keys.
        system: Optional system prompt. When non-empty and ``messages`` does
            not already start with a system message, it is prepended.

    Returns:
        The generated text, or a "⚠️ Error ..." string describing why the
        chat-completions request failed when neither route succeeded.
    """
    if system and not (messages and messages[0].get("role") == "system"):
        # Build a new list so the caller's messages are left untouched.
        messages = [{"role": "system", "content": system}, *messages]

    url = f"https://api-inference.huggingface.co/models/{TEXT_MODEL}/v1/chat/completions"
    payload = {
        "model": TEXT_MODEL,
        "messages": messages,
        "max_tokens": 1024,
        "temperature": 0.7,
    }
    try:
        r = requests.post(url, headers=HEADERS, json=payload, timeout=60)
    except requests.RequestException as exc:
        # Timeouts / connection failures become a readable message instead of
        # an unhandled exception in the UI callback.
        error = f"⚠️ Error: {exc}"
    else:
        if r.status_code == 200:
            try:
                return r.json()["choices"][0]["message"]["content"]
            except (ValueError, KeyError, IndexError, TypeError):
                error = "⚠️ Error: unexpected response format from the chat endpoint."
        else:
            error = f"⚠️ Error {r.status_code}: {r.text[:300]}"

    # fallback plain text-generation endpoint
    url2 = f"https://api-inference.huggingface.co/models/{TEXT_MODEL}"
    prompt = "\n".join(m["content"] for m in messages)
    try:
        r2 = requests.post(
            url2,
            headers=HEADERS,
            json={"inputs": prompt, "parameters": {"max_new_tokens": 512}},
            timeout=60,
        )
    except requests.RequestException:
        return error
    if r2.status_code != 200:
        return error

    try:
        result = r2.json()
    except ValueError:
        return error
    # Guard the indexing: an empty list or a non-dict first item would
    # otherwise raise instead of returning text.
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get("generated_text", str(result))
    return str(result)
def query_image(prompt: str) -> Image.Image | str:
    """Generate image via HF inference.

    Args:
        prompt: Text description sent as the request's "inputs".

    Returns:
        The decoded PIL image on success, otherwise a "⚠️ Error ..." string
        (callers distinguish the two cases with ``isinstance(result, str)``).
    """
    url = f"https://api-inference.huggingface.co/models/{IMAGE_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
    except requests.RequestException as exc:
        # Network failures are reported through the same string channel as
        # HTTP errors rather than raised into the UI callback.
        return f"⚠️ Error: {exc}"
    if r.status_code != 200:
        return f"⚠️ Error {r.status_code}: {r.text[:300]}"

    try:
        picture = Image.open(io.BytesIO(r.content))
        # Image.open is lazy; load now so a truncated or non-image body is
        # caught here instead of later when the UI tries to render it.
        picture.load()
    except OSError:
        return "⚠️ Error: the model response was not a valid image."
    return picture
def query_audio(prompt: str) -> str | None:
    """Generate audio via HF inference, returns a temp file path.

    Args:
        prompt: Text description sent as the request's "inputs".

    Returns:
        Path of a newly created temporary ``.wav`` file holding the response
        body, or ``None`` when the request failed or returned no data.
    """
    import tempfile

    url = f"https://api-inference.huggingface.co/models/{AUDIO_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
    except requests.RequestException:
        return None
    if r.status_code != 200 or not r.content:
        return None

    # A unique file per call: a single fixed path would let concurrent
    # requests overwrite each other's output. delete=False keeps the file on
    # disk after the handle closes so the caller can serve it.
    with tempfile.NamedTemporaryFile(
        prefix="generated_audio_", suffix=".wav", delete=False
    ) as f:
        f.write(r.content)
    return f.name
def query_video(prompt: str) -> str | None:
    """Generate short video via HF inference, returns a temp file path.

    Args:
        prompt: Text description sent as the request's "inputs".

    Returns:
        Path of a newly created temporary ``.mp4`` file holding the response
        body, or ``None`` when the request failed or returned no data.
    """
    import tempfile

    url = f"https://api-inference.huggingface.co/models/{VIDEO_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=180)
    except requests.RequestException:
        return None
    if r.status_code != 200 or not r.content:
        return None

    # A unique file per call: a single fixed path would let concurrent
    # requests overwrite each other's output. delete=False keeps the file on
    # disk after the handle closes so the caller can serve it.
    with tempfile.NamedTemporaryFile(
        prefix="generated_video_", suffix=".mp4", delete=False
    ) as f:
        f.write(r.content)
    return f.name
def _answer_from_result(result) -> str:
    """Extract the answer text from a decoded inference response."""
    if isinstance(result, list):
        if not result:
            return "⚠️ Error: the model returned an empty response."
        # Only the first candidate is used.
        result = result[0]
    if isinstance(result, dict):
        # Accept either an "answer" or a "generated_text" field so the text
        # is returned rather than the dict's repr.
        for key in ("answer", "generated_text"):
            if key in result:
                return str(result[key])
    return str(result)


def query_multimodal(image: Image.Image | None, text: str) -> tuple[str, Image.Image | None]:
    """VQA / image captioning with BLIP-2. Also returns the original image.

    Args:
        image: Uploaded PIL image, or ``None`` when nothing was uploaded.
        text: Question about the image; a generic "describe" prompt is used
            when it is empty.

    Returns:
        ``(answer, image)`` where ``answer`` is the model's text or a
        "⚠️ Error ..." message, and ``image`` is the input image unchanged
        (``None`` when no image was supplied).
    """
    if image is None:
        # No image → nothing to analyse; ask the user for one.
        return "Please upload an image for multimodal analysis.", None

    # Encode image to base64
    buf = io.BytesIO()
    image.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    url = f"https://api-inference.huggingface.co/models/{MULTIMODAL_MODEL}"
    payload = {"inputs": {"image": b64, "question": text or "Describe this image in detail."}}
    try:
        r = requests.post(url, headers=HEADERS, json=payload, timeout=90)
    except requests.RequestException as exc:
        return f"⚠️ Error: {exc}", image
    if r.status_code != 200:
        return f"⚠️ Error {r.status_code}: {r.text[:300]}", image

    try:
        result = r.json()
    except ValueError:
        return "⚠️ Error: the model response was not valid JSON.", image
    return _answer_from_result(result), image
# ── Chat state helper ─────────────────────────────────────────────────────────
def chat_respond(user_msg: str, history: list) -> tuple[str, list]:
    """Handle one chat turn: query the text model and extend the history.

    Args:
        user_msg: Text typed by the user.
        history: Previous turns as ``(user, assistant)`` pairs; ``None`` is
            treated as an empty conversation.

    Returns:
        ``("", updated_history)`` — the empty string clears the input box.
        For a blank message the history is returned without a new turn.
    """
    # Work on a copy so the caller's list is never mutated in place.
    turns = list(history or [])
    if not user_msg or not user_msg.strip():
        return "", turns

    messages = [{"role": "system", "content": "You are NEXUS, an advanced AI assistant. Be helpful, thorough, and thoughtful."}]
    for human, bot in turns:
        # Skip missing halves of a turn so no null content is sent.
        if human is not None:
            messages.append({"role": "user", "content": human})
        if bot is not None:
            messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": user_msg})

    reply = query_text(messages)
    turns.append((user_msg, reply))
    return "", turns
def gen_image(prompt: str, progress=gr.Progress()):
    """UI callback for the image tab.

    Args:
        prompt: Text description of the image to generate.
        progress: Gradio progress tracker (the ``gr.Progress()`` default is
            how the callback requests one from Gradio).

    Returns:
        ``(image_or_None, status_text)`` for the image and status components.
    """
    # Do not spend a remote call on an empty prompt.
    if not prompt or not prompt.strip():
        return None, "⚠️ Please enter a prompt first."

    progress(0.2, desc="Connecting to FLUX…")
    result = query_image(prompt)
    progress(1.0, desc="Done")
    if isinstance(result, str):  # error string
        return None, result
    return result, "✅ Image generated successfully!"
def gen_audio(prompt: str, progress=gr.Progress()):
    """UI callback for the audio tab.

    Args:
        prompt: Text description of the music to generate.
        progress: Gradio progress tracker (the ``gr.Progress()`` default is
            how the callback requests one from Gradio).

    Returns:
        ``(audio_path_or_None, status_text)`` for the audio and status
        components.
    """
    # Do not spend a remote call on an empty prompt.
    if not prompt or not prompt.strip():
        return None, "⚠️ Please describe the music first."

    progress(0.2, desc="Composing music…")
    path = query_audio(prompt)
    progress(1.0, desc="Done")
    if path:
        return path, "✅ Audio generated!"
    return None, "⚠️ Audio generation failed. The model may be loading – try again in a moment."
def gen_video(prompt: str, progress=gr.Progress()):
    """UI callback for the video tab.

    Args:
        prompt: Text description of the video to generate.
        progress: Gradio progress tracker (the ``gr.Progress()`` default is
            how the callback requests one from Gradio).

    Returns:
        ``(video_path_or_None, status_text)`` for the video and status
        components.
    """
    # Do not spend a remote call on an empty prompt.
    if not prompt or not prompt.strip():
        return None, "⚠️ Please describe the video first."

    progress(0.2, desc="Rendering frames…")
    path = query_video(prompt)
    progress(1.0, desc="Done")
    if path:
        return path, "✅ Video generated!"
    return None, "⚠️ Video generation failed. The model may be loading – try again in a moment."
def gen_multimodal(image, question: str, progress=gr.Progress()):
    """UI callback for the multimodal tab.

    Args:
        image: Uploaded image (may be ``None``); passed through to
            ``query_multimodal``.
        question: Question about the image.
        progress: Gradio progress tracker (the ``gr.Progress()`` default is
            how the callback requests one from Gradio).

    Returns:
        ``(answer_text, image)`` exactly as produced by ``query_multimodal``.
    """
    progress(0.3, desc="Analyzing image…")
    answer, img_out = query_multimodal(image, question)
    progress(1.0, desc="Done")
    return answer, img_out
# ── CSS ───────────────────────────────────────────────────────────────────────
# Custom stylesheet passed to gr.Blocks(css=...). It declares the dark colour
# palette as CSS variables under :root and styles the header, tab bar, chatbot
# messages, text inputs, primary buttons, status boxes and section labels via
# the class names the layout assigns (elem_classes and the inline HTML).
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:ital,wght@0,300;0,500;0,700;1,300&display=swap');
:root {
--bg: #0a0a0f;
--panel: #111118;
--border: #1e1e2e;
--accent: #7c3aed;
--accent2: #06b6d4;
--accent3: #f59e0b;
--text: #e2e8f0;
--muted: #64748b;
--success: #10b981;
--danger: #ef4444;
}
body, .gradio-container { background: var(--bg) !important; font-family: 'DM Sans', sans-serif; color: var(--text); }
/* Header */
.nexus-header {
text-align: center;
padding: 2.5rem 1rem 1rem;
background: linear-gradient(135deg, #0a0a0f 0%, #130d22 50%, #0a0a0f 100%);
border-bottom: 1px solid var(--border);
margin-bottom: 1.5rem;
}
.nexus-title {
font-family: 'Space Mono', monospace;
font-size: clamp(2rem, 6vw, 3.5rem);
font-weight: 700;
background: linear-gradient(90deg, var(--accent) 0%, var(--accent2) 50%, var(--accent3) 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
letter-spacing: -0.02em;
margin: 0;
}
.nexus-sub {
color: var(--muted);
font-size: 0.95rem;
margin-top: 0.5rem;
letter-spacing: 0.08em;
text-transform: uppercase;
}
.badge-row { display: flex; justify-content: center; gap: 0.5rem; flex-wrap: wrap; margin-top: 1rem; }
.badge {
font-family: 'Space Mono', monospace;
font-size: 0.65rem;
padding: 0.25rem 0.75rem;
border-radius: 999px;
border: 1px solid;
letter-spacing: 0.05em;
}
.badge-chat { border-color: var(--accent); color: var(--accent); }
.badge-img { border-color: var(--accent2); color: var(--accent2); }
.badge-audio { border-color: var(--accent3); color: var(--accent3); }
.badge-video { border-color: var(--success); color: var(--success); }
.badge-mm { border-color: var(--danger); color: var(--danger); }
/* Tabs */
.tab-nav { border-bottom: 1px solid var(--border) !important; }
.tab-nav button {
font-family: 'Space Mono', monospace !important;
font-size: 0.8rem !important;
letter-spacing: 0.05em !important;
color: var(--muted) !important;
padding: 0.75rem 1.25rem !important;
border-bottom: 2px solid transparent !important;
transition: all 0.2s !important;
}
.tab-nav button.selected {
color: var(--accent2) !important;
border-bottom-color: var(--accent2) !important;
}
/* Chatbot */
.chatbot .message.user { background: rgba(124,58,237,0.15) !important; border-left: 3px solid var(--accent) !important; }
.chatbot .message.bot { background: rgba(6,182,212,0.08) !important; border-left: 3px solid var(--accent2) !important; }
/* Inputs */
textarea, input[type=text] {
background: var(--panel) !important;
border: 1px solid var(--border) !important;
color: var(--text) !important;
border-radius: 8px !important;
font-family: 'DM Sans', sans-serif !important;
}
textarea:focus, input:focus {
border-color: var(--accent2) !important;
box-shadow: 0 0 0 2px rgba(6,182,212,0.15) !important;
}
/* Buttons */
.btn-primary {
background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%) !important;
color: #fff !important;
font-family: 'Space Mono', monospace !important;
font-size: 0.8rem !important;
letter-spacing: 0.05em !important;
border: none !important;
border-radius: 8px !important;
padding: 0.65rem 1.5rem !important;
cursor: pointer !important;
transition: opacity 0.2s !important;
}
.btn-primary:hover { opacity: 0.85 !important; }
/* Status box */
.status-box {
background: var(--panel);
border: 1px solid var(--border);
border-radius: 8px;
padding: 0.75rem 1rem;
font-family: 'Space Mono', monospace;
font-size: 0.75rem;
color: var(--muted);
min-height: 2.5rem;
}
/* Section labels */
.section-label {
font-family: 'Space Mono', monospace;
font-size: 0.7rem;
letter-spacing: 0.1em;
color: var(--muted);
text-transform: uppercase;
margin-bottom: 0.4rem;
}
"""
# ── Build UI ──────────────────────────────────────────────────────────────────
# Assemble the whole interface: a header, one tab per capability (chat, image,
# audio, video, multimodal) wired to its callback, and a footer. The app is
# launched as soon as the layout has been built.
with gr.Blocks(css=CSS, title="NEXUS AI Studio", theme=gr.themes.Base()) as demo:
    # Header
    gr.HTML("""
    <div class="nexus-header">
        <h1 class="nexus-title">⬡ NEXUS AI STUDIO</h1>
        <p class="nexus-sub">Multi-Modal Intelligence Platform · Powered by Hugging Face</p>
        <div class="badge-row">
            <span class="badge badge-chat">💬 CHAT</span>
            <span class="badge badge-img">🖼 IMAGE GEN</span>
            <span class="badge badge-audio">🎵 AUDIO GEN</span>
            <span class="badge badge-video">🎬 VIDEO GEN</span>
            <span class="badge badge-mm">🔮 MULTIMODAL</span>
        </div>
    </div>
    """)

    with gr.Tabs(elem_classes="tab-nav"):
        # ── TAB 1: Chat ───────────────────────────────────────────────────────
        with gr.Tab("💬 Chat"):
            gr.HTML('<p class="section-label">Conversational AI · Qwen 2.5</p>')
            chatbot = gr.Chatbot(
                label="",
                height=460,
                bubble_full_width=False,
                elem_classes="chatbot",
                show_label=False,
                avatar_images=(None, "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"),
            )
            with gr.Row():
                chat_in = gr.Textbox(
                    placeholder="Ask me anything — code, math, science, creative writing…",
                    show_label=False,
                    scale=8,
                    lines=1,
                )
                send_btn = gr.Button("SEND →", elem_classes="btn-primary", scale=1)
            clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm")

            # Both the button and pressing Enter send the message; the
            # callback returns "" to clear the input box.
            send_btn.click(chat_respond, [chat_in, chatbot], [chat_in, chatbot])
            chat_in.submit(chat_respond, [chat_in, chatbot], [chat_in, chatbot])
            clear_btn.click(lambda: ([], ""), None, [chatbot, chat_in])

        # ── TAB 2: Image Generation ───────────────────────────────────────────
        with gr.Tab("🖼 Image Gen"):
            gr.HTML('<p class="section-label">Text → Image · FLUX.1-schnell</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    img_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="A neon-lit cyberpunk city at midnight, rain reflections, ultra-detailed…",
                        lines=4,
                    )
                    img_btn = gr.Button("✦ GENERATE IMAGE", elem_classes="btn-primary")
                    img_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    img_out = gr.Image(label="Generated Image", show_label=False)
            img_btn.click(gen_image, [img_prompt], [img_out, img_status])

        # ── TAB 3: Audio Generation ───────────────────────────────────────────
        with gr.Tab("🎵 Audio Gen"):
            gr.HTML('<p class="section-label">Text → Music · MusicGen Small</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    audio_prompt = gr.Textbox(
                        label="Describe the music",
                        placeholder="Lo-fi hip hop beat, warm piano chords, gentle rain ambiance, 80 BPM…",
                        lines=4,
                    )
                    audio_btn = gr.Button("♪ GENERATE AUDIO", elem_classes="btn-primary")
                    audio_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    audio_out = gr.Audio(label="Generated Audio", show_label=True)
            audio_btn.click(gen_audio, [audio_prompt], [audio_out, audio_status])

        # ── TAB 4: Video Generation ───────────────────────────────────────────
        with gr.Tab("🎬 Video Gen"):
            gr.HTML('<p class="section-label">Text → Video · ModelScope 1.7B</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    video_prompt = gr.Textbox(
                        label="Describe the video",
                        placeholder="A lone astronaut walking on Mars at sunset, dust swirling around boots…",
                        lines=4,
                    )
                    video_btn = gr.Button("▶ GENERATE VIDEO", elem_classes="btn-primary")
                    video_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    video_out = gr.Video(label="Generated Video", show_label=True)
            video_btn.click(gen_video, [video_prompt], [video_out, video_status])

        # ── TAB 5: Multimodal ─────────────────────────────────────────────────
        with gr.Tab("🔮 Multimodal"):
            gr.HTML('<p class="section-label">Image + Text → Answer · BLIP-2</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    mm_image = gr.Image(
                        label="Upload an Image",
                        type="pil",
                        show_label=True,
                    )
                    mm_question = gr.Textbox(
                        label="Your question about the image",
                        placeholder="What is happening in this image? What objects do you see?",
                        lines=3,
                    )
                    mm_btn = gr.Button("🔮 ANALYZE", elem_classes="btn-primary")
                with gr.Column(scale=1):
                    mm_answer = gr.Textbox(label="AI Answer", lines=6, show_label=True)
                    mm_img_out = gr.Image(label="Processed Image", show_label=True)
            mm_btn.click(gen_multimodal, [mm_image, mm_question], [mm_answer, mm_img_out])

    # Footer
    gr.HTML("""
    <div style="text-align:center; padding: 1.5rem; border-top: 1px solid #1e1e2e; margin-top: 1.5rem; color: #475569; font-size: 0.75rem; font-family: 'Space Mono', monospace; letter-spacing: 0.05em;">
        NEXUS AI STUDIO · Built with ❤ on Hugging Face Spaces · Models: Qwen2.5 · FLUX.1-schnell · MusicGen · ModelScope · BLIP-2
    </div>
    """)

demo.launch()