Spaces:

hari7261
/

Nexus-AI-Studio

Runtime error

App Files Files Community

Nexus-AI-Studio / app.py

hari7261

Create app.py

fd4d7fa verified about 2 months ago

raw

history blame contribute delete

17.6 kB

	import gradio as gr
	import requests
	import os
	import base64
	import io
	import json
	from PIL import Image
	import numpy as np

	# Hugging Face access token read from the environment; surrounding whitespace
	# is stripped so a stray newline in the configured value cannot end up in the
	# HTTP header.
	HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()

	# Send an Authorization header only when a token is actually configured:
	# without one, "Bearer " followed by nothing is a malformed credential rather
	# than an anonymous request.
	HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

	# ── Model IDs ──────────────────────────────────────────────────────────────────
	TEXT_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"  # tiny chat LLM
	IMAGE_MODEL = "black-forest-labs/FLUX.1-schnell"  # fast image gen
	AUDIO_MODEL = "facebook/musicgen-small"  # audio/music gen
	VIDEO_MODEL = "ali-vilab/text-to-video-ms-1.7b"  # text-to-video
	MULTIMODAL_MODEL = "Salesforce/blip2-opt-2.7b"  # image+text (VQA / caption)

	# ── HF Inference API helpers ───────────────────────────────────────────────────

	def query_text(messages: list, system: str = "") -> str:
	    """Chat via HF serverless inference (text generation).

	    The chat-completions route is tried first. If it does not produce a
	    reply, the plain text-generation route is tried with the message
	    contents joined by newlines.

	    Args:
	        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
	        system: Optional system prompt. When non-empty it is sent as a
	            leading system message ahead of ``messages``.

	    Returns:
	        The generated text, or a string starting with "⚠️ Error" that
	        describes why the primary request failed. Network and
	        response-format problems are reported this way instead of raised.
	    """
	    if system:
	        # Build a new list so the caller's messages are left untouched.
	        messages = [{"role": "system", "content": system}, *messages]

	    url = f"https://api-inference.huggingface.co/models/{TEXT_MODEL}/v1/chat/completions"
	    payload = {
	        "model": TEXT_MODEL,
	        "messages": messages,
	        "max_tokens": 1024,
	        "temperature": 0.7,
	    }
	    try:
	        r = requests.post(url, headers=HEADERS, json=payload, timeout=60)
	        if r.status_code == 200:
	            return r.json()["choices"][0]["message"]["content"]
	        error = f"⚠️ Error {r.status_code}: {r.text[:300]}"
	    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
	        # Timeouts, connection failures, non-JSON bodies and unexpected
	        # response shapes all lead to the fallback below.
	        error = f"⚠️ Error: {exc}"

	    # fallback plain text-generation endpoint
	    url2 = f"https://api-inference.huggingface.co/models/{TEXT_MODEL}"
	    prompt = "\n".join(str(m.get("content") or "") for m in messages)
	    fallback_payload = {"inputs": prompt, "parameters": {"max_new_tokens": 512}}
	    try:
	        r2 = requests.post(url2, headers=HEADERS, json=fallback_payload, timeout=60)
	    except requests.RequestException:
	        # The fallback is best-effort; report the primary failure.
	        return error
	    if r2.status_code != 200:
	        return error
	    try:
	        result = r2.json()
	    except ValueError:
	        return error
	    if isinstance(result, list) and result and isinstance(result[0], dict):
	        return result[0].get("generated_text", str(result))
	    return str(result)


	def query_image(prompt: str) -> Image.Image | str:
	    """Generate image via HF inference.

	    Args:
	        prompt: Text description of the image to generate.

	    Returns:
	        The decoded PIL image on success, otherwise a string starting with
	        "⚠️ Error" describing the failure. Callers tell the two cases apart
	        with ``isinstance(result, str)``.
	    """
	    url = f"https://api-inference.huggingface.co/models/{IMAGE_MODEL}"
	    try:
	        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
	    except requests.RequestException as exc:
	        # Timeouts and connection failures become an error string instead
	        # of an unhandled exception in the UI callback.
	        return f"⚠️ Error: {exc}"
	    if r.status_code != 200:
	        return f"⚠️ Error {r.status_code}: {r.text[:300]}"
	    try:
	        picture = Image.open(io.BytesIO(r.content))
	        # Image.open is lazy; load() forces decoding so a corrupt or
	        # truncated body is detected here rather than later in the UI.
	        picture.load()
	    except OSError as exc:
	        return f"⚠️ Error: response was not a valid image ({exc})"
	    return picture


	def query_audio(prompt: str) -> str | None:
	    """Generate audio via HF inference, returns a temp file path.

	    Args:
	        prompt: Text description of the audio to generate.

	    Returns:
	        Path of a newly created ``.wav``-suffixed temporary file holding
	        the response bytes, or ``None`` if the request failed, returned a
	        non-200 status, or had an empty body. The file is not deleted here.
	    """
	    import tempfile

	    url = f"https://api-inference.huggingface.co/models/{AUDIO_MODEL}"
	    try:
	        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
	    except requests.RequestException:
	        return None
	    if r.status_code != 200 or not r.content:
	        return None
	    # A unique file per call: a single fixed path would let overlapping
	    # requests overwrite each other's output.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
	        f.write(r.content)
	    return f.name


	def query_video(prompt: str) -> str | None:
	    """Generate short video via HF inference, returns a temp file path.

	    Args:
	        prompt: Text description of the video to generate.

	    Returns:
	        Path of a newly created ``.mp4``-suffixed temporary file holding
	        the response bytes, or ``None`` if the request failed, returned a
	        non-200 status, or had an empty body. The file is not deleted here.
	    """
	    import tempfile

	    url = f"https://api-inference.huggingface.co/models/{VIDEO_MODEL}"
	    try:
	        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=180)
	    except requests.RequestException:
	        return None
	    if r.status_code != 200 or not r.content:
	        return None
	    # A unique file per call: a single fixed path would let overlapping
	    # requests overwrite each other's output.
	    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
	        f.write(r.content)
	    return f.name


	def query_multimodal(image: Image.Image | None, text: str) -> tuple[str, Image.Image | None]:
	    """VQA / image captioning with BLIP-2. Also returns the original image.

	    Args:
	        image: Image to analyse, or ``None`` when nothing was uploaded.
	        text: Question about the image; a generic "describe" prompt is
	            used when it is empty.

	    Returns:
	        ``(answer, image)``. ``answer`` is the model's reply, a hint to
	        upload an image when ``image`` is ``None``, or a string starting
	        with "⚠️ Error" on failure. ``image`` is passed back unchanged.
	    """
	    if image is None:
	        # No image → nothing to analyse
	        return "Please upload an image for multimodal analysis.", None

	    # Encode image to base64
	    buf = io.BytesIO()
	    image.save(buf, format="PNG")
	    b64 = base64.b64encode(buf.getvalue()).decode()

	    url = f"https://api-inference.huggingface.co/models/{MULTIMODAL_MODEL}"
	    payload = {"inputs": {"image": b64, "question": text or "Describe this image in detail."}}
	    try:
	        r = requests.post(url, headers=HEADERS, json=payload, timeout=90)
	    except requests.RequestException as exc:
	        return f"⚠️ Error: {exc}", image
	    if r.status_code != 200:
	        return f"⚠️ Error {r.status_code}: {r.text[:300]}", image
	    try:
	        result = r.json()
	    except ValueError:
	        return f"⚠️ Error: unexpected response: {r.text[:300]}", image

	    # A list response is reduced to its first item; an empty list is
	    # left as-is and rendered with str() below.
	    if isinstance(result, list) and result:
	        result = result[0]
	    if isinstance(result, dict):
	        # VQA-style replies carry "answer"; captioning-style replies carry
	        # "generated_text". Fall back to the raw payload if neither is present.
	        answer = result.get("answer") or result.get("generated_text") or str(result)
	    else:
	        answer = str(result)
	    return answer, image


	# ── Chat state helper ──────────────────────────────────────────────────────────

	def chat_respond(user_msg: str, history: list) -> tuple[str, list]:
	    """Handle one chat turn for the Chat tab.

	    Args:
	        user_msg: Text the user typed; blank or ``None`` is ignored.
	        history: Previous turns as ``(user, assistant)`` pairs, or ``None``
	            for a fresh conversation.

	    Returns:
	        ``("", updated_history)``. The empty string clears the input box;
	        the history is a new list, so the caller's list is never mutated.
	    """
	    history = list(history or [])
	    if not (user_msg or "").strip():
	        return "", history

	    messages = [{"role": "system", "content": "You are NEXUS, an advanced AI assistant. Be helpful, thorough, and thoughtful."}]
	    for human, bot in history:
	        # A turn may be None; send an empty string rather than a null content.
	        messages.append({"role": "user", "content": "" if human is None else human})
	        messages.append({"role": "assistant", "content": "" if bot is None else bot})
	    messages.append({"role": "user", "content": user_msg})

	    reply = query_text(messages)
	    history.append((user_msg, reply))
	    return "", history


	def gen_image(prompt: str, progress=gr.Progress()):
	    """Gradio handler for the Image Gen tab.

	    Args:
	        prompt: Text description of the image.
	        progress: Gradio progress tracker used to report status.

	    Returns:
	        ``(image, status)``: the generated image and a success message, or
	        ``(None, message)`` when the prompt is blank or generation failed.
	    """
	    if not (prompt or "").strip():
	        # Nothing to generate; skip the network round-trip.
	        return None, "⚠️ Please enter a prompt first."
	    progress(0.2, desc="Connecting to FLUX…")
	    result = query_image(prompt)
	    progress(1.0, desc="Done")
	    if isinstance(result, str):  # error string
	        return None, result
	    return result, "✅ Image generated successfully!"


	def gen_audio(prompt: str, progress=gr.Progress()):
	    """Gradio handler for the Audio Gen tab.

	    Args:
	        prompt: Text description of the music.
	        progress: Gradio progress tracker used to report status.

	    Returns:
	        ``(path, status)``: the generated audio file path and a success
	        message, or ``(None, message)`` when the prompt is blank or
	        generation failed.
	    """
	    if not (prompt or "").strip():
	        # Nothing to generate; skip the network round-trip.
	        return None, "⚠️ Please enter a prompt first."
	    progress(0.2, desc="Composing music…")
	    path = query_audio(prompt)
	    progress(1.0, desc="Done")
	    if path:
	        return path, "✅ Audio generated!"
	    return None, "⚠️ Audio generation failed. The model may be loading – try again in a moment."


	def gen_video(prompt: str, progress=gr.Progress()):
	    """Gradio handler for the Video Gen tab.

	    Args:
	        prompt: Text description of the video.
	        progress: Gradio progress tracker used to report status.

	    Returns:
	        ``(path, status)``: the generated video file path and a success
	        message, or ``(None, message)`` when the prompt is blank or
	        generation failed.
	    """
	    if not (prompt or "").strip():
	        # Nothing to generate; skip the network round-trip.
	        return None, "⚠️ Please enter a prompt first."
	    progress(0.2, desc="Rendering frames…")
	    path = query_video(prompt)
	    progress(1.0, desc="Done")
	    if path:
	        return path, "✅ Video generated!"
	    return None, "⚠️ Video generation failed. The model may be loading – try again in a moment."


	def gen_multimodal(image, question: str, progress=gr.Progress()):
	    """Gradio handler for the Multimodal tab.

	    Reports progress, delegates to ``query_multimodal`` and returns its
	    ``(answer, image)`` pair unchanged.
	    """
	    progress(0.3, desc="Analyzing image…")
	    reply, echoed_image = query_multimodal(image, question)
	    progress(1.0, desc="Done")
	    return (reply, echoed_image)


	# ── CSS ────────────────────────────────────────────────────────────────────────
	# Custom stylesheet handed to gr.Blocks(css=CSS) in the UI section. It defines a
	# dark palette as CSS variables in :root and then styles, in order: the page
	# header and badges, the tab bar, chat messages, text inputs, primary buttons,
	# the status box and the small section labels. The class names used here
	# (.nexus-*, .badge-*, .tab-nav, .chatbot, .btn-primary, .status-box,
	# .section-label) are the ones attached through HTML markup and elem_classes
	# when the interface is built.
	CSS = """
	@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:ital,wght@0,300;0,500;0,700;1,300&display=swap');

	:root {
	--bg: #0a0a0f;
	--panel: #111118;
	--border: #1e1e2e;
	--accent: #7c3aed;
	--accent2: #06b6d4;
	--accent3: #f59e0b;
	--text: #e2e8f0;
	--muted: #64748b;
	--success: #10b981;
	--danger: #ef4444;
	}

	body, .gradio-container { background: var(--bg) !important; font-family: 'DM Sans', sans-serif; color: var(--text); }

	/* Header */
	.nexus-header {
	text-align: center;
	padding: 2.5rem 1rem 1rem;
	background: linear-gradient(135deg, #0a0a0f 0%, #130d22 50%, #0a0a0f 100%);
	border-bottom: 1px solid var(--border);
	margin-bottom: 1.5rem;
	}
	.nexus-title {
	font-family: 'Space Mono', monospace;
	font-size: clamp(2rem, 6vw, 3.5rem);
	font-weight: 700;
	background: linear-gradient(90deg, var(--accent) 0%, var(--accent2) 50%, var(--accent3) 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	letter-spacing: -0.02em;
	margin: 0;
	}
	.nexus-sub {
	color: var(--muted);
	font-size: 0.95rem;
	margin-top: 0.5rem;
	letter-spacing: 0.08em;
	text-transform: uppercase;
	}
	.badge-row { display: flex; justify-content: center; gap: 0.5rem; flex-wrap: wrap; margin-top: 1rem; }
	.badge {
	font-family: 'Space Mono', monospace;
	font-size: 0.65rem;
	padding: 0.25rem 0.75rem;
	border-radius: 999px;
	border: 1px solid;
	letter-spacing: 0.05em;
	}
	.badge-chat { border-color: var(--accent); color: var(--accent); }
	.badge-img { border-color: var(--accent2); color: var(--accent2); }
	.badge-audio { border-color: var(--accent3); color: var(--accent3); }
	.badge-video { border-color: var(--success); color: var(--success); }
	.badge-mm { border-color: var(--danger); color: var(--danger); }

	/* Tabs */
	.tab-nav { border-bottom: 1px solid var(--border) !important; }
	.tab-nav button {
	font-family: 'Space Mono', monospace !important;
	font-size: 0.8rem !important;
	letter-spacing: 0.05em !important;
	color: var(--muted) !important;
	padding: 0.75rem 1.25rem !important;
	border-bottom: 2px solid transparent !important;
	transition: all 0.2s !important;
	}
	.tab-nav button.selected {
	color: var(--accent2) !important;
	border-bottom-color: var(--accent2) !important;
	}

	/* Chatbot */
	.chatbot .message.user { background: rgba(124,58,237,0.15) !important; border-left: 3px solid var(--accent) !important; }
	.chatbot .message.bot { background: rgba(6,182,212,0.08) !important; border-left: 3px solid var(--accent2) !important; }

	/* Inputs */
	textarea, input[type=text] {
	background: var(--panel) !important;
	border: 1px solid var(--border) !important;
	color: var(--text) !important;
	border-radius: 8px !important;
	font-family: 'DM Sans', sans-serif !important;
	}
	textarea:focus, input:focus {
	border-color: var(--accent2) !important;
	box-shadow: 0 0 0 2px rgba(6,182,212,0.15) !important;
	}

	/* Buttons */
	.btn-primary {
	background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%) !important;
	color: #fff !important;
	font-family: 'Space Mono', monospace !important;
	font-size: 0.8rem !important;
	letter-spacing: 0.05em !important;
	border: none !important;
	border-radius: 8px !important;
	padding: 0.65rem 1.5rem !important;
	cursor: pointer !important;
	transition: opacity 0.2s !important;
	}
	.btn-primary:hover { opacity: 0.85 !important; }

	/* Status box */
	.status-box {
	background: var(--panel);
	border: 1px solid var(--border);
	border-radius: 8px;
	padding: 0.75rem 1rem;
	font-family: 'Space Mono', monospace;
	font-size: 0.75rem;
	color: var(--muted);
	min-height: 2.5rem;
	}

	/* Section labels */
	.section-label {
	font-family: 'Space Mono', monospace;
	font-size: 0.7rem;
	letter-spacing: 0.1em;
	color: var(--muted);
	text-transform: uppercase;
	margin-bottom: 0.4rem;
	}
	"""

	# ── Build UI ───────────────────────────────────────────────────────────────────
	# Assembles the Gradio Blocks app: an HTML header, five tabs (chat, image,
	# audio, video, multimodal) whose buttons are wired to the handler functions
	# defined above, and an HTML footer; then starts the server.
	# NOTE(review): in this copy every line sits at the same indentation, so the
	# nesting of the `with` blocks below is not visible here — confirm the intended
	# layout against the original app.py before running.

	with gr.Blocks(css=CSS, title="NEXUS AI Studio", theme=gr.themes.Base()) as demo:

	# Header
	gr.HTML("""
	<div class="nexus-header">
	<h1 class="nexus-title">⬡ NEXUS AI STUDIO</h1>
	<p class="nexus-sub">Multi-Modal Intelligence Platform · Powered by Hugging Face</p>
	<div class="badge-row">
	<span class="badge badge-chat">💬 CHAT</span>
	<span class="badge badge-img">🖼 IMAGE GEN</span>
	<span class="badge badge-audio">🎵 AUDIO GEN</span>
	<span class="badge badge-video">🎬 VIDEO GEN</span>
	<span class="badge badge-mm">🔮 MULTIMODAL</span>
	</div>
	</div>
	""")

	with gr.Tabs(elem_classes="tab-nav"):

	# ── TAB 1: Chat ────────────────────────────────────────────────────────
	with gr.Tab("💬 Chat"):
	gr.HTML('<p class="section-label">Conversational AI · Qwen 2.5</p>')
	chatbot = gr.Chatbot(
	label="",
	height=460,
	bubble_full_width=False,
	elem_classes="chatbot",
	show_label=False,
	avatar_images=(None, "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"),
	)
	with gr.Row():
	chat_in = gr.Textbox(
	placeholder="Ask me anything — code, math, science, creative writing…",
	show_label=False,
	scale=8,
	lines=1,
	)
	send_btn = gr.Button("SEND →", elem_classes="btn-primary", scale=1)
	clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm")

	# Clicking SEND and submitting the textbox both run chat_respond, which
	# returns ("", history): the textbox is cleared and the chatbot refreshed.
	send_btn.click(chat_respond, [chat_in, chatbot], [chat_in, chatbot])
	chat_in.submit(chat_respond, [chat_in, chatbot], [chat_in, chatbot])
	# Reset the chatbot to an empty history and blank the textbox.
	clear_btn.click(lambda: ([], ""), None, [chatbot, chat_in])

	# ── TAB 2: Image Generation ────────────────────────────────────────────
	with gr.Tab("🖼 Image Gen"):
	gr.HTML('<p class="section-label">Text → Image · FLUX.1-schnell</p>')
	with gr.Row():
	with gr.Column(scale=1):
	img_prompt = gr.Textbox(
	label="Prompt",
	placeholder="A neon-lit cyberpunk city at midnight, rain reflections, ultra-detailed…",
	lines=4,
	)
	img_btn = gr.Button("✦ GENERATE IMAGE", elem_classes="btn-primary")
	img_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
	with gr.Column(scale=1):
	img_out = gr.Image(label="Generated Image", show_label=False)

	# gen_image returns (image or None, status text), matching the two outputs.
	img_btn.click(gen_image, [img_prompt], [img_out, img_status])

	# ── TAB 3: Audio Generation ────────────────────────────────────────────
	with gr.Tab("🎵 Audio Gen"):
	gr.HTML('<p class="section-label">Text → Music · MusicGen Small</p>')
	with gr.Row():
	with gr.Column(scale=1):
	audio_prompt = gr.Textbox(
	label="Describe the music",
	placeholder="Lo-fi hip hop beat, warm piano chords, gentle rain ambiance, 80 BPM…",
	lines=4,
	)
	audio_btn = gr.Button("♪ GENERATE AUDIO", elem_classes="btn-primary")
	audio_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
	with gr.Column(scale=1):
	audio_out = gr.Audio(label="Generated Audio", show_label=True)

	# gen_audio returns (file path or None, status text).
	audio_btn.click(gen_audio, [audio_prompt], [audio_out, audio_status])

	# ── TAB 4: Video Generation ────────────────────────────────────────────
	with gr.Tab("🎬 Video Gen"):
	gr.HTML('<p class="section-label">Text → Video · ModelScope 1.7B</p>')
	with gr.Row():
	with gr.Column(scale=1):
	video_prompt = gr.Textbox(
	label="Describe the video",
	placeholder="A lone astronaut walking on Mars at sunset, dust swirling around boots…",
	lines=4,
	)
	video_btn = gr.Button("▶ GENERATE VIDEO", elem_classes="btn-primary")
	video_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
	with gr.Column(scale=1):
	video_out = gr.Video(label="Generated Video", show_label=True)

	# gen_video returns (file path or None, status text).
	video_btn.click(gen_video, [video_prompt], [video_out, video_status])

	# ── TAB 5: Multimodal ─────────────────────────────────────────────────
	with gr.Tab("🔮 Multimodal"):
	gr.HTML('<p class="section-label">Image + Text → Answer · BLIP-2</p>')
	with gr.Row():
	with gr.Column(scale=1):
	mm_image = gr.Image(
	label="Upload an Image",
	type="pil",
	show_label=True,
	)
	mm_question = gr.Textbox(
	label="Your question about the image",
	placeholder="What is happening in this image? What objects do you see?",
	lines=3,
	)
	mm_btn = gr.Button("🔮 ANALYZE", elem_classes="btn-primary")
	with gr.Column(scale=1):
	mm_answer = gr.Textbox(label="AI Answer", lines=6, show_label=True)
	mm_img_out = gr.Image(label="Processed Image", show_label=True)

	# gen_multimodal returns (answer text, image); the image shown in
	# mm_img_out is the one handed back by query_multimodal.
	mm_btn.click(gen_multimodal, [mm_image, mm_question], [mm_answer, mm_img_out])

	# Footer
	gr.HTML("""
	<div style="text-align:center; padding: 1.5rem; border-top: 1px solid #1e1e2e; margin-top: 1.5rem; color: #475569; font-size: 0.75rem; font-family: 'Space Mono', monospace; letter-spacing: 0.05em;">
	NEXUS AI STUDIO · Built with ❤ on Hugging Face Spaces · Models: Qwen2.5 · FLUX.1-schnell · MusicGen · ModelScope · BLIP-2
	</div>
	""")

	# Start the Gradio server. This runs whenever the module is executed or
	# imported; there is no `if __name__ == "__main__"` guard.
	demo.launch()