"""CREEPER AI HUB — a Minecraft-themed Gradio app bundling three tools:

* SSSSS-CHAT  — single-turn chat against a small Qwen causal LM.
* TNT-IMAGE   — SDXL-Turbo text-to-image generation.
* NOTE-BLOCK  — Qwen3 custom-voice text-to-speech.

All models are loaded lazily (per-tab "Load Model" buttons) and cached in
module-level globals so repeated clicks are no-ops.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from diffusers import DiffusionPipeline
import torch
import re
import time
import soundfile as sf
from qwen_tts import Qwen3TTSModel
from langdetect import detect
import os

device = "cuda" if torch.cuda.is_available() else "cpu"

# --- Chat Logic ---
chat_models = {
    "Normal": "Qwen/Qwen3-0.6B",
    "Thinking": "Qwen/Qwen2.5-1.5B-Instruct",
}
loaded_chat_models = {}
loaded_chat_tokenizers = {}

# Strips a leading <think>...</think> reasoning block (emitted by "Thinking"
# models) plus any trailing whitespace. NOTE(review): the original pattern
# read r'.*?\s*\n?' — its angle-bracket tags were evidently lost (an
# HTML-escaping artifact), and as written it deleted every whitespace run in
# the reply; restored here.
_THINK_RE = re.compile(r"<think>.*?</think>\s*\n?", flags=re.DOTALL)


def load_chat_model(mode):
    """Lazily load and cache the tokenizer/model pair for *mode*.

    Args:
        mode: Key into ``chat_models`` ("Normal" or "Thinking").

    Returns:
        A status string shown in the UI. Loading is skipped when the model
        is already cached.
    """
    model_id = chat_models[mode]
    if model_id not in loaded_chat_models:
        gr.Info(f"Loading {mode} Brain...")
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            # bf16 only on GPU; CPU stays in fp32 for compatibility.
            torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
            device_map="auto",
        )
        loaded_chat_models[model_id] = model
        loaded_chat_tokenizers[model_id] = tokenizer
    return "Status: 🟢 Loaded"


def chat_logic(user_input, mode):
    """Generate a single-turn chat reply using the model selected by *mode*.

    Returns an error string if the model has not been loaded yet; otherwise
    the decoded reply with any <think> reasoning block removed.
    """
    model_id = chat_models[mode]
    if model_id not in loaded_chat_models:
        return "❌ Please click 'Load Model' first!"
    model = loaded_chat_models[model_id]
    tokenizer = loaded_chat_tokenizers[model_id]

    messages = [{"role": "user", "content": user_input}]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # No gradients are needed for generation; inference_mode avoids the
    # autograd bookkeeping entirely.
    with torch.inference_mode():
        generated_ids = model.generate(**model_inputs, max_new_tokens=1024)

    # Decode only the newly generated tokens (skip the echoed prompt).
    new_tokens = generated_ids[0][len(model_inputs.input_ids[0]):]
    response = tokenizer.batch_decode([new_tokens], skip_special_tokens=True)[0]
    return _THINK_RE.sub("", response).strip()


# --- Image Logic ---
image_model_id = "stabilityai/sdxl-turbo"
image_pipe = None


def load_image_model():
    """Lazily load the SDXL-Turbo pipeline into the module global."""
    global image_pipe
    if image_pipe is None:
        gr.Info("Priming TNT Engine...")
        pipe = DiffusionPipeline.from_pretrained(
            image_model_id,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        )
        pipe.to(device)
        image_pipe = pipe
    return "Status: 🟢 Loaded"


def image_logic(prompt, width, height, steps):
    """Yield (status, image) UI updates while generating an image.

    A generator so the UI can show an in-progress status before the
    (potentially slow) pipeline call completes.
    """
    if image_pipe is None:
        yield "❌ Load Model First", None
        return
    yield "💥 IGNITING...", None
    # SDXL-Turbo is distilled for few-step sampling without classifier-free
    # guidance, hence guidance_scale=0.0.
    image = image_pipe(
        prompt=prompt,
        width=int(width),
        height=int(height),
        num_inference_steps=int(steps),
        guidance_scale=0.0,
    ).images[0]
    yield "💥 EXPLODED", image


# --- TTS Logic ---
tts_model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
SUPPORTED_VOICES = [
    'aiden', 'dylan', 'eric', 'ono_anna', 'ryan',
    'serena', 'sohee', 'uncle_fu', 'vivian',
]
tts_model = None


def load_tts_model():
    """Lazily load the Qwen3 TTS model into the module global."""
    global tts_model
    if tts_model is None:
        gr.Info("Tuning Note-Blocks...")
        tts_model = Qwen3TTSModel.from_pretrained(
            tts_model_id,
            device_map=device,
            torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
        )
    return "Status: 🟢 Loaded"


def tts_logic(text, voice, inst, auto):
    """Synthesize *text* with speaker *voice* and style instructions *inst*.

    Returns (wav_path, status_string); (None, error_status) on failure.

    NOTE(review): *auto* is accepted for interface compatibility but unused;
    language is fixed to English. The unused ``langdetect`` import suggests
    language auto-detection was planned — confirm before wiring it up.
    """
    if tts_model is None:
        return None, "Status: ❌ Not Loaded"
    try:
        wavs, sr = tts_model.generate_custom_voice(
            language="English",
            speaker=voice,
            instruct=inst,
            text=text,
        )
        output_path = "creeper_voice.wav"
        sf.write(output_path, wavs[0], sr)
        return output_path, "Status: 🟢 Audio Generated"
    except Exception as e:
        # Surface the failure in the UI status line instead of crashing.
        return None, f"Status: ❌ Error: {str(e)}"


# --- UI Styles ---
creeper_css = """
body { background-color: #000000; }
.gradio-container { background-color: #1e1e1e; border: 8px solid #2e8b57; color: #00ff00; }
.small-status { font-size: 0.8em; color: #2e8b57; margin-top: -10px; }
.gr-button-primary { background-color: #4A7023 !important; border: 2px solid #000 !important; }
"""

with gr.Blocks(css=creeper_css, title="CREEPER AI HUB") as demo:
    gr.Markdown("# 🟩 CREEPER AI HUB 🟩")
    with gr.Tabs():
        # --- Chat Tab ---
        with gr.TabItem("SSSSS-CHAT"):
            with gr.Row():
                chat_status = gr.Markdown(
                    "Status: 🔴 Not Loaded", elem_classes=["small-status"]
                )
            with gr.Row():
                mode_radio = gr.Radio(
                    ["Normal", "Thinking"], value="Normal", label="Brain Mode"
                )
                load_chat_btn = gr.Button("Load Model")
            chat_in = gr.Textbox(label="Message", lines=3)
            chat_out = gr.Textbox(label="Creeper Says")
            chat_btn = gr.Button("EXPLODE TEXT", variant="primary")
            load_chat_btn.click(load_chat_model, mode_radio, chat_status)
            chat_btn.click(chat_logic, [chat_in, mode_radio], chat_out)

        # --- Image Tab ---
        with gr.TabItem("TNT-IMAGE"):
            with gr.Row():
                img_status = gr.Markdown(
                    "Status: 🔴 Not Loaded", elem_classes=["small-status"]
                )
            with gr.Row():
                img_prompt = gr.Textbox(
                    label="Visual Idea", placeholder="Pixel art forest..."
                )
                load_img_btn = gr.Button("Load Model")
            with gr.Row():
                w_s = gr.Slider(256, 1024, 512, step=64, label="Width")
                h_s = gr.Slider(256, 1024, 512, step=64, label="Height")
                s_s = gr.Slider(1, 10, 4, step=1, label="Steps")
            img_btn = gr.Button("EXPLODE IMAGE", variant="primary")
            img_out = gr.Image(label="Output")
            load_img_btn.click(load_image_model, None, img_status)
            img_btn.click(
                image_logic, [img_prompt, w_s, h_s, s_s], [img_status, img_out]
            )

        # --- TTS Tab ---
        with gr.TabItem("NOTE-BLOCK"):
            with gr.Row():
                tts_status = gr.Markdown(
                    "Status: 🔴 Not Loaded", elem_classes=["small-status"]
                )
            with gr.Row():
                voice_sel = gr.Dropdown(
                    SUPPORTED_VOICES, value="vivian", label="Speaker"
                )
                load_tts_btn = gr.Button("Load Model")
            tts_in = gr.Textbox(label="Text to Speak")
            style_in = gr.Textbox("Speak naturally", label="Instructions")
            tts_btn = gr.Button("EXPLODE AUDIO", variant="primary")
            aud_out = gr.Audio(label="Audio Output")
            load_tts_btn.click(load_tts_model, None, tts_status)
            tts_btn.click(
                tts_logic,
                [tts_in, voice_sel, style_in, gr.State(True)],
                [aud_out, tts_status],
            )

if __name__ == "__main__":
    demo.launch(mcp_server=False)