Spaces:

HedronCreeper
/

CreeperAI

Running

App Files Files Community

CryptoCreeper committed 7 days ago

Commit

df3c6eb

verified ·

1 Parent(s): 7a83adb

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -13

app.py CHANGED Viewed

@@ -4,33 +4,45 @@ from diffusers import DiffusionPipeline
 import torch
 import re
 import time
 device = "cuda" if torch.cuda.is_available() else "cpu"
-models = {
     "Normal": "Qwen/Qwen3-0.6B",
     "Thinking": "Qwen/Qwen2.5-1.5B-Instruct"
 }
-loaded_models = {}
-loaded_tokenizers = {}
 image_model_id = "SimianLuo/LCM_Dreamshaper_v7"
 image_pipe = DiffusionPipeline.from_pretrained(image_model_id)
 image_pipe.to(device)
 def get_chat_model(mode):
-    model_id = models[mode]
-    if model_id not in loaded_models:
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype="auto",
             device_map="auto"
         )
-        loaded_models[model_id] = model
-        loaded_tokenizers[model_id] = tokenizer
-    return loaded_models[model_id], loaded_tokenizers[model_id]
 def chat_logic(user_input, mode):
     model, tokenizer = get_chat_model(mode)
@@ -47,7 +59,6 @@ def image_logic(prompt, width, height, steps):
     start_time = time.time()
     final_prompt = f"{prompt}, centered and realistic (if applicable)"
     yield "💥 IGNITING... (Image generator AI)...", None
     image = image_pipe(
         prompt=final_prompt,
         width=int(width),
@@ -57,10 +68,35 @@ def image_logic(prompt, width, height, steps):
         lcm_origin_steps=50,
         output_type="pil"
     ).images[0]
     duration = round(time.time() - start_time, 2)
     yield f"💥 EXPLODED in {duration}s", image
 creeper_css = """
 body { background-color: #000000; }
 .gradio-container { background-color: #1e1e1e; border: 10px solid #2e8b57 !important; font-family: 'Courier New', Courier, monospace; color: #00ff00; }
@@ -68,7 +104,7 @@ footer { display: none !important; }
 .gr-button-primary { background-color: #4A7023 !important; border: 4px solid #000 !important; color: white !important; font-weight: bold; text-transform: uppercase; }
 .gr-button-primary:hover { background-color: #5ea032 !important; box-shadow: 0 0 20px #2e8b57; }
 label span { color: #2e8b57 !important; font-weight: bold; font-size: 1.2em; }
-textarea, input, .gr-box, .gr-input { background-color: #2e2e2e !important; color: #00ff00 !important; border: 3px solid #4A7023 !important; }
 .tabs { border-bottom: 5px solid #4A7023 !important; }
 .tab-nav button.selected { background-color: #4A7023 !important; color: white !important; }
 """
@@ -85,7 +121,6 @@ with gr.Blocks(css=creeper_css, title="CREEPER AI HUB") as demo:
                 chat_input = gr.Textbox(lines=4, placeholder="Ssssss... Talk to the Creeper...", label="Message")
                 chat_output = gr.Textbox(label="Creeper Says")
                 chat_btn = gr.Button("EXPLODE TEXT", variant="primary")
             chat_btn.click(fn=chat_logic, inputs=[chat_input, mode_radio], outputs=chat_output)
         with gr.TabItem("TNT-IMAGE"):
@@ -101,8 +136,22 @@ with gr.Blocks(css=creeper_css, title="CREEPER AI HUB") as demo:
                 with gr.Column(scale=1):
                     img_status = gr.Markdown("### Status: 🟢 Armed")
                     img_output = gr.Image(label="Rendered Loot")
             img_btn.click(fn=image_logic, inputs=[img_prompt, w_slider, h_slider, s_slider], outputs=[img_status, img_output])
 if __name__ == "__main__":
     demo.launch()

 import torch
 import re
 import time
+import soundfile as sf
+from qwen_tts import Qwen3TTSModel
+from langdetect import detect
+import os
 device = "cuda" if torch.cuda.is_available() else "cpu"
+chat_models = {
     "Normal": "Qwen/Qwen3-0.6B",
     "Thinking": "Qwen/Qwen2.5-1.5B-Instruct"
 }
+loaded_chat_models = {}
+loaded_chat_tokenizers = {}
 image_model_id = "SimianLuo/LCM_Dreamshaper_v7"
 image_pipe = DiffusionPipeline.from_pretrained(image_model_id)
 image_pipe.to(device)
+tts_model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
+SUPPORTED_VOICES = ['aiden', 'dylan', 'eric', 'ono_anna', 'ryan', 'serena', 'sohee', 'uncle_fu', 'vivian']
+tts_model = Qwen3TTSModel.from_pretrained(
+    tts_model_id,
+    device_map=device,
+    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32
+)
 # Return the (model, tokenizer) pair for the given chat mode, loading it on
 # first use and reusing the cached objects on later calls.
 # `mode` must be a key of chat_models; any other value raises KeyError.
 def get_chat_model(mode):
+    model_id = chat_models[mode]
     # Load only when this model id has not been cached yet.
+    if model_id not in loaded_chat_models:
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype="auto",
             device_map="auto"
         )
         # Both caches are keyed by the model id, not by the mode name.
+        loaded_chat_models[model_id] = model
+        loaded_chat_tokenizers[model_id] = tokenizer
+    return loaded_chat_models[model_id], loaded_chat_tokenizers[model_id]
 def chat_logic(user_input, mode):
     model, tokenizer = get_chat_model(mode)
     start_time = time.time()
     final_prompt = f"{prompt}, centered and realistic (if applicable)"
     yield "💥 IGNITING... (Image generator AI)...", None
     image = image_pipe(
         prompt=final_prompt,
         width=int(width),
         lcm_origin_steps=50,
         output_type="pil"
     ).images[0]
     duration = round(time.time() - start_time, 2)
     yield f"💥 EXPLODED in {duration}s", image
+def tts_logic(text, voice, instructions, auto_detect):
+    try:
+        lang_map = {
+            'zh': 'Chinese', 'en': 'English', 'jp': 'Japanese',
+            'ko': 'Korean', 'de': 'German', 'fr': 'French',
+            'ru': 'Russian', 'pt': 'Portuguese', 'es': 'Spanish', 'it': 'Italian'
+        }
+        detected_lang = "English"
+        if auto_detect:
+            try:
+                raw_lang = detect(text).split('-')[0]
+                detected_lang = lang_map.get(raw_lang, "English")
+            except:
+                pass
+        wavs, sr = tts_model.generate_custom_voice(
+            language=detected_lang,
+            speaker=voice,
+            instruct=instructions,
+            text=text
+        )
+        output_path = "creeper_voice.wav"
+        sf.write(output_path, wavs[0], sr)
+        return output_path, f"Language: {detected_lang} | Speaker: {voice}"
+    except Exception as e:
+        return None, f"System Error: {str(e)}"
 creeper_css = """
 body { background-color: #000000; }
 .gradio-container { background-color: #1e1e1e; border: 10px solid #2e8b57 !important; font-family: 'Courier New', Courier, monospace; color: #00ff00; }
 .gr-button-primary { background-color: #4A7023 !important; border: 4px solid #000 !important; color: white !important; font-weight: bold; text-transform: uppercase; }
 .gr-button-primary:hover { background-color: #5ea032 !important; box-shadow: 0 0 20px #2e8b57; }
 label span { color: #2e8b57 !important; font-weight: bold; font-size: 1.2em; }
+textarea, input, .gr-box, .gr-input, select, .gr-dropdown { background-color: #2e2e2e !important; color: #00ff00 !important; border: 3px solid #4A7023 !important; }
 .tabs { border-bottom: 5px solid #4A7023 !important; }
 .tab-nav button.selected { background-color: #4A7023 !important; color: white !important; }
 """
                 chat_input = gr.Textbox(lines=4, placeholder="Ssssss... Talk to the Creeper...", label="Message")
                 chat_output = gr.Textbox(label="Creeper Says")
                 chat_btn = gr.Button("EXPLODE TEXT", variant="primary")
             chat_btn.click(fn=chat_logic, inputs=[chat_input, mode_radio], outputs=chat_output)
         with gr.TabItem("TNT-IMAGE"):
                 with gr.Column(scale=1):
                     img_status = gr.Markdown("### Status: 🟢 Armed")
                     img_output = gr.Image(label="Rendered Loot")
             img_btn.click(fn=image_logic, inputs=[img_prompt, w_slider, h_slider, s_slider], outputs=[img_status, img_output])
+        with gr.TabItem("NOTE-BLOCK (TTS)"):
+            gr.Markdown("### Smart Audio Studio")
+            with gr.Row():
+                with gr.Column():
+                    tts_input = gr.Textbox(label="Text to Speak", placeholder="Enter text for the block to play...", lines=4)
+                    with gr.Row():
+                        voice_select = gr.Dropdown(choices=SUPPORTED_VOICES, value="vivian", label="Select Speaker")
+                        auto_lang = gr.Checkbox(label="Auto-detect Language", value=True)
+                    style_instruct = gr.Textbox(label="Style Instruction", value="Speak naturally")
+                    tts_btn = gr.Button("EXPLODE AUDIO", variant="primary")
+                with gr.Column():
+                    audio_output = gr.Audio(label="Audio Output", type="filepath")
+                    status_info = gr.Label(label="Block Metadata")
+            tts_btn.click(fn=tts_logic, inputs=[tts_input, voice_select, style_instruct, auto_lang], outputs=[audio_output, status_info])
 if __name__ == "__main__":
     demo.launch()