Boobs00 committed on
Commit
207ef1d
·
verified ·
1 Parent(s): 696ab70

Add 2 files

Browse files
Files changed (2) hide show
  1. index.html +209 -4
  2. prompts.txt +2 -1
index.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Xortron7 - Advanced AI Companion with IDE</title>
7
  <script src="https://cdn.tailwindcss.com"></script>
8
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
  <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.32.7/ace.js"></script>
@@ -156,6 +156,81 @@
156
  text-align: center;
157
  font-weight: bold;
158
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  </style>
160
  </head>
161
  <body class="bg-gray-900 text-gray-100 min-h-screen">
@@ -190,6 +265,7 @@
190
  <span class="text-purple-400"><i class="fas fa-shield-alt mr-1"></i> ENCRYPTED</span>
191
  <span class="ml-4 text-blue-400"><i class="fas fa-search mr-1"></i> WEB SEARCH ENABLED</span>
192
  <span class="ml-4 text-yellow-400"><i class="fas fa-code mr-1"></i> IDE ACTIVE</span>
 
193
  </div>
194
  </div>
195
 
@@ -212,6 +288,9 @@
212
  <button id="tools-tab" class="px-4 py-2 font-medium text-gray-500">
213
  <i class="fas fa-tools mr-2"></i>Tools
214
  </button>
 
 
 
215
  </div>
216
 
217
  <!-- Chat panel -->
@@ -223,7 +302,7 @@
223
  <div class="bot-message text-white rounded-lg p-4 max-w-xs md:max-w-md lg:max-w-lg relative">
224
  <div class="absolute -left-2 top-3 w-4 h-4 rotate-45 bg-indigo-900"></div>
225
  <p class="font-bold text-purple-300">Xortron7:</p>
226
- <p>Neural pathways initialized. Persistent memory database connected. Web search integration active. IDE environment loaded. Ready to serve, human.</p>
227
  <div class="text-xs text-gray-400 mt-2 terminal-text">[SYSTEM BOOT COMPLETE]</div>
228
  </div>
229
  </div>
@@ -260,6 +339,10 @@
260
  <input type="checkbox" id="ide-toggle" class="form-checkbox h-3 w-3 text-yellow-600" checked>
261
  <span class="ml-2">IDE</span>
262
  </label>
 
 
 
 
263
  </div>
264
  <span id="status-indicator">[SYSTEM READY]</span>
265
  </div>
@@ -380,6 +463,7 @@
380
  <p class="text-green-400">$ Python 3.10.6 | Java 17.0.6 | Node.js 18.12.1</p>
381
  <p class="text-green-400">$ Android SDK tools available</p>
382
  <p class="text-green-400">$ APK tools and decompilers ready</p>
 
383
  <p class="text-green-400">$ Ready for commands</p>
384
  </div>
385
  </div>
@@ -556,6 +640,106 @@
556
  </div>
557
  </div>
558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
  <!-- System controls -->
560
  <div class="mt-6 bg-gray-800 rounded-lg p-4 cyberpunk-border">
561
  <div class="flex flex-wrap justify-between items-center">
@@ -572,6 +756,9 @@
572
  <button id="run-code" class="bg-green-700 hover:bg-green-600 text-white px-3 py-2 rounded text-sm terminal-text">
573
  <i class="fas fa-play mr-1"></i> Run Code
574
  </button>
 
 
 
575
  </div>
576
  <div class="flex items-center space-x-4">
577
  <div class="flex items-center">
@@ -581,7 +768,7 @@
581
  </div>
582
  </div>
583
  <div class="text-xs terminal-text">
584
- <span class="text-green-400">ACTIVE</span> | <span class="text-blue-400">SEARCH</span> | <span class="text-purple-400">SECURE</span> | <span class="text-yellow-400">IDE</span>
585
  </div>
586
  </div>
587
  </div>
@@ -591,7 +778,7 @@
591
  <!-- Footer -->
592
  <footer class="mt-8 py-4 px-6 bg-black text-center text-xs text-gray-500 terminal-text cyberpunk-border">
593
  <p>XORTRON CYBERNETIC LABORATORIES © 2023 | ALL SYSTEMS SECURE | SANDBOXED ENVIRONMENT</p>
594
- <p class="mt-1">WARNING: This AI system operates with persistent memory, web search, and full development capabilities.</p>
595
  </footer>
596
 
597
  <script>
@@ -640,9 +827,11 @@
640
  const webSearchToggle = document.getElementById('web-search-toggle');
641
  const memoryToggle = document.getElementById('memory-toggle');
642
  const ideToggle = document.getElementById('ide-toggle');
 
643
  const clearChatButton = document.getElementById('clear-chat');
644
  const exportChatButton = document.getElementById('export-chat');
645
  const runCodeButton = document.getElementById('run-code');
 
646
 
647
  // Tab elements
648
  const chatTab = document.getElementById('chat-tab');
@@ -650,11 +839,13 @@
650
  const searchTab = document.getElementById('search-tab');
651
  const ideTab = document.getElementById('ide-tab');
652
  const toolsTab = document.getElementById('tools-tab');
 
653
  const chatPanel = document.getElementById('chat-panel');
654
  const memoryPanel = document.getElementById('memory-panel');
655
  const searchPanel = document.getElementById('search-panel');
656
  const idePanel = document.getElementById('ide-panel');
657
  const toolsPanel = document.getElementById('tools-panel');
 
658
 
659
  // Search elements
660
  const searchQuery = document.getElementById('search-query');
@@ -667,6 +858,20 @@
667
  const terminalOutput = document.getElementById('terminal-output');
668
  const fileItems = document.querySelectorAll('.file-item');
669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
670
  // Initialize ACE Editor
671
  editor.setTheme("ace/theme/monokai");
672
  editor.session.setMode("ace/mode/python");
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Xortron7 - Advanced AI Companion with IDE & TTS</title>
7
  <script src="https://cdn.tailwindcss.com"></script>
8
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
  <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.32.7/ace.js"></script>
 
156
  text-align: center;
157
  font-weight: bold;
158
  }
159
+
160
+ .tts-controls {
161
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
162
+ border-radius: 8px;
163
+ padding: 15px;
164
+ margin-top: 15px;
165
+ }
166
+
167
+ .tts-slider {
168
+ width: 100%;
169
+ -webkit-appearance: none;
170
+ height: 8px;
171
+ border-radius: 4px;
172
+ background: #4a00e0;
173
+ outline: none;
174
+ }
175
+
176
+ .tts-slider::-webkit-slider-thumb {
177
+ -webkit-appearance: none;
178
+ appearance: none;
179
+ width: 18px;
180
+ height: 18px;
181
+ border-radius: 50%;
182
+ background: #8e2de2;
183
+ cursor: pointer;
184
+ }
185
+
186
+ .tts-audio-container {
187
+ display: flex;
188
+ align-items: center;
189
+ gap: 10px;
190
+ margin-top: 10px;
191
+ }
192
+
193
+ .tts-audio-visualizer {
194
+ flex-grow: 1;
195
+ height: 40px;
196
+ background: rgba(0, 0, 0, 0.3);
197
+ border-radius: 4px;
198
+ overflow: hidden;
199
+ position: relative;
200
+ }
201
+
202
+ .tts-audio-wave {
203
+ position: absolute;
204
+ top: 0;
205
+ left: 0;
206
+ width: 100%;
207
+ height: 100%;
208
+ background: linear-gradient(90deg, transparent, rgba(138, 43, 226, 0.5), transparent);
209
+ animation: wave 2s infinite linear;
210
+ }
211
+
212
+ @keyframes wave {
213
+ 0% { transform: translateX(-100%); }
214
+ 100% { transform: translateX(100%); }
215
+ }
216
+
217
+ .tts-voice-selector {
218
+ background: #1a1a2e;
219
+ border: 1px solid #4a00e0;
220
+ color: white;
221
+ padding: 8px;
222
+ border-radius: 4px;
223
+ width: 100%;
224
+ }
225
+
226
+ .tts-voice-option {
227
+ padding: 8px;
228
+ background: #16213e;
229
+ }
230
+
231
+ .tts-voice-option:hover {
232
+ background: #4a00e0;
233
+ }
234
  </style>
235
  </head>
236
  <body class="bg-gray-900 text-gray-100 min-h-screen">
 
265
  <span class="text-purple-400"><i class="fas fa-shield-alt mr-1"></i> ENCRYPTED</span>
266
  <span class="ml-4 text-blue-400"><i class="fas fa-search mr-1"></i> WEB SEARCH ENABLED</span>
267
  <span class="ml-4 text-yellow-400"><i class="fas fa-code mr-1"></i> IDE ACTIVE</span>
268
+ <span class="ml-4 text-pink-400"><i class="fas fa-volume-up mr-1"></i> TTS ACTIVE</span>
269
  </div>
270
  </div>
271
 
 
288
  <button id="tools-tab" class="px-4 py-2 font-medium text-gray-500">
289
  <i class="fas fa-tools mr-2"></i>Tools
290
  </button>
291
+ <button id="tts-tab" class="px-4 py-2 font-medium text-gray-500">
292
+ <i class="fas fa-volume-up mr-2"></i>TTS
293
+ </button>
294
  </div>
295
 
296
  <!-- Chat panel -->
 
302
  <div class="bot-message text-white rounded-lg p-4 max-w-xs md:max-w-md lg:max-w-lg relative">
303
  <div class="absolute -left-2 top-3 w-4 h-4 rotate-45 bg-indigo-900"></div>
304
  <p class="font-bold text-purple-300">Xortron7:</p>
305
+ <p>Neural pathways initialized. Persistent memory database connected. Web search integration active. IDE environment loaded. TTS system ready. Ready to serve, human.</p>
306
  <div class="text-xs text-gray-400 mt-2 terminal-text">[SYSTEM BOOT COMPLETE]</div>
307
  </div>
308
  </div>
 
339
  <input type="checkbox" id="ide-toggle" class="form-checkbox h-3 w-3 text-yellow-600" checked>
340
  <span class="ml-2">IDE</span>
341
  </label>
342
+ <label class="inline-flex items-center ml-4">
343
+ <input type="checkbox" id="tts-toggle" class="form-checkbox h-3 w-3 text-pink-600" checked>
344
+ <span class="ml-2">TTS</span>
345
+ </label>
346
  </div>
347
  <span id="status-indicator">[SYSTEM READY]</span>
348
  </div>
 
463
  <p class="text-green-400">$ Python 3.10.6 | Java 17.0.6 | Node.js 18.12.1</p>
464
  <p class="text-green-400">$ Android SDK tools available</p>
465
  <p class="text-green-400">$ APK tools and decompilers ready</p>
466
+ <p class="text-green-400">$ TTS system initialized</p>
467
  <p class="text-green-400">$ Ready for commands</p>
468
  </div>
469
  </div>
 
640
  </div>
641
  </div>
642
 
643
+ <!-- TTS panel (hidden by default) -->
644
+ <div id="tts-panel" class="hidden bg-gray-800 rounded-lg p-4">
645
+ <h3 class="text-lg font-bold mb-4 text-pink-300"><i class="fas fa-volume-up mr-2"></i>Text-to-Speech System</h3>
646
+
647
+ <div class="tts-controls">
648
+ <div class="mb-4">
649
+ <label class="block text-sm font-medium text-gray-300 mb-2">Voice Model</label>
650
+ <select id="tts-voice" class="tts-voice-selector">
651
+ <option value="dia-1.6b">Dia 1.6B (Default)</option>
652
+ <option value="male-1">Male Voice 1</option>
653
+ <option value="female-1">Female Voice 1</option>
654
+ <option value="robot-1">Robotic Voice</option>
655
+ <option value="custom">Custom Voice (Upload Sample)</option>
656
+ </select>
657
+ </div>
658
+
659
+ <div class="mb-4">
660
+ <label class="block text-sm font-medium text-gray-300 mb-2">Text Input</label>
661
+ <textarea id="tts-text" class="w-full bg-gray-700 text-white px-4 py-2 rounded-lg focus:outline-none focus:ring-2 focus:ring-purple-500" rows="4" placeholder="Enter text to convert to speech..."></textarea>
662
+ </div>
663
+
664
+ <div class="mb-4">
665
+ <label class="block text-sm font-medium text-gray-300 mb-2">Audio Prompt (Optional)</label>
666
+ <input type="file" id="tts-audio-prompt" class="hidden" accept="audio/*">
667
+ <div class="flex items-center gap-2">
668
+ <button id="tts-upload-btn" class="bg-purple-600 hover:bg-purple-700 text-white px-4 py-2 rounded-lg">
669
+ <i class="fas fa-upload mr-2"></i>Upload Audio
670
+ </button>
671
+ <span id="tts-audio-filename" class="text-gray-400 text-sm">No file selected</span>
672
+ </div>
673
+ </div>
674
+
675
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-4 mb-4">
676
+ <div>
677
+ <label class="block text-sm font-medium text-gray-300 mb-2">Speed Factor</label>
678
+ <input type="range" id="tts-speed" min="0.8" max="1.2" step="0.05" value="0.94" class="tts-slider">
679
+ <div class="flex justify-between text-xs text-gray-400 mt-1">
680
+ <span>0.8x</span>
681
+ <span>1.0x</span>
682
+ <span>1.2x</span>
683
+ </div>
684
+ </div>
685
+
686
+ <div>
687
+ <label class="block text-sm font-medium text-gray-300 mb-2">Temperature</label>
688
+ <input type="range" id="tts-temperature" min="1.0" max="1.5" step="0.05" value="1.3" class="tts-slider">
689
+ <div class="flex justify-between text-xs text-gray-400 mt-1">
690
+ <span>1.0</span>
691
+ <span>1.25</span>
692
+ <span>1.5</span>
693
+ </div>
694
+ </div>
695
+ </div>
696
+
697
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-4 mb-4">
698
+ <div>
699
+ <label class="block text-sm font-medium text-gray-300 mb-2">CFG Scale</label>
700
+ <input type="range" id="tts-cfg-scale" min="1.0" max="5.0" step="0.1" value="3.0" class="tts-slider">
701
+ <div class="flex justify-between text-xs text-gray-400 mt-1">
702
+ <span>1.0</span>
703
+ <span>3.0</span>
704
+ <span>5.0</span>
705
+ </div>
706
+ </div>
707
+
708
+ <div>
709
+ <label class="block text-sm font-medium text-gray-300 mb-2">Top P</label>
710
+ <input type="range" id="tts-top-p" min="0.8" max="1.0" step="0.01" value="0.95" class="tts-slider">
711
+ <div class="flex justify-between text-xs text-gray-400 mt-1">
712
+ <span>0.8</span>
713
+ <span>0.9</span>
714
+ <span>1.0</span>
715
+ </div>
716
+ </div>
717
+ </div>
718
+
719
+ <button id="tts-generate-btn" class="w-full bg-pink-600 hover:bg-pink-700 text-white px-4 py-3 rounded-lg font-bold transition-all duration-200 glow-button">
720
+ <i class="fas fa-play mr-2"></i> Generate Speech
721
+ </button>
722
+
723
+ <div class="tts-audio-container mt-4">
724
+ <div class="tts-audio-visualizer">
725
+ <div id="tts-wave" class="tts-audio-wave" style="display: none;"></div>
726
+ </div>
727
+ <audio id="tts-audio-player" controls class="hidden"></audio>
728
+ </div>
729
+ </div>
730
+
731
+ <div class="mt-6 bg-gray-700 rounded-lg p-4">
732
+ <h4 class="font-bold text-purple-300 mb-2"><i class="fas fa-info-circle mr-2"></i>TTS System Info</h4>
733
+ <div class="terminal-text text-sm">
734
+ <p>Model: Dia-1.6B (Nari Labs)</p>
735
+ <p>Sample Rate: 44.1kHz</p>
736
+ <p>Max Tokens: 3072</p>
737
+ <p>Status: <span class="text-green-400">Ready</span></p>
738
+ <p class="mt-2 text-gray-400">Supports multi-speaker dialogue generation with optional audio prompts for voice matching.</p>
739
+ </div>
740
+ </div>
741
+ </div>
742
+
743
  <!-- System controls -->
744
  <div class="mt-6 bg-gray-800 rounded-lg p-4 cyberpunk-border">
745
  <div class="flex flex-wrap justify-between items-center">
 
756
  <button id="run-code" class="bg-green-700 hover:bg-green-600 text-white px-3 py-2 rounded text-sm terminal-text">
757
  <i class="fas fa-play mr-1"></i> Run Code
758
  </button>
759
+ <button id="tts-chat" class="bg-pink-700 hover:bg-pink-600 text-white px-3 py-2 rounded text-sm terminal-text">
760
+ <i class="fas fa-volume-up mr-1"></i> TTS Chat
761
+ </button>
762
  </div>
763
  <div class="flex items-center space-x-4">
764
  <div class="flex items-center">
 
768
  </div>
769
  </div>
770
  <div class="text-xs terminal-text">
771
+ <span class="text-green-400">ACTIVE</span> | <span class="text-blue-400">SEARCH</span> | <span class="text-purple-400">SECURE</span> | <span class="text-yellow-400">IDE</span> | <span class="text-pink-400">TTS</span>
772
  </div>
773
  </div>
774
  </div>
 
778
  <!-- Footer -->
779
  <footer class="mt-8 py-4 px-6 bg-black text-center text-xs text-gray-500 terminal-text cyberpunk-border">
780
  <p>XORTRON CYBERNETIC LABORATORIES © 2023 | ALL SYSTEMS SECURE | SANDBOXED ENVIRONMENT</p>
781
+ <p class="mt-1">WARNING: This AI system operates with persistent memory, web search, full development capabilities, and advanced TTS.</p>
782
  </footer>
783
 
784
  <script>
 
827
  const webSearchToggle = document.getElementById('web-search-toggle');
828
  const memoryToggle = document.getElementById('memory-toggle');
829
  const ideToggle = document.getElementById('ide-toggle');
830
+ const ttsToggle = document.getElementById('tts-toggle');
831
  const clearChatButton = document.getElementById('clear-chat');
832
  const exportChatButton = document.getElementById('export-chat');
833
  const runCodeButton = document.getElementById('run-code');
834
+ const ttsChatButton = document.getElementById('tts-chat');
835
 
836
  // Tab elements
837
  const chatTab = document.getElementById('chat-tab');
 
839
  const searchTab = document.getElementById('search-tab');
840
  const ideTab = document.getElementById('ide-tab');
841
  const toolsTab = document.getElementById('tools-tab');
842
+ const ttsTab = document.getElementById('tts-tab');
843
  const chatPanel = document.getElementById('chat-panel');
844
  const memoryPanel = document.getElementById('memory-panel');
845
  const searchPanel = document.getElementById('search-panel');
846
  const idePanel = document.getElementById('ide-panel');
847
  const toolsPanel = document.getElementById('tools-panel');
848
+ const ttsPanel = document.getElementById('tts-panel');
849
 
850
  // Search elements
851
  const searchQuery = document.getElementById('search-query');
 
858
  const terminalOutput = document.getElementById('terminal-output');
859
  const fileItems = document.querySelectorAll('.file-item');
860
 
861
+ // TTS elements
862
+ const ttsVoice = document.getElementById('tts-voice');
863
+ const ttsText = document.getElementById('tts-text');
864
+ const ttsAudioPrompt = document.getElementById('tts-audio-prompt');
865
+ const ttsUploadBtn = document.getElementById('tts-upload-btn');
866
+ const ttsAudioFilename = document.getElementById('tts-audio-filename');
867
+ const ttsSpeed = document.getElementById('tts-speed');
868
+ const ttsTemperature = document.getElementById('tts-temperature');
869
+ const ttsCfgScale = document.getElementById('tts-cfg-scale');
870
+ const ttsTopP = document.getElementById('tts-top-p');
871
+ const ttsGenerateBtn = document.getElementById('tts-generate-btn');
872
+ const ttsWave = document.getElementById('tts-wave');
873
+ const ttsAudioPlayer = document.getElementById('tts-audio-player');
874
+
875
  // Initialize ACE Editor
876
  editor.setTheme("ace/theme/monokai");
877
  editor.session.setMode("ace/mode/python");
prompts.txt CHANGED
@@ -1,2 +1,3 @@
1
  Please include web search functionality with a duckduckgo.com integration and let's make this fully functional, operational, and a hundred percent working primarily, and most importantly, with the AI properly installed and responding and set up with long term persistent memory with the database, API and server set up through the same integration.
2
- Let's really upscale this and include an embedded, built-in, pre-installed IDE, sandboxed and fully loaded with an advanced VS Code setup for security systems management, cybersecurity, NSFW and adult content, as well as Android app development and APK modification with Kotlin, Python, React, JavaScript, HTML, and CSS all pre-installed, with any other essential packages included, along with
 
 
1
  Please include web search functionality with a duckduckgo.com integration and let's make this fully functional, operational, and a hundred percent working primarily, and most importantly, with the AI properly installed and responding and set up with long term persistent memory with the database, API and server set up through the same integration.
2
+ Let's really upscale this and include an embedded, built-in, pre-installed IDE, sandboxed and fully loaded with an advanced VS Code setup for security systems management, cybersecurity, NSFW and adult content, as well as Android app development and APK modification with Kotlin, Python, React, JavaScript, HTML, and CSS all pre-installed, with any other essential packages included, along with
3
+ Now add TTS to the setup by combining the code we already generated with this gradio space code:raw Copy download link history blame contribute delete 15.3 kB import tempfile import time from pathlib import Path from typing import Optional, Tuple import spaces import gradio as gr import numpy as np import soundfile as sf import torch from dia.model import Dia # Load Nari model and config print("Loading Nari model...") try: # Use the function from inference.py model = Dia.from_pretrained("nari-labs/Dia-1.6B", compute_dtype="float32") except Exception as e: print(f"Error loading Nari model: {e}") raise @spaces.GPU def run_inference( text_input: str, audio_prompt_input: Optional[Tuple[int, np.ndarray]], max_new_tokens: int, cfg_scale: float, temperature: float, top_p: float, cfg_filter_top_k: int, speed_factor: float, ): """ Runs Nari inference using the globally loaded model and provided inputs. Uses temporary files for text and audio prompt compatibility with inference.generate. """ # global model, device # Access global model, config, device if not text_input or text_input.isspace(): raise gr.Error("Text input cannot be empty.") temp_txt_file_path = None temp_audio_prompt_path = None output_audio = (44100, np.zeros(1, dtype=np.float32)) try: prompt_path_for_generate = None if audio_prompt_input is not None: sr, audio_data = audio_prompt_input # Check if audio_data is valid if ( audio_data is None or audio_data.size == 0 or audio_data.max() == 0 ): # Check for silence/empty gr.Warning("Audio prompt seems empty or silent, ignoring prompt.") else: # Save prompt audio to a temporary WAV file with tempfile.NamedTemporaryFile( mode="wb", suffix=".wav", delete=False ) as f_audio: temp_audio_prompt_path = f_audio.name # Store path for cleanup # Basic audio preprocessing for consistency # Convert to float32 in [-1, 1] range if integer type if np.issubdtype(audio_data.dtype, np.integer): max_val = np.iinfo(audio_data.dtype).max audio_data = audio_data.astype(np.float32) / 
max_val elif not np.issubdtype(audio_data.dtype, np.floating): gr.Warning( f"Unsupported audio prompt dtype {audio_data.dtype}, attempting conversion." ) # Attempt conversion, might fail for complex types try: audio_data = audio_data.astype(np.float32) except Exception as conv_e: raise gr.Error( f"Failed to convert audio prompt to float32: {conv_e}" ) # Ensure mono (average channels if stereo) if audio_data.ndim > 1: if audio_data.shape[0] == 2: # Assume (2, N) audio_data = np.mean(audio_data, axis=0) elif audio_data.shape[1] == 2: # Assume (N, 2) audio_data = np.mean(audio_data, axis=1) else: gr.Warning( f"Audio prompt has unexpected shape {audio_data.shape}, taking first channel/axis." ) audio_data = ( audio_data[0] if audio_data.shape[0] < audio_data.shape[1] else audio_data[:, 0] ) audio_data = np.ascontiguousarray( audio_data ) # Ensure contiguous after slicing/mean # Write using soundfile try: sf.write( temp_audio_prompt_path, audio_data, sr, subtype="FLOAT" ) # Explicitly use FLOAT subtype prompt_path_for_generate = temp_audio_prompt_path print( f"Created temporary audio prompt file: {temp_audio_prompt_path} (orig sr: {sr})" ) except Exception as write_e: print(f"Error writing temporary audio file: {write_e}") raise gr.Error(f"Failed to save audio prompt: {write_e}") # 3. Run Generation start_time = time.time() # Use torch.inference_mode() context manager for the generation call with torch.inference_mode(): output_audio_np = model.generate( text_input, max_tokens=max_new_tokens, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, cfg_filter_top_k=cfg_filter_top_k, # Pass the value here use_torch_compile=False, # Keep False for Gradio stability audio_prompt=prompt_path_for_generate, ) end_time = time.time() print(f"Generation finished in {end_time - start_time:.2f} seconds.") # 4. 
Convert Codes to Audio if output_audio_np is not None: # Get sample rate from the loaded DAC model output_sr = 44100 # --- Slow down audio --- original_len = len(output_audio_np) # Ensure speed_factor is positive and not excessively small/large to avoid issues speed_factor = max(0.1, min(speed_factor, 5.0)) target_len = int( original_len / speed_factor ) # Target length based on speed_factor if ( target_len != original_len and target_len > 0 ): # Only interpolate if length changes and is valid x_original = np.arange(original_len) x_resampled = np.linspace(0, original_len - 1, target_len) resampled_audio_np = np.interp(x_resampled, x_original, output_audio_np) output_audio = ( output_sr, resampled_audio_np.astype(np.float32), ) # Use resampled audio print( f"Resampled audio from {original_len} to {target_len} samples for {speed_factor:.2f}x speed." ) else: output_audio = ( output_sr, output_audio_np, ) # Keep original if calculation fails or no change print(f"Skipping audio speed adjustment (factor: {speed_factor:.2f}).") # --- End slowdown --- print( f"Audio conversion successful. Final shape: {output_audio[1].shape}, Sample Rate: {output_sr}" ) # Explicitly convert to int16 to prevent Gradio warning if ( output_audio[1].dtype == np.float32 or output_audio[1].dtype == np.float64 ): audio_for_gradio = np.clip(output_audio[1], -1.0, 1.0) audio_for_gradio = (audio_for_gradio * 32767).astype(np.int16) output_audio = (output_sr, audio_for_gradio) print("Converted audio to int16 for Gradio output.") else: print("\nGeneration finished, but no valid tokens were produced.") # Return default silence gr.Warning("Generation produced no output.") except Exception as e: print(f"Error during inference: {e}") import traceback traceback.print_exc() # Re-raise as Gradio error to display nicely in the UI raise gr.Error(f"Inference failed: {e}") finally: # 5. 
Cleanup Temporary Files defensively if temp_txt_file_path and Path(temp_txt_file_path).exists(): try: Path(temp_txt_file_path).unlink() print(f"Deleted temporary text file: {temp_txt_file_path}") except OSError as e: print( f"Warning: Error deleting temporary text file {temp_txt_file_path}: {e}" ) if temp_audio_prompt_path and Path(temp_audio_prompt_path).exists(): try: Path(temp_audio_prompt_path).unlink() print(f"Deleted temporary audio prompt file: {temp_audio_prompt_path}") except OSError as e: print( f"Warning: Error deleting temporary audio prompt file {temp_audio_prompt_path}: {e}" ) return output_audio # --- Create Gradio Interface --- css = """ #col-container {max-width: 90%; margin-left: auto; margin-right: auto;} """ # Attempt to load default text from example.txt default_text = "[S1] Dia is an open weights text to dialogue model. \n[S2] You get full control over scripts and voices. \n[S1] Wow. Amazing. (laughs) \n[S2] Try it now on Git hub or Hugging Face." example_txt_path = Path("./example.txt") if example_txt_path.exists(): try: default_text = example_txt_path.read_text(encoding="utf-8").strip() if not default_text: # Handle empty example file default_text = "Example text file was empty." 
except Exception as e: print(f"Warning: Could not read example.txt: {e}") # Build Gradio UI with gr.Blocks(css=css) as demo: gr.Markdown("# Nari Text-to-Speech Synthesis") with gr.Row(equal_height=False): with gr.Column(scale=1): text_input = gr.Textbox( label="Input Text", placeholder="Enter text here...", value=default_text, lines=5, # Increased lines ) audio_prompt_input = gr.Audio( label="Audio Prompt (Optional)", show_label=True, sources=["upload", "microphone"], type="numpy", ) with gr.Accordion("Generation Parameters", open=False): max_new_tokens = gr.Slider( label="Max New Tokens (Audio Length)", minimum=860, maximum=3072, value=model.config.data.audio_length, # Use config default if available, else fallback step=50, info="Controls the maximum length of the generated audio (more tokens = longer audio).", ) cfg_scale = gr.Slider( label="CFG Scale (Guidance Strength)", minimum=1.0, maximum=5.0, value=3.0, # Default from inference.py step=0.1, info="Higher values increase adherence to the text prompt.", ) temperature = gr.Slider( label="Temperature (Randomness)", minimum=1.0, maximum=1.5, value=1.3, # Default from inference.py step=0.05, info="Lower values make the output more deterministic, higher values increase randomness.", ) top_p = gr.Slider( label="Top P (Nucleus Sampling)", minimum=0.80, maximum=1.0, value=0.95, # Default from inference.py step=0.01, info="Filters vocabulary to the most likely tokens cumulatively reaching probability P.", ) cfg_filter_top_k = gr.Slider( label="CFG Filter Top K", minimum=15, maximum=50, value=30, step=1, info="Top k filter for CFG guidance.", ) speed_factor_slider = gr.Slider( label="Speed Factor", minimum=0.8, maximum=1.0, value=0.94, step=0.02, info="Adjusts the speed of the generated audio (1.0 = original speed).", ) run_button = gr.Button("Generate Audio", variant="primary") with gr.Column(scale=1): audio_output = gr.Audio( label="Generated Audio", type="numpy", autoplay=False, ) # Link button click to function 
run_button.click( fn=run_inference, inputs=[ text_input, audio_prompt_input, max_new_tokens, cfg_scale, temperature, top_p, cfg_filter_top_k, speed_factor_slider, ], outputs=[audio_output], # Add status_output here if using it api_name="generate_audio", ) # Add examples (ensure the prompt path is correct or remove it if example file doesn't exist) example_prompt_path = "./example_prompt.mp3" # Adjust if needed examples_list = [ [ "[S1] Oh fire! Oh my goodness! What's the procedure? What to we do people? The smoke could be coming through an air duct! \n[S2] Oh my god! Okay.. it's happening. Everybody stay calm! \n[S1] What's the procedure... \n[S2] Everybody stay fucking calm!!!... Everybody fucking calm down!!!!! \n[S1] No! No! If you touch the handle, if its hot there might be a fire down the hallway! ", None, 3072, 3.0, 1.3, 0.95, 35, 0.94, ], [ "[S1] Open weights text to dialogue model. \n[S2] You get full control over scripts and voices. \n[S1] I'm biased, but I think we clearly won. \n[S2] Hard to disagree. (laughs) \n[S1] Thanks for listening to this demo. \n[S2] Try it now on Git hub and Hugging Face. \n[S1] If you liked our model, please give us a star and share to your friends. \n[S2] This was Nari Labs.", example_prompt_path if Path(example_prompt_path).exists() else None, 3072, 3.0, 1.3, 0.95, 35, 0.94, ], ] if examples_list: gr.Examples( examples=examples_list, inputs=[ text_input, audio_prompt_input, max_new_tokens, cfg_scale, temperature, top_p, cfg_filter_top_k, speed_factor_slider, ], outputs=[audio_output], fn=run_inference, cache_examples=False, label="Examples (Click to Run)", ) else: gr.Markdown("_(No examples configured or example prompt file missing)_") # --- Launch the App --- if __name__ == "__main__": print("Launching Gradio interface...") # set `GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT` env vars to override default values # use `GRADIO_SERVER_NAME=0.0.0.0` for Docker demo.launch()