AIencoder committed on
Commit
8b257cb
·
verified ·
1 Parent(s): 0b1db20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +445 -415
app.py CHANGED
@@ -1,55 +1,77 @@
1
  import gradio as gr
2
- import requests
3
  import json
4
  import time
5
  import os
 
 
6
  from faster_whisper import WhisperModel
7
 
8
- OLLAMA_URL = "http://localhost:11434"
9
- MAX_RETRIES = 3
10
- TIMEOUT = 300
11
-
12
- # ===== PERFORMANCE SETTINGS =====
13
- OLLAMA_OPTIONS = {
14
- "num_ctx": 4096, # Context window (lower = faster, 4096 is good balance)
15
- "num_batch": 512, # Batch size for prompt processing
16
- "num_thread": 4, # CPU threads (adjust based on your CPU)
17
- "repeat_penalty": 1.1, # Prevent repetition
18
- "top_k": 40, # Top-K sampling (lower = faster)
19
- "top_p": 0.9, # Nucleus sampling
20
- }
21
-
22
- # Keep model loaded for 10 minutes (faster subsequent requests)
23
- KEEP_ALIVE = "10m"
24
 
25
  MODELS = {
26
- "⭐ Qwen2.5 Coder 7B (Best)": "qwen2.5-coder:7b",
27
- "🧠 DeepSeek Coder 6.7B (Logic)": "deepseek-coder:6.7b",
28
- "Qwen2.5 Coder 3B (Fast)": "qwen2.5-coder:3b",
29
- "Qwen2.5 Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
30
- "DeepSeek Coder 1.3B (Fast)": "deepseek-coder:1.3b",
31
- "StarCoder2 3B": "starcoder2:3b",
32
- "CodeGemma 2B (Fast)": "codegemma:2b",
33
  }
34
 
35
  MODEL_INFO = {
36
- "⭐ Qwen2.5 Coder 7B (Best)": "🏆 Best overall • ~4.5GB • Recommended",
37
- "🧠 DeepSeek Coder 6.7B (Logic)": "🧠 Best for algorithms & logic ~3.8GB",
38
- "Qwen2.5 Coder 3B (Fast)": "⚖️ Balanced • ~2GB • Great all-rounder",
39
- "Qwen2.5 Coder 1.5B (Fastest)": "⚡ Fastest • ~1GB • Quick tasks",
40
- "DeepSeek Coder 1.3B (Fast)": "⚡ Fast • ~0.8GB • Quick logic",
41
- "StarCoder2 3B": "🐙 GitHub trained • ~1.7GB • Real patterns",
42
- "CodeGemma 2B (Fast)": "🔷 Google • ~1.6GB • Quick & efficient",
43
  }
44
 
45
  LANGUAGES = [
46
  "Python", "JavaScript", "TypeScript", "Go", "Rust",
47
  "Java", "C++", "C#", "C", "PHP", "Ruby", "Swift", "Kotlin",
48
- "Scala", "R", "MATLAB", "Julia", "Perl",
49
- "HTML/CSS", "SQL", "Bash", "PowerShell", "Lua"
50
  ]
51
 
52
- # ===== WHISPER INIT =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  whisper_model = None
54
 
55
  def init_whisper():
@@ -58,139 +80,64 @@ def init_whisper():
58
  print("Loading Whisper...")
59
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
60
  print("✅ Whisper ready!")
61
- return True
62
  except Exception as e:
63
- print(f"❌ Whisper failed to load: {e}")
64
- return False
65
 
66
  init_whisper()
67
 
68
- # ===== PRELOAD DEFAULT MODEL =====
69
- def preload_model():
70
- """Preload default model for faster first request"""
71
- try:
72
- print("🔥 Preloading default model...")
73
- requests.post(
74
- f"{OLLAMA_URL}/api/generate",
75
- json={
76
- "model": "qwen2.5-coder:3b",
77
- "prompt": "Hi",
78
- "keep_alive": KEEP_ALIVE,
79
- "options": {"num_predict": 1}
80
- },
81
- timeout=60
82
- )
83
- print("✅ Model preloaded!")
84
- except Exception as e:
85
- print(f"⚠️ Preload failed: {e}")
86
-
87
- # ===== HELPER FUNCTIONS =====
88
-
89
- def check_ollama_health():
90
- try:
91
- r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
92
- return r.status_code == 200
93
- except:
94
- return False
95
 
96
  def get_status():
97
- try:
98
- r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
99
- if r.status_code == 200:
100
- models = r.json().get("models", [])
101
- return f"🟢 Online • {len(models)} models"
102
- except requests.exceptions.ConnectionError:
103
- return "🔴 Offline • Ollama not running"
104
- except requests.exceptions.Timeout:
105
- return "🟡 Slow • Connection timeout"
106
- except Exception as e:
107
- return f"🔴 Error • {str(e)[:30]}"
108
- return "🟡 Starting..."
109
 
110
  def get_model_info(model_name):
111
  return MODEL_INFO.get(model_name, "")
112
 
113
- def validate_input(text, field_name="Input"):
114
  if not text or not text.strip():
115
- return False, f"⚠️ {field_name} cannot be empty."
116
- if len(text) > 100000:
117
- return False, f"⚠️ {field_name} is too long (max 100KB)."
118
  return True, None
119
 
120
  def transcribe_audio(audio):
121
- if audio is None:
122
  return ""
123
- if whisper_model is None:
124
- return "❌ Whisper not loaded. Voice input unavailable."
125
  try:
126
  segments, _ = whisper_model.transcribe(audio)
127
- text = " ".join([seg.text for seg in segments]).strip()
128
- if not text:
129
- return "⚠️ No speech detected. Try again."
130
- return text
131
- except FileNotFoundError:
132
- return "❌ Audio file not found."
133
  except Exception as e:
134
- return f"❌ Transcription failed: {str(e)[:50]}"
135
 
136
- def call_ollama_with_retry(model_name, prompt, temperature=0.7, max_tokens=2048):
137
- if not check_ollama_health():
138
- return "❌ **Ollama is not running.**\n\nPlease wait for Ollama to start."
 
139
 
140
- model = MODELS.get(model_name, "qwen2.5-coder:3b")
141
-
142
- # Merge performance options with request options
143
- options = {
144
- **OLLAMA_OPTIONS,
145
- "temperature": temperature,
146
- "num_predict": max_tokens
147
- }
148
-
149
- for attempt in range(MAX_RETRIES):
150
- try:
151
- r = requests.post(
152
- f"{OLLAMA_URL}/api/generate",
153
- json={
154
- "model": model,
155
- "prompt": prompt,
156
- "stream": False,
157
- "keep_alive": KEEP_ALIVE,
158
- "options": options
159
- },
160
- timeout=TIMEOUT
161
- )
162
-
163
- if r.status_code == 200:
164
- response = r.json().get("response", "")
165
- if not response.strip():
166
- return "⚠️ Model returned empty response. Try rephrasing."
167
- return response
168
- elif r.status_code == 404:
169
- return f"❌ **Model not found:** `{model}`"
170
- elif r.status_code == 500:
171
- error_msg = r.text[:200] if r.text else "Unknown error"
172
- if "out of memory" in error_msg.lower():
173
- return "❌ **Out of memory.** Try a smaller model."
174
- return f"❌ **Server error:** {error_msg}"
175
- else:
176
- return f"❌ **HTTP {r.status_code}:** {r.text[:100]}"
177
 
178
- except requests.exceptions.Timeout:
179
- if attempt < MAX_RETRIES - 1:
180
- time.sleep(2)
181
- continue
182
- return "❌ **Timeout.** Try smaller model or shorter input."
183
- except requests.exceptions.ConnectionError:
184
- if attempt < MAX_RETRIES - 1:
185
- time.sleep(2)
186
- continue
187
- return "❌ **Connection failed.** Ollama may have crashed."
188
- except json.JSONDecodeError:
189
- return "❌ **Invalid response from Ollama.**"
190
- except Exception as e:
191
- return f"❌ **Error:** {str(e)[:100]}"
192
-
193
- return "❌ **Max retries reached.**"
194
 
195
  def extract_code(text):
196
  if not text or "```" not in text:
@@ -206,7 +153,7 @@ def extract_code(text):
206
  pass
207
  return text
208
 
209
- # ===== STREAMING FUNCTIONS (FASTER FEEDBACK) =====
210
 
211
  def chat_stream(message, history, model_name, temperature, max_tokens):
212
  valid, error = validate_input(message, "Message")
@@ -214,273 +161,252 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
214
  yield history + [[message, error]]
215
  return
216
 
217
- if not check_ollama_health():
218
- yield history + [[message, "❌ **Ollama not running.**"]]
 
219
  return
220
 
221
- model = MODELS.get(model_name, "qwen2.5-coder:3b")
222
- messages = [{"role": "system", "content": "Expert coding assistant. Use markdown code blocks."}]
223
-
224
- for user_msg, assistant_msg in history:
225
- messages.append({"role": "user", "content": user_msg})
226
- if assistant_msg:
227
- messages.append({"role": "assistant", "content": assistant_msg})
228
-
229
- messages.append({"role": "user", "content": message})
230
 
231
- options = {
232
- **OLLAMA_OPTIONS,
233
- "temperature": temperature,
234
- "num_predict": max_tokens
235
- }
236
-
237
  try:
238
- response = requests.post(
239
- f"{OLLAMA_URL}/api/chat",
240
- json={
241
- "model": model,
242
- "messages": messages,
243
- "stream": True,
244
- "keep_alive": KEEP_ALIVE,
245
- "options": options
246
- },
247
- stream=True,
248
- timeout=TIMEOUT
249
- )
250
-
251
- if response.status_code == 404:
252
- yield history + [[message, f"❌ **Model not found:** `{model}`"]]
253
- return
254
-
255
- if response.status_code != 200:
256
- yield history + [[message, f"❌ **Error {response.status_code}**"]]
257
- return
258
-
259
  full = ""
260
- for line in response.iter_lines():
261
- if line:
262
- try:
263
- data = json.loads(line)
264
- if "error" in data:
265
- yield history + [[message, f"❌ **Error:** {data['error']}"]]
266
- return
267
- if "message" in data:
268
- full += data["message"].get("content", "")
269
- yield history + [[message, full]]
270
- except json.JSONDecodeError:
271
- continue
272
-
273
- if not full.strip():
274
- yield history + [[message, "⚠️ Empty response. Try rephrasing."]]
275
-
276
- except requests.exceptions.Timeout:
277
- yield history + [[message, "❌ **Timeout.** Try smaller model."]]
278
- except requests.exceptions.ConnectionError:
279
- yield history + [[message, "❌ **Connection lost.**"]]
280
  except Exception as e:
281
- yield history + [[message, f"❌ **Error:** {str(e)[:50]}"]]
282
-
283
- # ===== STREAMING CODE GENERATION (NEW!) =====
284
 
285
- def generate_code_stream(prompt, language, model_name, temperature, max_tokens):
286
  valid, error = validate_input(prompt, "Description")
287
  if not valid:
288
  yield error
289
  return
290
 
291
- if not check_ollama_health():
292
- yield "❌ **Ollama not running.**"
 
293
  return
294
 
295
- model = MODELS.get(model_name, "qwen2.5-coder:3b")
296
 
297
- # Shorter, optimized prompt
298
- full_prompt = f"Write clean {language} code with comments for:\n{prompt}\n\nCode only:"
299
-
300
- options = {
301
- **OLLAMA_OPTIONS,
302
- "temperature": temperature,
303
- "num_predict": max_tokens
304
- }
305
-
306
  try:
307
- response = requests.post(
308
- f"{OLLAMA_URL}/api/generate",
309
- json={
310
- "model": model,
311
- "prompt": full_prompt,
312
- "stream": True,
313
- "keep_alive": KEEP_ALIVE,
314
- "options": options
315
- },
316
- stream=True,
317
- timeout=TIMEOUT
318
- )
319
-
320
- if response.status_code != 200:
321
- yield f"❌ **Error {response.status_code}**"
322
- return
323
-
324
  full = ""
325
- for line in response.iter_lines():
326
- if line:
327
- try:
328
- data = json.loads(line)
329
- if "response" in data:
330
- full += data["response"]
331
- # Extract code as we stream
332
- yield extract_code(full)
333
- except:
334
- continue
335
-
336
- except requests.exceptions.Timeout:
337
- yield "❌ **Timeout.**"
338
  except Exception as e:
339
- yield f"❌ **Error:** {str(e)[:50]}"
340
 
341
- # ===== OPTIMIZED CORE FUNCTIONS (SHORTER PROMPTS) =====
342
 
343
- def explain_code(code, model_name, detail_level, max_tokens):
344
- valid, error = validate_input(code, "Code")
345
  if not valid:
346
- return error
347
-
348
  prompts = {
349
  "Brief": f"Explain briefly (2-3 sentences):\n{code}",
350
  "Normal": f"Explain this code:\n{code}",
351
  "Detailed": f"Detailed explanation (purpose, logic, complexity, improvements):\n{code}"
352
  }
353
-
354
- return call_ollama_with_retry(model_name, prompts.get(detail_level, prompts["Normal"]), 0.5, max_tokens)
355
 
356
  def fix_code(code, error_msg, model_name, max_tokens):
357
- valid, error = validate_input(code, "Code")
358
  if not valid:
359
- return error
360
-
361
- err = error_msg.strip() if error_msg else "Not working"
362
- prompt = f"Fix this code. Error: {err}\n\n{code}\n\nFixed code and explanation:"
363
- return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
364
 
365
  def review_code(code, model_name, max_tokens):
366
- valid, error = validate_input(code, "Code")
367
  if not valid:
368
- return error
369
-
370
- prompt = f"Review for bugs, performance, security, and improvements:\n{code}"
371
- return call_ollama_with_retry(model_name, prompt, 0.4, max_tokens)
372
 
373
- def convert_code(code, source_lang, target_lang, model_name, max_tokens):
374
- valid, error = validate_input(code, "Code")
375
  if not valid:
376
- return error
377
- if source_lang == target_lang:
378
- return "⚠️ Same language selected."
379
-
380
- prompt = f"Convert {source_lang} to {target_lang}. Output only code:\n{code}"
381
- result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
382
- return result if result.startswith("❌") or result.startswith("⚠️") else extract_code(result)
383
 
384
  def generate_tests(code, language, framework, model_name, max_tokens):
385
- valid, error = validate_input(code, "Code")
386
  if not valid:
387
- return error
388
-
389
- fw = framework.strip() if framework else "pytest" if language == "Python" else "Jest"
390
- prompt = f"Generate {fw} tests for this {language} code. Output only test code:\n{code}"
391
- result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
392
- return result if result.startswith("❌") or result.startswith("⚠️") else extract_code(result)
393
 
394
  def document_code(code, language, style, model_name, max_tokens):
395
- valid, error = validate_input(code, "Code")
396
  if not valid:
397
- return error
398
-
399
- prompt = f"Add {style.lower()} to this {language} code:\n{code}"
400
- result = call_ollama_with_retry(model_name, prompt, 0.4, max_tokens)
401
  return result if style == "README" or result.startswith("❌") else extract_code(result)
402
 
403
  def optimize_code(code, language, focus, model_name, max_tokens):
404
- valid, error = validate_input(code, "Code")
405
  if not valid:
406
- return error
407
-
408
- prompt = f"Optimize for {focus.lower()}. Explain changes:\n{code}"
409
- return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
  def build_regex(description, model_name, max_tokens):
412
- valid, error = validate_input(description, "Description")
413
  if not valid:
414
- return error
415
-
416
- prompt = f"Create regex for: {description}\n\nPattern, explanation, examples, Python code:"
417
- return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
418
 
419
  def build_api(description, framework, model_name, max_tokens):
420
- valid, error = validate_input(description, "Description")
421
  if not valid:
422
- return error
423
-
424
- prompt = f"Create {framework} REST endpoint:\n{description}\n\nCode with validation and error handling:"
425
- result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
426
  return result if result.startswith("❌") else extract_code(result)
427
 
428
- # ===== CSS =====
429
-
430
- css = """
431
- :root {
432
- --primary: #6366f1;
433
- --secondary: #8b5cf6;
434
- --bg-dark: #0f172a;
435
- --bg-card: #1e293b;
436
- --border: #334155;
437
- --text-primary: #f1f5f9;
438
- --gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 50%, #06b6d4 100%);
439
- }
440
- .gradio-container { max-width: 1500px !important; margin: auto !important; }
441
- .header-section {
442
- background: var(--gradient);
443
- border-radius: 20px;
444
- padding: 32px 40px;
445
- margin-bottom: 24px;
446
- box-shadow: 0 20px 40px rgba(99, 102, 241, 0.3);
447
- }
448
- .header-title { color: white; margin: 0; font-size: 2.5rem; font-weight: 800; }
449
- .header-subtitle { color: rgba(255,255,255,0.9); margin: 8px 0 0 0; }
450
- .header-badges { display: flex; gap: 10px; flex-wrap: wrap; margin-top: 16px; }
451
- .badge {
452
- background: rgba(255,255,255,0.2);
453
- padding: 8px 16px;
454
- border-radius: 50px;
455
- font-size: 0.85rem;
456
- color: white;
457
- }
458
- .tab-nav button.selected {
459
- background: var(--gradient) !important;
460
- color: white !important;
461
- }
462
- button.primary {
463
- background: var(--gradient) !important;
464
- border: none !important;
465
- border-radius: 10px !important;
466
- }
467
- .footer { text-align: center; padding: 24px; color: #94a3b8; font-size: 0.85rem; }
468
- footer { display: none !important; }
469
- """
470
 
471
  # ===== UI =====
472
 
473
  with gr.Blocks(title="Axon v6") as demo:
474
 
475
  gr.HTML("""
476
- <div class="header-section">
477
- <h1 class="header-title">🔥 Axon v6</h1>
478
- <p class="header-subtitle">AI-Powered Coding AssistantOptimized for Speed</p>
479
- <div class="header-badges">
480
- <span class="badge">🤖 7 Models</span>
481
- <span class="badge">🛠️ 9 Tools</span>
482
- <span class="badge">⚡ Optimized</span>
483
- <span class="badge">🔒 100% Local</span>
484
  </div>
485
  </div>
486
  """)
@@ -490,127 +416,219 @@ with gr.Blocks(title="Axon v6") as demo:
490
  with gr.Row():
491
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5 Coder 3B (Fast)", label="🤖 Model", scale=3)
492
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Creativity", scale=2)
493
- max_tokens = gr.Slider(256, 8192, value=2048, step=256, label="📏 Max Tokens", scale=2)
494
 
495
- model_info = gr.Markdown(value="⚖️ Balanced • ~2GB • Great all-rounder")
496
  model_dropdown.change(get_model_info, model_dropdown, model_info)
497
 
498
  with gr.Tabs():
499
 
500
  with gr.TabItem("💬 Chat"):
501
- chatbot = gr.Chatbot(height=500)
502
  with gr.Row():
503
  msg = gr.Textbox(placeholder="Ask anything...", show_label=False, scale=8)
504
- send = gr.Button("Send", variant="primary", scale=1)
505
  with gr.Row():
506
  audio = gr.Audio(sources=["microphone"], type="filepath", label="🎤", scale=2)
507
  transcribe = gr.Button("🎤 Transcribe", scale=1)
508
  clear = gr.Button("🗑️ Clear", scale=1)
509
- with gr.Accordion("💡 Examples", open=False):
510
- gr.Examples(["Write a Python quicksort", "Explain async/await in JS"], inputs=msg)
511
 
512
  with gr.TabItem("⚡ Generate"):
513
  with gr.Row():
514
- with gr.Column(scale=1):
515
- gen_prompt = gr.Textbox(label="📝 Describe what to build", lines=4)
516
  with gr.Row():
517
- gen_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language", scale=2)
518
- gen_temp = gr.Slider(0, 1, value=0.3, step=0.1, label="🌡️", scale=1)
519
- gen_btn = gr.Button("⚡ Generate (Streaming)", variant="primary")
520
- with gr.Column(scale=2):
521
- gen_output = gr.Code(label="Code", language="python", lines=18)
522
 
523
  with gr.TabItem("🔍 Explain"):
524
  with gr.Row():
525
- with gr.Column(scale=1):
526
- explain_input = gr.Code(label="📋 Code", lines=12)
527
- explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal", label="Detail")
528
  explain_btn = gr.Button("🔍 Explain", variant="primary")
529
- with gr.Column(scale=1):
530
  explain_output = gr.Markdown()
531
 
532
  with gr.TabItem("🔧 Debug"):
533
  with gr.Row():
534
- with gr.Column(scale=1):
535
- fix_input = gr.Code(label="🐛 Code", lines=10)
536
- fix_error = gr.Textbox(label="Error", lines=2)
537
  fix_btn = gr.Button("🔧 Fix", variant="primary")
538
- with gr.Column(scale=1):
539
  fix_output = gr.Markdown()
540
 
541
  with gr.TabItem("📋 Review"):
542
  with gr.Row():
543
- with gr.Column(scale=1):
544
- review_input = gr.Code(label="📋 Code", lines=14)
545
  review_btn = gr.Button("📋 Review", variant="primary")
546
- with gr.Column(scale=1):
547
  review_output = gr.Markdown()
548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
549
  with gr.TabItem("🔄 Convert"):
550
  with gr.Row():
551
- with gr.Column(scale=1):
552
- convert_input = gr.Code(label="📥 Source", lines=12)
553
  with gr.Row():
554
  convert_from = gr.Dropdown(LANGUAGES, value="Python", label="From")
555
  convert_to = gr.Dropdown(LANGUAGES, value="JavaScript", label="To")
556
  convert_btn = gr.Button("🔄 Convert", variant="primary")
557
- with gr.Column(scale=1):
558
- convert_output = gr.Code(label="📤 Result", lines=12)
559
 
560
  with gr.TabItem("🧪 Test"):
561
  with gr.Row():
562
- with gr.Column(scale=1):
563
- test_input = gr.Code(label="📋 Code", lines=12)
564
  with gr.Row():
565
- test_lang = gr.Dropdown(LANGUAGES[:12], value="Python", label="Language")
566
  test_fw = gr.Textbox(label="Framework", placeholder="pytest")
567
- test_btn = gr.Button("🧪 Generate Tests", variant="primary")
568
- with gr.Column(scale=1):
569
- test_output = gr.Code(label="Tests", lines=12)
570
 
571
  with gr.TabItem("📝 Document"):
572
  with gr.Row():
573
- with gr.Column(scale=1):
574
- doc_input = gr.Code(label="📋 Code", lines=12)
575
  with gr.Row():
576
  doc_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language")
577
- doc_style = gr.Dropdown(["Docstrings", "Comments", "Both", "README"], value="Both", label="Style")
578
  doc_btn = gr.Button("📝 Document", variant="primary")
579
- with gr.Column(scale=1):
580
- doc_output = gr.Code(label="Documented", lines=12)
581
 
582
  with gr.TabItem("🚀 Optimize"):
583
  with gr.Row():
584
- with gr.Column(scale=1):
585
- opt_input = gr.Code(label="📋 Code", lines=12)
586
  with gr.Row():
587
  opt_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language")
588
- opt_focus = gr.Dropdown(["All", "Performance", "Readability", "Memory"], value="All", label="Focus")
589
  opt_btn = gr.Button("🚀 Optimize", variant="primary")
590
- with gr.Column(scale=1):
591
  opt_output = gr.Markdown()
592
 
593
- with gr.TabItem("🛠️ Tools"):
594
- gr.Markdown("### 🎯 Regex Builder")
595
  with gr.Row():
596
  with gr.Column():
597
- regex_desc = gr.Textbox(label="Describe pattern", lines=2)
598
- regex_btn = gr.Button("🎯 Build Regex", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599
  with gr.Column():
600
  regex_output = gr.Markdown()
601
 
602
- gr.Markdown("---\n### 🔗 API Builder")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
  with gr.Row():
604
  with gr.Column():
605
- api_desc = gr.Textbox(label="Describe endpoint", lines=2)
606
- api_fw = gr.Dropdown(["FastAPI", "Express", "Flask", "Gin"], value="FastAPI", label="Framework")
607
- api_btn = gr.Button("🔗 Build API", variant="primary")
 
 
608
  with gr.Column():
609
- api_output = gr.Code(label="API Code", lines=12)
610
 
611
- gr.HTML('<div class="footer">🔒 100% Local Optimized for Speed • Built with ❤️</div>')
612
 
613
- # Events
614
  def respond(message, history, model, temp, tokens):
615
  history = history or []
616
  for updated in chat_stream(message, history, model, temp, tokens):
@@ -621,9 +639,7 @@ with gr.Blocks(title="Axon v6") as demo:
621
  clear.click(lambda: [], None, chatbot)
622
  transcribe.click(transcribe_audio, audio, msg)
623
 
624
- # Streaming generate!
625
- gen_btn.click(generate_code_stream, [gen_prompt, gen_lang, model_dropdown, gen_temp, max_tokens], gen_output)
626
-
627
  explain_btn.click(explain_code, [explain_input, model_dropdown, explain_detail, max_tokens], explain_output)
628
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
629
  review_btn.click(review_code, [review_input, model_dropdown, max_tokens], review_output)
@@ -631,10 +647,24 @@ with gr.Blocks(title="Axon v6") as demo:
631
  test_btn.click(generate_tests, [test_input, test_lang, test_fw, model_dropdown, max_tokens], test_output)
632
  doc_btn.click(document_code, [doc_input, doc_lang, doc_style, model_dropdown, max_tokens], doc_output)
633
  opt_btn.click(optimize_code, [opt_input, opt_lang, opt_focus, model_dropdown, max_tokens], opt_output)
 
 
 
 
 
 
 
 
 
 
634
  regex_btn.click(build_regex, [regex_desc, model_dropdown, max_tokens], regex_output)
635
  api_btn.click(build_api, [api_desc, api_fw, model_dropdown, max_tokens], api_output)
 
 
 
636
 
637
- # Preload model on startup
638
- preload_model()
 
639
 
640
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
 
2
  import json
3
  import time
4
  import os
5
+ from pathlib import Path
6
+ from llama_cpp import Llama
7
  from faster_whisper import WhisperModel
8
 
9
# ===== CONFIG =====
MODELS_DIR = "/models"   # directory holding the downloaded GGUF model files
MAX_TOKENS = 2048        # default generation budget per request
CONTEXT_SIZE = 4096      # llama.cpp context window (prompt + completion)

# Display name shown in the UI -> GGUF filename expected under MODELS_DIR.
MODELS = {
    "⭐ Qwen2.5 Coder 7B (Best)": "qwen2.5-coder-7b-instruct-q4_k_m.gguf",
    "Qwen2.5 Coder 3B (Fast)": "qwen2.5-coder-3b-instruct-q4_k_m.gguf",
    "Qwen2.5 Coder 1.5B (Fastest)": "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
}

# Short blurb shown under the model dropdown.
# Fixed garbled separators (lost "•" bullets and the "⚡" emoji), restoring
# the formatting used by the previous revision of these strings.
MODEL_INFO = {
    "⭐ Qwen2.5 Coder 7B (Best)": "🏆 Best quality • ~4.5GB",
    "Qwen2.5 Coder 3B (Fast)": "⚖️ Balanced • ~2GB • Recommended",
    "Qwen2.5 Coder 1.5B (Fastest)": "⚡ Fastest • ~1GB",
}

# Languages offered in the conversion / generation dropdowns.
LANGUAGES = [
    "Python", "JavaScript", "TypeScript", "Go", "Rust",
    "Java", "C++", "C#", "C", "PHP", "Ruby", "Swift", "Kotlin",
    "Scala", "R", "Julia", "Perl", "HTML/CSS", "SQL", "Bash", "PowerShell", "Lua",
]
 
32
+ # ===== MODEL CACHE =====
33
+ loaded_models = {}
34
+ current_model_name = None
35
+
36
+ def load_model(model_name):
37
+ global loaded_models, current_model_name
38
+
39
+ if model_name == current_model_name and model_name in loaded_models:
40
+ return loaded_models[model_name]
41
+
42
+ # Unload previous model to save RAM
43
+ if current_model_name and current_model_name != model_name:
44
+ if current_model_name in loaded_models:
45
+ del loaded_models[current_model_name]
46
+ print(f"🗑️ Unloaded {current_model_name}")
47
+
48
+ filename = MODELS.get(model_name)
49
+ if not filename:
50
+ return None
51
+
52
+ model_path = os.path.join(MODELS_DIR, filename)
53
+ if not os.path.exists(model_path):
54
+ print(f"❌ Model not found: {model_path}")
55
+ return None
56
+
57
+ print(f"📥 Loading {model_name}...")
58
+ try:
59
+ llm = Llama(
60
+ model_path=model_path,
61
+ n_ctx=CONTEXT_SIZE,
62
+ n_threads=4,
63
+ n_batch=512,
64
+ verbose=False
65
+ )
66
+ loaded_models[model_name] = llm
67
+ current_model_name = model_name
68
+ print(f"✅ {model_name} loaded!")
69
+ return llm
70
+ except Exception as e:
71
+ print(f"❌ Failed to load {model_name}: {e}")
72
+ return None
73
+
74
+ # ===== WHISPER =====
75
  whisper_model = None
76
 
77
  def init_whisper():
 
80
  print("Loading Whisper...")
81
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
82
  print("✅ Whisper ready!")
 
83
  except Exception as e:
84
+ print(f"❌ Whisper failed: {e}")
 
85
 
86
  init_whisper()
87
 
88
+ # ===== HELPERS =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
def get_status():
    """One-line status string: downloaded model count plus the active model."""
    downloaded = [
        label for label, fname in MODELS.items()
        if os.path.exists(os.path.join(MODELS_DIR, fname))
    ]
    if not current_model_name:
        return f"🟡 {len(downloaded)} models available"
    words = current_model_name.split()
    short = words[1] if len(words) > 1 else current_model_name
    return f"🟢 Ready • {len(downloaded)} models • Active: {short}"
 
 
 
 
 
 
 
 
95
 
96
def get_model_info(model_name):
    """Return the UI blurb for a model; empty string for unknown names."""
    try:
        return MODEL_INFO[model_name]
    except KeyError:
        return ""
98
 
99
def validate_input(text, name="Input"):
    """Validate a user-supplied text field.

    Returns ``(True, None)`` when usable, otherwise ``(False, warning)``.
    Rejects missing/whitespace-only input and anything over 50k characters.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return False, f"⚠️ {name} cannot be empty."
    if len(text) > 50000:
        return False, f"⚠️ {name} too long."
    return True, None
105
 
106
def transcribe_audio(audio):
    """Transcribe a recorded audio file with Whisper.

    Returns "" for no input, a ❌/⚠️ marker string on failure or silence,
    or the joined transcript text.
    """
    if not audio:
        return ""
    if not whisper_model:
        return "❌ Whisper unavailable."
    try:
        segments, _ = whisper_model.transcribe(audio)
        transcript = " ".join(seg.text for seg in segments).strip()
        if transcript:
            return transcript
        return "⚠️ No speech detected."
    except Exception as e:
        return f"❌ {str(e)[:50]}"
116
 
117
def generate_response(model_name, prompt, temperature=0.7, max_tokens=2048):
    """Run a single-turn completion and return the model's text.

    Wraps ``prompt`` in the Qwen2.5 ChatML template, generates with the
    selected model, and returns the stripped completion — or a marker
    string starting with ❌/⚠️ on failure or an empty reply.
    """
    llm = load_model(model_name)
    if not llm:
        return "❌ **Model not available.** Check if downloaded."

    # Qwen2.5 ChatML prompt format.
    formatted = (
        "<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n"
        f"<|im_start|>user\n{prompt}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    try:
        output = llm(
            formatted,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.9,
            top_k=40,
            repeat_penalty=1.1,
            # Stop at role-boundary tokens so the model can't hallucinate turns.
            stop=["<|im_end|>", "<|im_start|>"],
            echo=False,
        )
        text = output["choices"][0]["text"].strip()
        return text or "⚠️ Empty response."
    except Exception as e:
        return f"❌ **Error:** {str(e)[:100]}"
 
141
 
142
  def extract_code(text):
143
  if not text or "```" not in text:
 
153
  pass
154
  return text
155
 
156
# ===== STREAMING =====

def chat_stream(message, history, model_name, temperature, max_tokens):
    """Stream a chat reply token by token.

    Yields successive copies of ``history`` with the in-progress
    ``[message, partial_reply]`` pair appended, as Gradio's Chatbot expects.
    """
    valid, error = validate_input(message, "Message")
    if not valid:
        yield history + [[message, error]]
        return

    llm = load_model(model_name)
    if not llm:
        yield history + [[message, "❌ Model not available."]]
        return

    # Replay the whole conversation in Qwen2.5 ChatML format.
    pieces = ["<|im_start|>system\nYou are an expert coding assistant. Use markdown code blocks.<|im_end|>\n"]
    for user_turn, assistant_turn in history:
        pieces.append(f"<|im_start|>user\n{user_turn}<|im_end|>\n")
        if assistant_turn:
            pieces.append(f"<|im_start|>assistant\n{assistant_turn}<|im_end|>\n")
    pieces.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    conv = "".join(pieces)

    try:
        full = ""
        stream = llm(
            conv,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.9,
            stop=["<|im_end|>", "<|im_start|>"],
            stream=True,
        )
        for chunk in stream:
            full += chunk["choices"][0]["text"]
            yield history + [[message, full]]
    except Exception as e:
        yield history + [[message, f"❌ {str(e)[:50]}"]]
 
 
192
 
193
def generate_stream(prompt, language, model_name, temperature, max_tokens):
    """Stream generated code for a natural-language description.

    Yields the accumulated output, passed through ``extract_code`` so the
    UI shows fenced code without the markdown fences, as tokens arrive.
    """
    valid, error = validate_input(prompt, "Description")
    if not valid:
        yield error
        return

    llm = load_model(model_name)
    if not llm:
        yield "❌ Model not available."
        return

    formatted = (
        "<|im_start|>system\nYou are an expert coder.<|im_end|>\n"
        f"<|im_start|>user\nWrite clean {language} code with comments:\n{prompt}\n\nOutput only code:<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    try:
        full = ""
        stream = llm(
            formatted,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=["<|im_end|>"],
            stream=True,
        )
        for chunk in stream:
            full += chunk["choices"][0]["text"]
            yield extract_code(full)
    except Exception as e:
        yield f"❌ {str(e)[:50]}"
213
 
214
# ===== CORE FEATURES =====

def explain_code(code, model_name, detail, max_tokens):
    """Explain ``code`` at the requested detail level (Brief/Normal/Detailed)."""
    valid, err = validate_input(code, "Code")
    if not valid:
        return err
    if detail == "Brief":
        prompt = f"Explain briefly (2-3 sentences):\n{code}"
    elif detail == "Detailed":
        prompt = f"Detailed explanation (purpose, logic, complexity, improvements):\n{code}"
    else:
        # Unknown levels fall back to the normal explanation.
        prompt = f"Explain this code:\n{code}"
    return generate_response(model_name, prompt, 0.5, max_tokens)
 
226
 
227
def fix_code(code, error_msg, model_name, max_tokens):
    """Ask the model to repair ``code``, optionally guided by ``error_msg``."""
    valid, err = validate_input(code, "Code")
    if not valid:
        return err
    reported = error_msg.strip() if error_msg else "Not working"
    prompt = f"Fix this code. Error: {reported}\n\n{code}\n\nFixed code and explanation:"
    return generate_response(model_name, prompt, 0.3, max_tokens)
 
 
233
 
234
def review_code(code, model_name, max_tokens):
    """Request a bug / performance / security review of ``code``."""
    valid, err = validate_input(code, "Code")
    if not valid:
        return err
    prompt = f"Review for bugs, performance, security:\n{code}"
    return generate_response(model_name, prompt, 0.4, max_tokens)
 
 
239
 
240
def convert_code(code, from_lang, to_lang, model_name, max_tokens):
    """Translate ``code`` between languages, returning only the converted code."""
    valid, err = validate_input(code, "Code")
    if not valid:
        return err
    if from_lang == to_lang:
        return "⚠️ Same language."
    result = generate_response(
        model_name,
        f"Convert {from_lang} to {to_lang}. Code only:\n{code}",
        0.3,
        max_tokens,
    )
    # Pass error markers through untouched; otherwise strip markdown fences.
    if result.startswith("❌"):
        return result
    return extract_code(result)
 
 
248
 
249
def generate_tests(code, language, framework, model_name, max_tokens):
    """Generate unit tests for the given code.

    Args:
        code: Snippet to write tests for.
        language: Programming language of the snippet.
        framework: Test framework name; empty or whitespace-only values
            fall back to "pytest".
        model_name: Display name of the model to use.
        max_tokens: Generation token budget.

    Returns:
        Generated test code (fences stripped), or an error message.
    """
    valid, err = validate_input(code, "Code")
    if not valid:
        return err
    # BUG FIX: a whitespace-only framework previously stripped to "",
    # producing "Generate  tests ..." with no framework named.
    fw = framework.strip() if framework and framework.strip() else "pytest"
    result = generate_response(model_name, f"Generate {fw} tests for {language}. Code only:\n{code}", 0.3, max_tokens)
    return result if result.startswith("❌") else extract_code(result)
 
 
256
 
257
def document_code(code, language, style, model_name, max_tokens):
    """Add documentation (docstrings, comments, or a README) to code."""
    ok, message = validate_input(code, "Code")
    if not ok:
        return message
    result = generate_response(
        model_name,
        f"Add {style.lower()} to this {language} code:\n{code}",
        0.4,
        max_tokens,
    )
    # README output is prose, so skip code-fence extraction for it;
    # model errors are also passed through untouched.
    if style == "README" or result.startswith("❌"):
        return result
    return extract_code(result)
263
 
264
def optimize_code(code, language, focus, model_name, max_tokens):
    """Ask the model to optimize code for a given focus area, with an explanation."""
    ok, message = validate_input(code, "Code")
    if not ok:
        return message
    optimization_prompt = f"Optimize {language} for {focus.lower()}. Explain:\n{code}"
    return generate_response(model_name, optimization_prompt, 0.3, max_tokens)
269
+
270
+ # ===== NEW FEATURES =====
271
+
272
def security_scan(code, model_name, max_tokens):
    """Ask the model for a security audit of the snippet.

    The audit checklist (injection, auth, data exposure, input
    validation, crypto) is baked into the prompt; findings are
    requested with severity markers.
    """
    ok, message = validate_input(code, "Code")
    if not ok:
        return message
    audit_prompt = """Security audit this code. Check for:
1. Injection vulnerabilities (SQL, XSS, Command)
2. Authentication issues
3. Data exposure
4. Input validation
5. Cryptography issues

For each issue: Severity (🔴🟠🟡🟢), Location, Description, Fix.

Code:
""" + code
    return generate_response(model_name, audit_prompt, 0.3, max_tokens)
288
+
289
def analyze_complexity(code, model_name, max_tokens):
    """Ask the model for a Big-O time/space complexity analysis of the snippet."""
    ok, message = validate_input(code, "Code")
    if not ok:
        return message
    analysis_prompt = """Analyze time and space complexity:
1. Time Complexity (Big O)
2. Space Complexity (Big O)
3. Best/Average/Worst cases
4. Bottlenecks
5. Optimization suggestions

Code:
""" + code
    return generate_response(model_name, analysis_prompt, 0.4, max_tokens)
303
+
304
def build_sql(description, db_type, model_name, max_tokens):
    """Generate a SQL statement for the given natural-language description."""
    ok, message = validate_input(description, "Description")
    if not ok:
        return message
    sql = generate_response(
        model_name,
        f"Write optimized {db_type} SQL for:\n{description}\n\nSQL only:",
        0.2,
        max_tokens,
    )
    # Model errors pass through; otherwise strip markdown fences.
    if sql.startswith("❌"):
        return sql
    return extract_code(sql)
310
+
311
def build_shell(description, shell_type, model_name, max_tokens):
    """Generate a shell command for the given natural-language description."""
    ok, message = validate_input(description, "Description")
    if not ok:
        return message
    command = generate_response(
        model_name,
        f"Write {shell_type} command for:\n{description}\n\nCommand only:",
        0.2,
        max_tokens,
    )
    # Model errors pass through; otherwise strip markdown fences.
    if command.startswith("❌"):
        return command
    return extract_code(command)
317
+
318
def code_diff(code1, code2, model_name, max_tokens):
    """Compare two code snippets and summarize differences and quality.

    Both snippets are validated (first failure wins) before the model
    is asked for differences, functionality/performance impact, and a
    verdict on which snippet is better.
    """
    # Validate both snippets in order; return the first failure.
    for snippet, label in ((code1, "Code 1"), (code2, "Code 2")):
        ok, problem = validate_input(snippet, label)
        if not ok:
            return problem
    comparison_prompt = f"""Compare these code snippets:
1. Key differences
2. Functionality changes
3. Performance impact
4. Which is better and why

=== CODE 1 ===
{code1}

=== CODE 2 ===
{code2}"""
    return generate_response(model_name, comparison_prompt, 0.4, max_tokens)
337
+
338
def generate_mock_data(schema, count, format_type, model_name, max_tokens):
    """Generate `count` rows of realistic mock data from a schema sketch.

    Args:
        schema: Free-form field description (e.g. "User: name, email, age").
        count: Number of entries to generate (slider value).
        format_type: Output format ("JSON", "CSV", "SQL").
        model_name: Display name of the model to use.
        max_tokens: Generation token budget.

    Returns:
        Generated data (fences stripped), or an error message.
    """
    valid, err = validate_input(schema, "Schema")
    if not valid:
        return err
    # BUG FIX: Gradio sliders can deliver floats (e.g. 5.0); cast so the
    # prompt reads "Generate 5 ..." rather than "Generate 5.0 ...".
    n = int(count)
    result = generate_response(model_name, f"Generate {n} realistic mock entries as {format_type}:\n{schema}", 0.7, max_tokens)
    return result if result.startswith("❌") else extract_code(result)
344
+
345
def interview_challenge(topic, difficulty, language, model_name, max_tokens):
    """Create a coding-interview challenge (statement, examples, hints, solution)."""
    ok, message = validate_input(topic, "Topic")
    if not ok:
        return message
    challenge_prompt = f"""Create {difficulty} {language} interview challenge about {topic}.

Include:
1. Problem statement
2. Examples (2-3)
3. Constraints
4. Hints
5. Solution with explanation"""
    return generate_response(model_name, challenge_prompt, 0.6, max_tokens)
358
+
359
def to_pseudocode(code, output_type, model_name, max_tokens):
    """Convert code into pseudocode or a Mermaid.js flowchart description."""
    ok, message = validate_input(code, "Code")
    if not ok:
        return message
    # Choose the request wording based on the desired output form.
    request = (
        f"Convert to pseudocode:\n{code}"
        if output_type == "Pseudocode"
        else f"Create Mermaid.js flowchart for:\n{code}"
    )
    return generate_response(model_name, request, 0.3, max_tokens)
368
+
369
def build_cron(description, model_name, max_tokens):
    """Build a cron expression (with breakdown and next runs) from a description."""
    ok, message = validate_input(description, "Description")
    if not ok:
        return message
    cron_prompt = f"Create cron expression for: {description}\n\nInclude: expression, breakdown, next 5 runs"
    return generate_response(model_name, cron_prompt, 0.2, max_tokens)
374
 
375
def build_regex(description, model_name, max_tokens):
    """Build a regex (pattern, explanation, examples, Python usage) from a description."""
    ok, message = validate_input(description, "Description")
    if not ok:
        return message
    regex_prompt = f"Create regex for: {description}\n\nPattern, explanation, examples, Python code:"
    return generate_response(model_name, regex_prompt, 0.3, max_tokens)
 
 
380
 
381
def build_api(description, framework, model_name, max_tokens):
    """Generate a REST endpoint for the chosen web framework."""
    ok, message = validate_input(description, "Description")
    if not ok:
        return message
    endpoint = generate_response(
        model_name,
        f"Create {framework} REST endpoint:\n{description}\n\nCode:",
        0.3,
        max_tokens,
    )
    # Model errors pass through; otherwise strip markdown fences.
    if endpoint.startswith("❌"):
        return endpoint
    return extract_code(endpoint)
387
 
388
def convert_data_format(data, from_fmt, to_fmt, model_name, max_tokens):
    """Convert structured data between formats (JSON/YAML/XML/CSV).

    Returns the converted data, a warning when source and target
    formats are identical, or a validation/model error message.
    """
    ok, message = validate_input(data, "Data")
    if not ok:
        return message
    if from_fmt == to_fmt:
        return "⚠️ Same format."
    converted = generate_response(
        model_name,
        f"Convert {from_fmt} to {to_fmt}:\n{data}\n\nOutput only:",
        0.1,
        max_tokens,
    )
    # Model errors pass through; otherwise strip markdown fences.
    if converted.startswith("❌"):
        return converted
    return extract_code(converted)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
  # ===== UI =====
398
 
399
  with gr.Blocks(title="Axon v6") as demo:
400
 
401
  gr.HTML("""
402
+ <div style="background: linear-gradient(135deg, #6366f1, #8b5cf6, #06b6d4); border-radius: 16px; padding: 24px; margin-bottom: 16px;">
403
+ <h1 style="color: white; margin: 0; font-size: 2rem;">🔥 Axon v6</h1>
404
+ <p style="color: rgba(255,255,255,0.9); margin: 4px 0 0 0;">llama.cpp Edition 19 Tools Your Wheels! 🛞</p>
405
+ <div style="display: flex; gap: 8px; margin-top: 12px; flex-wrap: wrap;">
406
+ <span style="background: rgba(255,255,255,0.2); padding: 4px 12px; border-radius: 20px; font-size: 0.8rem; color: white;">🤖 3 Models</span>
407
+ <span style="background: rgba(255,255,255,0.2); padding: 4px 12px; border-radius: 20px; font-size: 0.8rem; color: white;">🛠️ 19 Tools</span>
408
+ <span style="background: rgba(255,255,255,0.2); padding: 4px 12px; border-radius: 20px; font-size: 0.8rem; color: white;">⚡ llama.cpp</span>
409
+ <span style="background: rgba(255,255,255,0.2); padding: 4px 12px; border-radius: 20px; font-size: 0.8rem; color: white;">🔒 Local</span>
410
  </div>
411
  </div>
412
  """)
 
416
  with gr.Row():
417
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5 Coder 3B (Fast)", label="🤖 Model", scale=3)
418
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Creativity", scale=2)
419
+ max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens", scale=2)
420
 
421
+ model_info = gr.Markdown(value="⚖️ Balanced • ~2GB • Recommended")
422
  model_dropdown.change(get_model_info, model_dropdown, model_info)
423
 
424
  with gr.Tabs():
425
 
426
  with gr.TabItem("💬 Chat"):
427
+ chatbot = gr.Chatbot(height=400)
428
  with gr.Row():
429
  msg = gr.Textbox(placeholder="Ask anything...", show_label=False, scale=8)
430
+ send = gr.Button("Send", variant="primary", scale=1)
431
  with gr.Row():
432
  audio = gr.Audio(sources=["microphone"], type="filepath", label="🎤", scale=2)
433
  transcribe = gr.Button("🎤 Transcribe", scale=1)
434
  clear = gr.Button("🗑️ Clear", scale=1)
 
 
435
 
436
  with gr.TabItem("⚡ Generate"):
437
  with gr.Row():
438
+ with gr.Column():
439
+ gen_prompt = gr.Textbox(label="📝 Describe", lines=3)
440
  with gr.Row():
441
+ gen_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language")
442
+ gen_temp = gr.Slider(0, 1, value=0.3, step=0.1, label="🌡️")
443
+ gen_btn = gr.Button("⚡ Generate", variant="primary")
444
+ with gr.Column():
445
+ gen_output = gr.Code(label="Code", language="python", lines=14)
446
 
447
  with gr.TabItem("🔍 Explain"):
448
  with gr.Row():
449
+ with gr.Column():
450
+ explain_input = gr.Code(label="Code", lines=10)
451
+ explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal")
452
  explain_btn = gr.Button("🔍 Explain", variant="primary")
453
+ with gr.Column():
454
  explain_output = gr.Markdown()
455
 
456
  with gr.TabItem("🔧 Debug"):
457
  with gr.Row():
458
+ with gr.Column():
459
+ fix_input = gr.Code(label="Code", lines=8)
460
+ fix_error = gr.Textbox(label="Error", lines=2)
461
  fix_btn = gr.Button("🔧 Fix", variant="primary")
462
+ with gr.Column():
463
  fix_output = gr.Markdown()
464
 
465
  with gr.TabItem("📋 Review"):
466
  with gr.Row():
467
+ with gr.Column():
468
+ review_input = gr.Code(label="Code", lines=10)
469
  review_btn = gr.Button("📋 Review", variant="primary")
470
+ with gr.Column():
471
  review_output = gr.Markdown()
472
 
473
+ with gr.TabItem("🔐 Security"):
474
+ with gr.Row():
475
+ with gr.Column():
476
+ security_input = gr.Code(label="Code", lines=10)
477
+ security_btn = gr.Button("🔐 Scan", variant="primary")
478
+ with gr.Column():
479
+ security_output = gr.Markdown()
480
+
481
+ with gr.TabItem("📊 Complexity"):
482
+ with gr.Row():
483
+ with gr.Column():
484
+ complexity_input = gr.Code(label="Code", lines=10)
485
+ complexity_btn = gr.Button("📊 Analyze", variant="primary")
486
+ with gr.Column():
487
+ complexity_output = gr.Markdown()
488
+
489
  with gr.TabItem("🔄 Convert"):
490
  with gr.Row():
491
+ with gr.Column():
492
+ convert_input = gr.Code(label="Source", lines=10)
493
  with gr.Row():
494
  convert_from = gr.Dropdown(LANGUAGES, value="Python", label="From")
495
  convert_to = gr.Dropdown(LANGUAGES, value="JavaScript", label="To")
496
  convert_btn = gr.Button("🔄 Convert", variant="primary")
497
+ with gr.Column():
498
+ convert_output = gr.Code(label="Result", lines=10)
499
 
500
  with gr.TabItem("🧪 Test"):
501
  with gr.Row():
502
+ with gr.Column():
503
+ test_input = gr.Code(label="Code", lines=10)
504
  with gr.Row():
505
+ test_lang = gr.Dropdown(LANGUAGES[:10], value="Python", label="Language")
506
  test_fw = gr.Textbox(label="Framework", placeholder="pytest")
507
+ test_btn = gr.Button("🧪 Generate", variant="primary")
508
+ with gr.Column():
509
+ test_output = gr.Code(label="Tests", lines=10)
510
 
511
  with gr.TabItem("📝 Document"):
512
  with gr.Row():
513
+ with gr.Column():
514
+ doc_input = gr.Code(label="Code", lines=10)
515
  with gr.Row():
516
  doc_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language")
517
+ doc_style = gr.Dropdown(["Docstrings", "Comments", "Both", "README"], value="Both")
518
  doc_btn = gr.Button("📝 Document", variant="primary")
519
+ with gr.Column():
520
+ doc_output = gr.Code(label="Documented", lines=10)
521
 
522
  with gr.TabItem("🚀 Optimize"):
523
  with gr.Row():
524
+ with gr.Column():
525
+ opt_input = gr.Code(label="Code", lines=10)
526
  with gr.Row():
527
  opt_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language")
528
+ opt_focus = gr.Dropdown(["All", "Performance", "Readability", "Memory"], value="All")
529
  opt_btn = gr.Button("🚀 Optimize", variant="primary")
530
+ with gr.Column():
531
  opt_output = gr.Markdown()
532
 
533
+ with gr.TabItem("🔀 Diff"):
 
534
  with gr.Row():
535
  with gr.Column():
536
+ diff_code1 = gr.Code(label="Code 1", lines=8)
537
+ diff_code2 = gr.Code(label="Code 2", lines=8)
538
+ diff_btn = gr.Button("🔀 Compare", variant="primary")
539
+ with gr.Column():
540
+ diff_output = gr.Markdown()
541
+
542
+ with gr.TabItem("📐 Pseudo"):
543
+ with gr.Row():
544
+ with gr.Column():
545
+ pseudo_input = gr.Code(label="Code", lines=10)
546
+ pseudo_type = gr.Radio(["Pseudocode", "Flowchart"], value="Pseudocode")
547
+ pseudo_btn = gr.Button("📐 Convert", variant="primary")
548
+ with gr.Column():
549
+ pseudo_output = gr.Markdown()
550
+
551
+ with gr.TabItem("🎓 Interview"):
552
+ with gr.Row():
553
+ with gr.Column():
554
+ interview_topic = gr.Textbox(label="Topic", placeholder="Binary trees...")
555
+ with gr.Row():
556
+ interview_diff = gr.Dropdown(["Easy", "Medium", "Hard"], value="Medium")
557
+ interview_lang = gr.Dropdown(LANGUAGES[:8], value="Python")
558
+ interview_btn = gr.Button("🎓 Generate", variant="primary")
559
+ with gr.Column():
560
+ interview_output = gr.Markdown()
561
+
562
+ with gr.TabItem("🛠️ Builders"):
563
+ gr.Markdown("### 🗄️ SQL")
564
+ with gr.Row():
565
+ with gr.Column():
566
+ sql_desc = gr.Textbox(label="Describe", lines=2)
567
+ sql_type = gr.Dropdown(["PostgreSQL", "MySQL", "SQLite"], value="PostgreSQL")
568
+ sql_btn = gr.Button("🗄️ Build", variant="primary")
569
+ with gr.Column():
570
+ sql_output = gr.Code(language="sql", lines=6)
571
+
572
+ gr.Markdown("---\n### 🐚 Shell")
573
+ with gr.Row():
574
+ with gr.Column():
575
+ shell_desc = gr.Textbox(label="Describe", lines=2)
576
+ shell_type = gr.Dropdown(["Bash", "PowerShell", "Zsh"], value="Bash")
577
+ shell_btn = gr.Button("🐚 Build", variant="primary")
578
+ with gr.Column():
579
+ shell_output = gr.Code(language="bash", lines=6)
580
+
581
+ gr.Markdown("---\n### ⏰ Cron")
582
+ with gr.Row():
583
+ with gr.Column():
584
+ cron_desc = gr.Textbox(label="Describe", lines=2)
585
+ cron_btn = gr.Button("⏰ Build", variant="primary")
586
+ with gr.Column():
587
+ cron_output = gr.Markdown()
588
+
589
+ gr.Markdown("---\n### 🎯 Regex")
590
+ with gr.Row():
591
+ with gr.Column():
592
+ regex_desc = gr.Textbox(label="Describe", lines=2)
593
+ regex_btn = gr.Button("🎯 Build", variant="primary")
594
  with gr.Column():
595
  regex_output = gr.Markdown()
596
 
597
+ gr.Markdown("---\n### 🔗 API")
598
+ with gr.Row():
599
+ with gr.Column():
600
+ api_desc = gr.Textbox(label="Describe", lines=2)
601
+ api_fw = gr.Dropdown(["FastAPI", "Express", "Flask"], value="FastAPI")
602
+ api_btn = gr.Button("🔗 Build", variant="primary")
603
+ with gr.Column():
604
+ api_output = gr.Code(lines=8)
605
+
606
+ with gr.TabItem("📦 Data"):
607
+ gr.Markdown("### 📦 Mock Data")
608
+ with gr.Row():
609
+ with gr.Column():
610
+ mock_schema = gr.Textbox(label="Schema", lines=2, placeholder="User: name, email, age...")
611
+ with gr.Row():
612
+ mock_count = gr.Slider(1, 20, value=5, step=1, label="Count")
613
+ mock_format = gr.Dropdown(["JSON", "CSV", "SQL"], value="JSON")
614
+ mock_btn = gr.Button("📦 Generate", variant="primary")
615
+ with gr.Column():
616
+ mock_output = gr.Code(lines=10)
617
+
618
+ gr.Markdown("---\n### 🔄 Format Converter")
619
  with gr.Row():
620
  with gr.Column():
621
+ format_input = gr.Code(label="Input", lines=6)
622
+ with gr.Row():
623
+ format_from = gr.Dropdown(["JSON", "YAML", "XML", "CSV"], value="JSON")
624
+ format_to = gr.Dropdown(["JSON", "YAML", "XML", "CSV"], value="YAML")
625
+ format_btn = gr.Button("🔄 Convert", variant="primary")
626
  with gr.Column():
627
+ format_output = gr.Code(label="Output", lines=6)
628
 
629
+ gr.HTML('<div style="text-align:center;padding:16px;opacity:0.5;">🔥 Axon v6 llama.cpp Your Wheels Power This! 🛞</div>')
630
 
631
+ # ===== EVENTS =====
632
  def respond(message, history, model, temp, tokens):
633
  history = history or []
634
  for updated in chat_stream(message, history, model, temp, tokens):
 
639
  clear.click(lambda: [], None, chatbot)
640
  transcribe.click(transcribe_audio, audio, msg)
641
 
642
+ gen_btn.click(generate_stream, [gen_prompt, gen_lang, model_dropdown, gen_temp, max_tokens], gen_output)
 
 
643
  explain_btn.click(explain_code, [explain_input, model_dropdown, explain_detail, max_tokens], explain_output)
644
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
645
  review_btn.click(review_code, [review_input, model_dropdown, max_tokens], review_output)
 
647
  test_btn.click(generate_tests, [test_input, test_lang, test_fw, model_dropdown, max_tokens], test_output)
648
  doc_btn.click(document_code, [doc_input, doc_lang, doc_style, model_dropdown, max_tokens], doc_output)
649
  opt_btn.click(optimize_code, [opt_input, opt_lang, opt_focus, model_dropdown, max_tokens], opt_output)
650
+
651
+ security_btn.click(security_scan, [security_input, model_dropdown, max_tokens], security_output)
652
+ complexity_btn.click(analyze_complexity, [complexity_input, model_dropdown, max_tokens], complexity_output)
653
+ diff_btn.click(code_diff, [diff_code1, diff_code2, model_dropdown, max_tokens], diff_output)
654
+ pseudo_btn.click(to_pseudocode, [pseudo_input, pseudo_type, model_dropdown, max_tokens], pseudo_output)
655
+ interview_btn.click(interview_challenge, [interview_topic, interview_diff, interview_lang, model_dropdown, max_tokens], interview_output)
656
+
657
+ sql_btn.click(build_sql, [sql_desc, sql_type, model_dropdown, max_tokens], sql_output)
658
+ shell_btn.click(build_shell, [shell_desc, shell_type, model_dropdown, max_tokens], shell_output)
659
+ cron_btn.click(build_cron, [cron_desc, model_dropdown, max_tokens], cron_output)
660
  regex_btn.click(build_regex, [regex_desc, model_dropdown, max_tokens], regex_output)
661
  api_btn.click(build_api, [api_desc, api_fw, model_dropdown, max_tokens], api_output)
662
+
663
+ mock_btn.click(generate_mock_data, [mock_schema, mock_count, mock_format, model_dropdown, max_tokens], mock_output)
664
+ format_btn.click(convert_data_format, [format_input, format_from, format_to, model_dropdown, max_tokens], format_output)
665
 
666
+ # Preload default model
667
+ print("🔥 Preloading default model...")
668
+ load_model("Qwen2.5 Coder 3B (Fast)")
669
 
670
  demo.launch(server_name="0.0.0.0", server_port=7860)