AIencoder commited on
Commit
0e84231
·
verified ·
1 Parent(s): 5c19f16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -591
app.py CHANGED
@@ -2,12 +2,26 @@ import gradio as gr
2
  import requests
3
  import json
4
  import time
 
5
  from faster_whisper import WhisperModel
6
 
7
  OLLAMA_URL = "http://localhost:11434"
8
  MAX_RETRIES = 3
9
  TIMEOUT = 300
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  MODELS = {
12
  "⭐ Qwen2.5 Coder 7B (Best)": "qwen2.5-coder:7b",
13
  "🧠 DeepSeek Coder 6.7B (Logic)": "deepseek-coder:6.7b",
@@ -51,6 +65,25 @@ def init_whisper():
51
 
52
  init_whisper()
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  # ===== HELPER FUNCTIONS =====
55
 
56
  def check_ollama_health():
@@ -87,10 +120,8 @@ def validate_input(text, field_name="Input"):
87
  def transcribe_audio(audio):
88
  if audio is None:
89
  return ""
90
-
91
  if whisper_model is None:
92
  return "❌ Whisper not loaded. Voice input unavailable."
93
-
94
  try:
95
  segments, _ = whisper_model.transcribe(audio)
96
  text = " ".join([seg.text for seg in segments]).strip()
@@ -104,10 +135,17 @@ def transcribe_audio(audio):
104
 
105
  def call_ollama_with_retry(model_name, prompt, temperature=0.7, max_tokens=2048):
106
  if not check_ollama_health():
107
- return "❌ **Ollama is not running.**\n\nPlease wait for Ollama to start, or check the logs."
108
 
109
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
110
 
 
 
 
 
 
 
 
111
  for attempt in range(MAX_RETRIES):
112
  try:
113
  r = requests.post(
@@ -116,10 +154,8 @@ def call_ollama_with_retry(model_name, prompt, temperature=0.7, max_tokens=2048)
116
  "model": model,
117
  "prompt": prompt,
118
  "stream": False,
119
- "options": {
120
- "temperature": temperature,
121
- "num_predict": max_tokens
122
- }
123
  },
124
  timeout=TIMEOUT
125
  )
@@ -127,18 +163,15 @@ def call_ollama_with_retry(model_name, prompt, temperature=0.7, max_tokens=2048)
127
  if r.status_code == 200:
128
  response = r.json().get("response", "")
129
  if not response.strip():
130
- return "⚠️ Model returned empty response. Try rephrasing your request."
131
  return response
132
-
133
  elif r.status_code == 404:
134
- return f"❌ **Model not found:** `{model}`\n\nThe model may still be downloading. Check logs or try a different model."
135
-
136
  elif r.status_code == 500:
137
- error_msg = r.text[:200] if r.text else "Unknown server error"
138
  if "out of memory" in error_msg.lower():
139
- return "❌ **Out of memory.**\n\nTry a smaller model like `Qwen2.5 Coder 1.5B (Fast)`."
140
  return f"❌ **Server error:** {error_msg}"
141
-
142
  else:
143
  return f"❌ **HTTP {r.status_code}:** {r.text[:100]}"
144
 
@@ -146,26 +179,22 @@ def call_ollama_with_retry(model_name, prompt, temperature=0.7, max_tokens=2048)
146
  if attempt < MAX_RETRIES - 1:
147
  time.sleep(2)
148
  continue
149
- return "❌ **Request timed out.**\n\nThe model is taking too long. Try:\n- A smaller model\n- Shorter input\n- Lower max tokens"
150
-
151
  except requests.exceptions.ConnectionError:
152
  if attempt < MAX_RETRIES - 1:
153
  time.sleep(2)
154
  continue
155
- return "❌ **Connection failed.**\n\nOllama may have crashed. Check the logs."
156
-
157
  except json.JSONDecodeError:
158
- return "❌ **Invalid response from Ollama.**\n\nThe model returned malformed data."
159
-
160
  except Exception as e:
161
- return f"❌ **Unexpected error:** {str(e)[:100]}"
162
 
163
- return "❌ **Max retries reached.** Please try again."
164
 
165
  def extract_code(text):
166
  if not text or "```" not in text:
167
  return text
168
-
169
  try:
170
  parts = text.split("```")
171
  if len(parts) >= 2:
@@ -173,11 +202,11 @@ def extract_code(text):
173
  if "\n" in code:
174
  code = code.split("\n", 1)[-1]
175
  return code.strip()
176
- except Exception:
177
  pass
178
  return text
179
 
180
- # ===== CORE FUNCTIONS =====
181
 
182
  def chat_stream(message, history, model_name, temperature, max_tokens):
183
  valid, error = validate_input(message, "Message")
@@ -186,11 +215,11 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
186
  return
187
 
188
  if not check_ollama_health():
189
- yield history + [[message, "❌ **Ollama is not running.** Please wait for it to start."]]
190
  return
191
 
192
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
193
- messages = [{"role": "system", "content": "You are an expert coding assistant. Provide clear, well-commented code. Always use markdown code blocks with language tags."}]
194
 
195
  for user_msg, assistant_msg in history:
196
  messages.append({"role": "user", "content": user_msg})
@@ -198,6 +227,12 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
198
  messages.append({"role": "assistant", "content": assistant_msg})
199
 
200
  messages.append({"role": "user", "content": message})
 
 
 
 
 
 
201
 
202
  try:
203
  response = requests.post(
@@ -206,18 +241,19 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
206
  "model": model,
207
  "messages": messages,
208
  "stream": True,
209
- "options": {"temperature": temperature, "num_predict": max_tokens}
 
210
  },
211
  stream=True,
212
  timeout=TIMEOUT
213
  )
214
 
215
  if response.status_code == 404:
216
- yield history + [[message, f"❌ **Model not found:** `{model}`\n\nTry a different model."]]
217
  return
218
 
219
  if response.status_code != 200:
220
- yield history + [[message, f"❌ **Error {response.status_code}:** {response.text[:100]}"]]
221
  return
222
 
223
  full = ""
@@ -226,7 +262,7 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
226
  try:
227
  data = json.loads(line)
228
  if "error" in data:
229
- yield history + [[message, f"❌ **Model error:** {data['error']}"]]
230
  return
231
  if "message" in data:
232
  full += data["message"].get("content", "")
@@ -235,64 +271,95 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
235
  continue
236
 
237
  if not full.strip():
238
- yield history + [[message, "⚠️ Model returned empty response. Try rephrasing."]]
239
 
240
  except requests.exceptions.Timeout:
241
- yield history + [[message, "❌ **Request timed out.** Try a smaller model or shorter input."]]
242
  except requests.exceptions.ConnectionError:
243
- yield history + [[message, "❌ **Connection lost.** Ollama may have crashed."]]
244
  except Exception as e:
245
- yield history + [[message, f"❌ **Error:** {str(e)[:100]}"]]
246
 
247
- def generate_code(prompt, language, model_name, temperature, max_tokens):
 
 
248
  valid, error = validate_input(prompt, "Description")
249
  if not valid:
250
- return error
 
251
 
252
- full_prompt = (
253
- f"Write {language} code for the following task:\n\n"
254
- f"{prompt}\n\n"
255
- "Requirements:\n"
256
- "- Clean, production-ready code\n"
257
- "- Add helpful comments\n"
258
- "- Handle edge cases\n"
259
- "- Output ONLY the code in a markdown code block"
260
- )
261
 
262
- result = call_ollama_with_retry(model_name, full_prompt, temperature, max_tokens)
263
- if result.startswith("❌") or result.startswith("⚠️"):
264
- return result
265
- return extract_code(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
  def explain_code(code, model_name, detail_level, max_tokens):
268
  valid, error = validate_input(code, "Code")
269
  if not valid:
270
  return error
271
 
272
- detail_prompts = {
273
- "Brief": "Give a brief 2-3 sentence explanation of what this code does.",
274
- "Normal": "Explain what this code does, including the main logic and any important details.",
275
- "Detailed": "Give a detailed explanation including: purpose, how it works step-by-step, time/space complexity, and potential improvements."
276
  }
277
 
278
- prompt = detail_prompts.get(detail_level, detail_prompts["Normal"]) + "\n\nCode:\n" + code
279
- return call_ollama_with_retry(model_name, prompt, 0.5, max_tokens)
280
 
281
  def fix_code(code, error_msg, model_name, max_tokens):
282
  valid, error = validate_input(code, "Code")
283
  if not valid:
284
  return error
285
 
286
- error_text = error_msg if error_msg and error_msg.strip() else "Code is not working as expected"
287
- prompt = (
288
- "Fix the following code and explain what was wrong.\n\n"
289
- "Code:\n" + code + "\n\n"
290
- "Error/Problem: " + error_text + "\n\n"
291
- "Provide:\n"
292
- "1. The fixed code in a markdown code block\n"
293
- "2. Brief explanation of what was wrong\n"
294
- "3. Any suggestions to prevent similar issues"
295
- )
296
  return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
297
 
298
  def review_code(code, model_name, max_tokens):
@@ -300,126 +367,45 @@ def review_code(code, model_name, max_tokens):
300
  if not valid:
301
  return error
302
 
303
- prompt = (
304
- "Review this code and provide feedback on:\n\n"
305
- "1. **Code Quality** - Is it clean, readable, well-structured?\n"
306
- "2. **Bugs/Issues** - Any potential bugs or problems?\n"
307
- "3. **Performance** - Any performance concerns?\n"
308
- "4. **Security** - Any security issues?\n"
309
- "5. **Suggestions** - How could it be improved?\n\n"
310
- "Code:\n" + code
311
- )
312
  return call_ollama_with_retry(model_name, prompt, 0.4, max_tokens)
313
 
314
  def convert_code(code, source_lang, target_lang, model_name, max_tokens):
315
  valid, error = validate_input(code, "Code")
316
  if not valid:
317
  return error
318
-
319
  if source_lang == target_lang:
320
- return "⚠️ Source and target languages are the same."
321
-
322
- prompt = (
323
- f"Convert this {source_lang} code to {target_lang}.\n\n"
324
- "Requirements:\n"
325
- f"- Write idiomatic {target_lang} code\n"
326
- "- Preserve the functionality exactly\n"
327
- "- Add comments explaining any language-specific differences\n"
328
- "- Output ONLY the converted code in a markdown code block\n\n"
329
- f"{source_lang} Code:\n" + code
330
- )
331
 
 
332
  result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
333
- if result.startswith("❌") or result.startswith("⚠️"):
334
- return result
335
- return extract_code(result)
336
 
337
  def generate_tests(code, language, framework, model_name, max_tokens):
338
  valid, error = validate_input(code, "Code")
339
  if not valid:
340
  return error
341
 
342
- frameworks = {
343
- "Python": "pytest",
344
- "JavaScript": "Jest",
345
- "TypeScript": "Jest",
346
- "Java": "JUnit",
347
- "C#": "NUnit",
348
- "Go": "testing package",
349
- "Rust": "built-in test framework",
350
- "Ruby": "RSpec",
351
- "PHP": "PHPUnit",
352
- }
353
-
354
- fw = framework if framework and framework.strip() else frameworks.get(language, "appropriate testing framework")
355
-
356
- prompt = (
357
- f"Generate comprehensive unit tests for this {language} code using {fw}.\n\n"
358
- "Requirements:\n"
359
- "- Test all functions/methods\n"
360
- "- Include edge cases\n"
361
- "- Include both positive and negative tests\n"
362
- "- Add descriptive test names\n"
363
- "- Output ONLY the test code in a markdown code block\n\n"
364
- "Code to test:\n" + code
365
- )
366
-
367
  result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
368
- if result.startswith("❌") or result.startswith("⚠️"):
369
- return result
370
- return extract_code(result)
371
 
372
  def document_code(code, language, style, model_name, max_tokens):
373
  valid, error = validate_input(code, "Code")
374
  if not valid:
375
  return error
376
 
377
- styles = {
378
- "Docstrings": "Add comprehensive docstrings to all functions, classes, and methods",
379
- "Comments": "Add inline comments explaining the logic",
380
- "Both": "Add both docstrings and inline comments",
381
- "README": "Generate a README.md documenting this code"
382
- }
383
-
384
- prompt = (
385
- f"Document this {language} code.\n\n"
386
- f"Task: {styles.get(style, styles['Both'])}\n\n"
387
- "Requirements:\n"
388
- "- Be clear and concise\n"
389
- "- Explain parameters, return values, and exceptions\n"
390
- "- Include usage examples where helpful\n"
391
- "- Output the fully documented code in a markdown code block\n\n"
392
- "Code:\n" + code
393
- )
394
-
395
  result = call_ollama_with_retry(model_name, prompt, 0.4, max_tokens)
396
- if style == "README" or result.startswith("❌") or result.startswith("⚠️"):
397
- return result
398
- return extract_code(result)
399
 
400
  def optimize_code(code, language, focus, model_name, max_tokens):
401
  valid, error = validate_input(code, "Code")
402
  if not valid:
403
  return error
404
 
405
- focus_prompts = {
406
- "Performance": "Optimize for speed and efficiency. Reduce time complexity where possible.",
407
- "Readability": "Refactor for better readability and maintainability. Follow best practices.",
408
- "Memory": "Optimize memory usage. Reduce allocations and improve data structures.",
409
- "All": "Optimize for performance, readability, and memory usage."
410
- }
411
-
412
- prompt = (
413
- f"Optimize this {language} code.\n\n"
414
- f"Focus: {focus_prompts.get(focus, focus_prompts['All'])}\n\n"
415
- "Requirements:\n"
416
- "- Explain what you changed and why\n"
417
- "- Preserve the original functionality\n"
418
- "- Show before/after complexity if relevant\n"
419
- "- Output the optimized code in a markdown code block\n\n"
420
- "Code:\n" + code
421
- )
422
-
423
  return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
424
 
425
  def build_regex(description, model_name, max_tokens):
@@ -427,16 +413,7 @@ def build_regex(description, model_name, max_tokens):
427
  if not valid:
428
  return error
429
 
430
- prompt = (
431
- "Create a regex pattern for the following requirement:\n\n"
432
- f"{description}\n\n"
433
- "Provide:\n"
434
- "1. The regex pattern\n"
435
- "2. Explanation of each part\n"
436
- "3. Example matches and non-matches\n"
437
- "4. Code example in Python showing usage"
438
- )
439
-
440
  return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
441
 
442
  def build_api(description, framework, model_name, max_tokens):
@@ -444,558 +421,220 @@ def build_api(description, framework, model_name, max_tokens):
444
  if not valid:
445
  return error
446
 
447
- prompt = (
448
- f"Create a REST API endpoint using {framework}.\n\n"
449
- f"Requirements:\n{description}\n\n"
450
- "Include:\n"
451
- "- Route definition with proper HTTP methods\n"
452
- "- Request validation\n"
453
- "- Error handling\n"
454
- "- Response formatting\n"
455
- "- Brief documentation comments\n"
456
- "- Output the code in a markdown code block"
457
- )
458
-
459
  result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
460
- if result.startswith("❌") or result.startswith("⚠️"):
461
- return result
462
- return extract_code(result)
463
 
464
- # ===== PREMIUM CSS =====
465
 
466
  css = """
467
- /* ===== GLOBAL ===== */
468
  :root {
469
  --primary: #6366f1;
470
- --primary-dark: #4f46e5;
471
  --secondary: #8b5cf6;
472
- --accent: #06b6d4;
473
  --bg-dark: #0f172a;
474
  --bg-card: #1e293b;
475
- --bg-hover: #334155;
476
- --text-primary: #f1f5f9;
477
- --text-secondary: #94a3b8;
478
  --border: #334155;
479
- --success: #10b981;
480
- --warning: #f59e0b;
481
- --error: #ef4444;
482
  --gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 50%, #06b6d4 100%);
483
  }
484
-
485
- .gradio-container {
486
- max-width: 1500px !important;
487
- margin: auto !important;
488
- background: var(--bg-dark) !important;
489
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
490
- }
491
-
492
- /* ===== HEADER ===== */
493
  .header-section {
494
  background: var(--gradient);
495
  border-radius: 20px;
496
  padding: 32px 40px;
497
  margin-bottom: 24px;
498
- position: relative;
499
- overflow: hidden;
500
  box-shadow: 0 20px 40px rgba(99, 102, 241, 0.3);
501
  }
502
-
503
- .header-section::before {
504
- content: '';
505
- position: absolute;
506
- top: -50%;
507
- right: -50%;
508
- width: 100%;
509
- height: 200%;
510
- background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, transparent 60%);
511
- animation: pulse 4s ease-in-out infinite;
512
- }
513
-
514
- @keyframes pulse {
515
- 0%, 100% { transform: scale(1); opacity: 0.5; }
516
- 50% { transform: scale(1.1); opacity: 0.8; }
517
- }
518
-
519
- .header-content {
520
- position: relative;
521
- z-index: 1;
522
- display: flex;
523
- justify-content: space-between;
524
- align-items: center;
525
- flex-wrap: wrap;
526
- gap: 20px;
527
- }
528
-
529
- .header-title {
530
- color: white;
531
- margin: 0;
532
- font-size: 2.8rem;
533
- font-weight: 800;
534
- letter-spacing: -0.02em;
535
- text-shadow: 0 2px 10px rgba(0,0,0,0.2);
536
- }
537
-
538
- .header-subtitle {
539
- color: rgba(255,255,255,0.9);
540
- margin: 8px 0 0 0;
541
- font-size: 1.1rem;
542
- font-weight: 400;
543
- }
544
-
545
- .header-badges {
546
- display: flex;
547
- gap: 10px;
548
- flex-wrap: wrap;
549
- }
550
-
551
  .badge {
552
  background: rgba(255,255,255,0.2);
553
- backdrop-filter: blur(10px);
554
  padding: 8px 16px;
555
  border-radius: 50px;
556
  font-size: 0.85rem;
557
- font-weight: 500;
558
  color: white;
559
- border: 1px solid rgba(255,255,255,0.2);
560
- }
561
-
562
- /* ===== STATUS BAR ===== */
563
- .status-bar {
564
- background: var(--bg-card);
565
- border: 1px solid var(--border);
566
- border-radius: 16px;
567
- padding: 16px 24px;
568
- margin-bottom: 20px;
569
- }
570
-
571
- /* ===== SETTINGS PANEL ===== */
572
- .settings-panel {
573
- background: var(--bg-card);
574
- border: 1px solid var(--border);
575
- border-radius: 16px;
576
- padding: 20px 24px;
577
- margin-bottom: 20px;
578
  }
579
-
580
- /* ===== MODEL INFO ===== */
581
- .model-info-box {
582
- background: linear-gradient(135deg, rgba(99, 102, 241, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%);
583
- border: 1px solid rgba(99, 102, 241, 0.3);
584
- border-radius: 12px;
585
- padding: 12px 18px;
586
- font-size: 0.9rem;
587
- color: var(--text-secondary);
588
- margin-top: 12px;
589
- margin-bottom: 20px;
590
- }
591
-
592
- /* ===== TABS ===== */
593
- .tab-nav {
594
- background: var(--bg-card) !important;
595
- border: 1px solid var(--border) !important;
596
- border-radius: 16px !important;
597
- padding: 8px !important;
598
- gap: 6px !important;
599
- margin-bottom: 20px !important;
600
- flex-wrap: wrap !important;
601
- }
602
-
603
- .tab-nav button {
604
- background: transparent !important;
605
- border: none !important;
606
- border-radius: 12px !important;
607
- padding: 12px 20px !important;
608
- font-weight: 600 !important;
609
- font-size: 0.9rem !important;
610
- color: var(--text-secondary) !important;
611
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
612
- }
613
-
614
- .tab-nav button:hover {
615
- background: var(--bg-hover) !important;
616
- color: var(--text-primary) !important;
617
- }
618
-
619
  .tab-nav button.selected {
620
  background: var(--gradient) !important;
621
  color: white !important;
622
- box-shadow: 0 4px 15px rgba(99, 102, 241, 0.4) !important;
623
  }
624
-
625
- /* ===== CHATBOT ===== */
626
- .chatbot-container {
627
- background: var(--bg-card) !important;
628
- border: 1px solid var(--border) !important;
629
- border-radius: 16px !important;
630
- }
631
-
632
- /* ===== INPUTS ===== */
633
- textarea, input[type="text"] {
634
- background: var(--bg-card) !important;
635
- border: 1px solid var(--border) !important;
636
- border-radius: 12px !important;
637
- color: var(--text-primary) !important;
638
- padding: 14px 18px !important;
639
- font-size: 0.95rem !important;
640
- transition: all 0.2s ease !important;
641
- }
642
-
643
- textarea:focus, input[type="text"]:focus {
644
- border-color: var(--primary) !important;
645
- box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
646
- outline: none !important;
647
- }
648
-
649
- /* ===== CODE BLOCKS ===== */
650
- .code-wrap {
651
- border-radius: 16px !important;
652
- overflow: hidden !important;
653
- border: 1px solid var(--border) !important;
654
- }
655
-
656
- /* ===== BUTTONS ===== */
657
- .primary-btn, button.primary {
658
  background: var(--gradient) !important;
659
  border: none !important;
660
- border-radius: 12px !important;
661
- padding: 14px 28px !important;
662
- font-weight: 600 !important;
663
- font-size: 0.95rem !important;
664
- color: white !important;
665
- cursor: pointer !important;
666
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
667
- box-shadow: 0 4px 15px rgba(99, 102, 241, 0.3) !important;
668
- }
669
-
670
- .primary-btn:hover, button.primary:hover {
671
- transform: translateY(-2px) !important;
672
- box-shadow: 0 8px 25px rgba(99, 102, 241, 0.4) !important;
673
- }
674
-
675
- .secondary-btn {
676
- background: var(--bg-hover) !important;
677
- border: 1px solid var(--border) !important;
678
- border-radius: 12px !important;
679
- padding: 12px 20px !important;
680
- font-weight: 500 !important;
681
- color: var(--text-secondary) !important;
682
- transition: all 0.2s ease !important;
683
  }
684
-
685
- .secondary-btn:hover {
686
- background: var(--bg-card) !important;
687
- border-color: var(--primary) !important;
688
- color: var(--text-primary) !important;
689
- }
690
-
691
- /* ===== MARKDOWN OUTPUT ===== */
692
- .markdown-output {
693
- background: var(--bg-card);
694
- border: 1px solid var(--border);
695
- border-radius: 16px;
696
- padding: 24px;
697
- color: var(--text-primary);
698
- line-height: 1.7;
699
- }
700
-
701
- /* ===== DIVIDER ===== */
702
- .divider {
703
- height: 1px;
704
- background: var(--border);
705
- margin: 24px 0;
706
- }
707
-
708
- /* ===== TOOL SECTION ===== */
709
- .tool-section {
710
- background: var(--bg-card);
711
- border: 1px solid var(--border);
712
- border-radius: 16px;
713
- padding: 24px;
714
- margin-bottom: 20px;
715
- }
716
-
717
- .tool-title {
718
- color: var(--text-primary);
719
- font-size: 1.2rem;
720
- font-weight: 600;
721
- margin-bottom: 16px;
722
- }
723
-
724
- /* ===== FOOTER ===== */
725
- .footer {
726
- text-align: center;
727
- padding: 24px;
728
- color: var(--text-secondary);
729
- font-size: 0.85rem;
730
- border-top: 1px solid var(--border);
731
- margin-top: 32px;
732
- }
733
-
734
- /* ===== SCROLLBAR ===== */
735
- ::-webkit-scrollbar {
736
- width: 8px;
737
- height: 8px;
738
- }
739
-
740
- ::-webkit-scrollbar-track {
741
- background: var(--bg-dark);
742
- }
743
-
744
- ::-webkit-scrollbar-thumb {
745
- background: var(--border);
746
- border-radius: 4px;
747
- }
748
-
749
- ::-webkit-scrollbar-thumb:hover {
750
- background: var(--text-secondary);
751
- }
752
-
753
- /* ===== HIDE DEFAULT FOOTER ===== */
754
  footer { display: none !important; }
755
-
756
- /* ===== RESPONSIVE ===== */
757
- @media (max-width: 768px) {
758
- .header-title { font-size: 2rem; }
759
- .header-content { flex-direction: column; text-align: center; }
760
- .header-badges { justify-content: center; }
761
- .tab-nav button { padding: 10px 14px !important; font-size: 0.8rem !important; }
762
- }
763
  """
764
 
765
- # ===== UI (Gradio 6.0 Compatible) =====
766
 
767
  with gr.Blocks(title="Axon v6") as demo:
768
 
769
- # Header
770
  gr.HTML("""
771
  <div class="header-section">
772
- <div class="header-content">
773
- <div>
774
- <h1 class="header-title">🔥 Axon v6</h1>
775
- <p class="header-subtitle">AI-Powered Coding Assistant</p>
776
- </div>
777
- <div class="header-badges">
778
- <span class="badge">🤖 10 Models</span>
779
- <span class="badge">🛠️ 9 Tools</span>
780
- <span class="badge">🔒 100% Local</span>
781
- <span class="badge">⚡ No Rate Limits</span>
782
- </div>
783
  </div>
784
  </div>
785
  """)
786
 
787
- # Status
788
- with gr.Row():
789
- status = gr.Markdown(value=get_status, every=5)
790
 
791
- # Settings Panel
792
  with gr.Row():
793
- model_dropdown = gr.Dropdown(
794
- choices=list(MODELS.keys()),
795
- value="Qwen2.5 Coder 3B",
796
- label="🤖 Model",
797
- scale=3
798
- )
799
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Creativity", scale=2)
800
  max_tokens = gr.Slider(256, 8192, value=2048, step=256, label="📏 Max Tokens", scale=2)
801
 
802
- model_info_display = gr.Markdown(value="🚀 Fast & capableRecommended")
803
- model_dropdown.change(get_model_info, model_dropdown, model_info_display)
804
 
805
  with gr.Tabs():
806
 
807
- # ===== CHAT =====
808
  with gr.TabItem("💬 Chat"):
809
  chatbot = gr.Chatbot(height=500)
810
  with gr.Row():
811
- msg = gr.Textbox(
812
- placeholder="Ask anything about coding... Press Enter to send",
813
- show_label=False, scale=8, lines=1
814
- )
815
  send = gr.Button("Send ➤", variant="primary", scale=1)
816
  with gr.Row():
817
- audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Voice", scale=2)
818
- transcribe_btn = gr.Button("🎤 Transcribe", scale=1)
819
  clear = gr.Button("🗑️ Clear", scale=1)
820
- with gr.Accordion("💡 Quick Prompts", open=False):
821
- gr.Examples([
822
- "Write a Python function to find all prime numbers up to n",
823
- "Explain async/await vs promises in JavaScript",
824
- "How do I implement a binary search tree?",
825
- "Write a REST API with authentication in FastAPI"
826
- ], inputs=msg)
827
 
828
- # ===== GENERATE =====
829
  with gr.TabItem("⚡ Generate"):
830
  with gr.Row():
831
  with gr.Column(scale=1):
832
- gen_prompt = gr.Textbox(
833
- label="📝 Describe what you want to build",
834
- placeholder="e.g., A function that validates email addresses with regex",
835
- lines=5
836
- )
837
  with gr.Row():
838
- gen_lang = gr.Dropdown(LANGUAGES, value="Python", label="🔤 Language", scale=2)
839
  gen_temp = gr.Slider(0, 1, value=0.3, step=0.1, label="🌡️", scale=1)
840
- gen_btn = gr.Button("⚡ Generate Code", variant="primary", size="lg")
841
  with gr.Column(scale=2):
842
- gen_output = gr.Code(label="Generated Code", language="python", lines=22)
843
 
844
- # ===== EXPLAIN =====
845
  with gr.TabItem("🔍 Explain"):
846
  with gr.Row():
847
  with gr.Column(scale=1):
848
- explain_input = gr.Code(label="📋 Paste your code", lines=14)
849
- explain_detail = gr.Radio(
850
- ["Brief", "Normal", "Detailed"],
851
- value="Normal", label="📊 Detail Level"
852
- )
853
- explain_btn = gr.Button("🔍 Explain Code", variant="primary", size="lg")
854
  with gr.Column(scale=1):
855
- explain_output = gr.Markdown(label="Explanation")
856
 
857
- # ===== DEBUG =====
858
  with gr.TabItem("🔧 Debug"):
859
  with gr.Row():
860
  with gr.Column(scale=1):
861
- fix_input = gr.Code(label="🐛 Paste buggy code", lines=12)
862
- fix_error = gr.Textbox(
863
- label=" Error message (optional)",
864
- placeholder="Paste error or describe the issue",
865
- lines=3
866
- )
867
- fix_btn = gr.Button("🔧 Fix Code", variant="primary", size="lg")
868
  with gr.Column(scale=1):
869
- fix_output = gr.Markdown(label="Solution")
870
 
871
- # ===== REVIEW =====
872
  with gr.TabItem("📋 Review"):
873
  with gr.Row():
874
  with gr.Column(scale=1):
875
- review_input = gr.Code(label="📋 Code to review", lines=16)
876
- review_btn = gr.Button("📋 Review Code", variant="primary", size="lg")
877
  with gr.Column(scale=1):
878
- review_output = gr.Markdown(label="Code Review")
879
 
880
- # ===== CONVERT =====
881
  with gr.TabItem("🔄 Convert"):
882
  with gr.Row():
883
  with gr.Column(scale=1):
884
- convert_input = gr.Code(label="📥 Source Code", lines=14)
885
  with gr.Row():
886
- convert_from = gr.Dropdown(LANGUAGES, value="Python", label="From", scale=1)
887
- convert_to = gr.Dropdown(LANGUAGES, value="JavaScript", label="To", scale=1)
888
- convert_btn = gr.Button("🔄 Convert Code", variant="primary", size="lg")
889
  with gr.Column(scale=1):
890
- convert_output = gr.Code(label="📤 Converted Code", lines=14)
891
 
892
- # ===== TEST =====
893
  with gr.TabItem("🧪 Test"):
894
  with gr.Row():
895
  with gr.Column(scale=1):
896
- test_input = gr.Code(label="📋 Code to test", lines=14)
897
  with gr.Row():
898
- test_lang = gr.Dropdown(LANGUAGES[:12], value="Python", label="Language", scale=2)
899
- test_framework = gr.Textbox(label="Framework", placeholder="e.g., pytest", scale=2)
900
- test_btn = gr.Button("🧪 Generate Tests", variant="primary", size="lg")
901
  with gr.Column(scale=1):
902
- test_output = gr.Code(label="Generated Tests", lines=14)
903
 
904
- # ===== DOCUMENT =====
905
  with gr.TabItem("📝 Document"):
906
  with gr.Row():
907
  with gr.Column(scale=1):
908
- doc_input = gr.Code(label="📋 Code to document", lines=14)
909
  with gr.Row():
910
- doc_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language", scale=2)
911
- doc_style = gr.Dropdown(
912
- ["Docstrings", "Comments", "Both", "README"],
913
- value="Both", label="Style", scale=2
914
- )
915
- doc_btn = gr.Button("📝 Document", variant="primary", size="lg")
916
  with gr.Column(scale=1):
917
- doc_output = gr.Code(label="Documented Code", lines=14)
918
 
919
- # ===== OPTIMIZE =====
920
  with gr.TabItem("🚀 Optimize"):
921
  with gr.Row():
922
  with gr.Column(scale=1):
923
- opt_input = gr.Code(label="📋 Code to optimize", lines=14)
924
  with gr.Row():
925
- opt_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language", scale=2)
926
- opt_focus = gr.Dropdown(
927
- ["All", "Performance", "Readability", "Memory"],
928
- value="All", label="Focus", scale=2
929
- )
930
- opt_btn = gr.Button("🚀 Optimize", variant="primary", size="lg")
931
  with gr.Column(scale=1):
932
- opt_output = gr.Markdown(label="Optimized Code")
933
 
934
- # ===== TOOLS =====
935
  with gr.TabItem("🛠️ Tools"):
936
-
937
- # Regex Builder
938
  gr.Markdown("### 🎯 Regex Builder")
939
  with gr.Row():
940
- with gr.Column(scale=1):
941
- regex_desc = gr.Textbox(
942
- label="Describe the pattern",
943
- placeholder="e.g., Match email addresses, validate phone numbers...",
944
- lines=3
945
- )
946
  regex_btn = gr.Button("🎯 Build Regex", variant="primary")
947
- with gr.Column(scale=1):
948
- regex_output = gr.Markdown(label="Regex Pattern")
949
 
950
- gr.Markdown("---")
951
-
952
- # API Builder
953
- gr.Markdown("### 🔗 API Builder")
954
  with gr.Row():
955
- with gr.Column(scale=1):
956
- api_desc = gr.Textbox(
957
- label="Describe the endpoint",
958
- placeholder="e.g., POST endpoint for user registration...",
959
- lines=3
960
- )
961
- api_framework = gr.Dropdown(
962
- ["FastAPI (Python)", "Express (Node.js)", "Gin (Go)", "Spring Boot (Java)", "Flask (Python)", "Django REST (Python)"],
963
- value="FastAPI (Python)", label="Framework"
964
- )
965
  api_btn = gr.Button("🔗 Build API", variant="primary")
966
- with gr.Column(scale=1):
967
- api_output = gr.Code(label="API Code", lines=14)
968
 
969
- # Footer
970
- gr.HTML("""
971
- <div class="footer">
972
- <p>🔒 Running 100% locally via Ollama • Your code never leaves your machine</p>
973
- <p style="margin-top: 8px; opacity: 0.7;">Axon v6 • Built with ❤️</p>
974
- </div>
975
- """)
976
 
977
- # ===== EVENT HANDLERS =====
978
-
979
  def respond(message, history, model, temp, tokens):
980
  history = history or []
981
- for updated_history in chat_stream(message, history, model, temp, tokens):
982
- yield updated_history, ""
983
 
984
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
985
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
986
  clear.click(lambda: [], None, chatbot)
987
- transcribe_btn.click(transcribe_audio, audio_input, msg)
 
 
 
988
 
989
- gen_btn.click(generate_code, [gen_prompt, gen_lang, model_dropdown, gen_temp, max_tokens], gen_output)
990
  explain_btn.click(explain_code, [explain_input, model_dropdown, explain_detail, max_tokens], explain_output)
991
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
992
  review_btn.click(review_code, [review_input, model_dropdown, max_tokens], review_output)
993
  convert_btn.click(convert_code, [convert_input, convert_from, convert_to, model_dropdown, max_tokens], convert_output)
994
- test_btn.click(generate_tests, [test_input, test_lang, test_framework, model_dropdown, max_tokens], test_output)
995
  doc_btn.click(document_code, [doc_input, doc_lang, doc_style, model_dropdown, max_tokens], doc_output)
996
  opt_btn.click(optimize_code, [opt_input, opt_lang, opt_focus, model_dropdown, max_tokens], opt_output)
997
  regex_btn.click(build_regex, [regex_desc, model_dropdown, max_tokens], regex_output)
998
- api_btn.click(build_api, [api_desc, api_framework, model_dropdown, max_tokens], api_output)
 
 
 
999
 
1000
- # Launch with CSS (Gradio 6.0 way)
1001
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
2
  import requests
3
  import json
4
  import time
5
+ import os
6
  from faster_whisper import WhisperModel
7
 
8
  OLLAMA_URL = "http://localhost:11434"
9
  MAX_RETRIES = 3
10
  TIMEOUT = 300
11
 
12
+ # ===== PERFORMANCE SETTINGS =====
13
+ OLLAMA_OPTIONS = {
14
+ "num_ctx": 4096, # Context window (lower = faster, 4096 is good balance)
15
+ "num_batch": 512, # Batch size for prompt processing
16
+ "num_thread": 4, # CPU threads (adjust based on your CPU)
17
+ "repeat_penalty": 1.1, # Prevent repetition
18
+ "top_k": 40, # Top-K sampling (lower = faster)
19
+ "top_p": 0.9, # Nucleus sampling
20
+ }
21
+
22
+ # Keep model loaded for 10 minutes (faster subsequent requests)
23
+ KEEP_ALIVE = "10m"
24
+
25
  MODELS = {
26
  "⭐ Qwen2.5 Coder 7B (Best)": "qwen2.5-coder:7b",
27
  "🧠 DeepSeek Coder 6.7B (Logic)": "deepseek-coder:6.7b",
 
65
 
66
  init_whisper()
67
 
68
+ # ===== PRELOAD DEFAULT MODEL =====
69
def preload_model():
    """Warm up the default model so the first real request is fast.

    Sends a minimal one-token generation request with ``keep_alive`` so
    Ollama loads the model into memory ahead of time. Failures are
    non-fatal: the app still works, the first request is just slower.
    """
    try:
        print("🔥 Preloading default model...")
        r = requests.post(
            f"{OLLAMA_URL}/api/generate",
            json={
                "model": "qwen2.5-coder:3b",
                "prompt": "Hi",
                "keep_alive": KEEP_ALIVE,
                "options": {"num_predict": 1},
            },
            timeout=60,
        )
        # Only claim success when Ollama actually accepted the request;
        # previously "✅ Model preloaded!" was printed even on HTTP errors.
        if r.status_code == 200:
            print("✅ Model preloaded!")
        else:
            print(f"⚠️ Preload returned HTTP {r.status_code}")
    except Exception as e:
        print(f"⚠️ Preload failed: {e}")
87
  # ===== HELPER FUNCTIONS =====
88
 
89
  def check_ollama_health():
 
120
  def transcribe_audio(audio):
121
  if audio is None:
122
  return ""
 
123
  if whisper_model is None:
124
  return "❌ Whisper not loaded. Voice input unavailable."
 
125
  try:
126
  segments, _ = whisper_model.transcribe(audio)
127
  text = " ".join([seg.text for seg in segments]).strip()
 
135
 
136
  def call_ollama_with_retry(model_name, prompt, temperature=0.7, max_tokens=2048):
137
  if not check_ollama_health():
138
+ return "❌ **Ollama is not running.**\n\nPlease wait for Ollama to start."
139
 
140
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
141
 
142
+ # Merge performance options with request options
143
+ options = {
144
+ **OLLAMA_OPTIONS,
145
+ "temperature": temperature,
146
+ "num_predict": max_tokens
147
+ }
148
+
149
  for attempt in range(MAX_RETRIES):
150
  try:
151
  r = requests.post(
 
154
  "model": model,
155
  "prompt": prompt,
156
  "stream": False,
157
+ "keep_alive": KEEP_ALIVE,
158
+ "options": options
 
 
159
  },
160
  timeout=TIMEOUT
161
  )
 
163
  if r.status_code == 200:
164
  response = r.json().get("response", "")
165
  if not response.strip():
166
+ return "⚠️ Model returned empty response. Try rephrasing."
167
  return response
 
168
  elif r.status_code == 404:
169
+ return f"❌ **Model not found:** `{model}`"
 
170
  elif r.status_code == 500:
171
+ error_msg = r.text[:200] if r.text else "Unknown error"
172
  if "out of memory" in error_msg.lower():
173
+ return "❌ **Out of memory.** Try a smaller model."
174
  return f"❌ **Server error:** {error_msg}"
 
175
  else:
176
  return f"❌ **HTTP {r.status_code}:** {r.text[:100]}"
177
 
 
179
  if attempt < MAX_RETRIES - 1:
180
  time.sleep(2)
181
  continue
182
+ return "❌ **Timeout.** Try smaller model or shorter input."
 
183
  except requests.exceptions.ConnectionError:
184
  if attempt < MAX_RETRIES - 1:
185
  time.sleep(2)
186
  continue
187
+ return "❌ **Connection failed.** Ollama may have crashed."
 
188
  except json.JSONDecodeError:
189
+ return "❌ **Invalid response from Ollama.**"
 
190
  except Exception as e:
191
+ return f"❌ **Error:** {str(e)[:100]}"
192
 
193
+ return "❌ **Max retries reached.**"
194
 
195
  def extract_code(text):
196
  if not text or "```" not in text:
197
  return text
 
198
  try:
199
  parts = text.split("```")
200
  if len(parts) >= 2:
 
202
  if "\n" in code:
203
  code = code.split("\n", 1)[-1]
204
  return code.strip()
205
+ except:
206
  pass
207
  return text
208
 
209
+ # ===== STREAMING FUNCTIONS (FASTER FEEDBACK) =====
210
 
211
  def chat_stream(message, history, model_name, temperature, max_tokens):
212
  valid, error = validate_input(message, "Message")
 
215
  return
216
 
217
  if not check_ollama_health():
218
+ yield history + [[message, "❌ **Ollama not running.**"]]
219
  return
220
 
221
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
222
+ messages = [{"role": "system", "content": "Expert coding assistant. Use markdown code blocks."}]
223
 
224
  for user_msg, assistant_msg in history:
225
  messages.append({"role": "user", "content": user_msg})
 
227
  messages.append({"role": "assistant", "content": assistant_msg})
228
 
229
  messages.append({"role": "user", "content": message})
230
+
231
+ options = {
232
+ **OLLAMA_OPTIONS,
233
+ "temperature": temperature,
234
+ "num_predict": max_tokens
235
+ }
236
 
237
  try:
238
  response = requests.post(
 
241
  "model": model,
242
  "messages": messages,
243
  "stream": True,
244
+ "keep_alive": KEEP_ALIVE,
245
+ "options": options
246
  },
247
  stream=True,
248
  timeout=TIMEOUT
249
  )
250
 
251
  if response.status_code == 404:
252
+ yield history + [[message, f"❌ **Model not found:** `{model}`"]]
253
  return
254
 
255
  if response.status_code != 200:
256
+ yield history + [[message, f"❌ **Error {response.status_code}**"]]
257
  return
258
 
259
  full = ""
 
262
  try:
263
  data = json.loads(line)
264
  if "error" in data:
265
+ yield history + [[message, f"❌ **Error:** {data['error']}"]]
266
  return
267
  if "message" in data:
268
  full += data["message"].get("content", "")
 
271
  continue
272
 
273
  if not full.strip():
274
+ yield history + [[message, "⚠️ Empty response. Try rephrasing."]]
275
 
276
  except requests.exceptions.Timeout:
277
+ yield history + [[message, "❌ **Timeout.** Try smaller model."]]
278
  except requests.exceptions.ConnectionError:
279
+ yield history + [[message, "❌ **Connection lost.**"]]
280
  except Exception as e:
281
+ yield history + [[message, f"❌ **Error:** {str(e)[:50]}"]]
282
 
283
+ # ===== STREAMING CODE GENERATION (NEW!) =====
284
+
285
def generate_code_stream(prompt, language, model_name, temperature, max_tokens):
    """Stream generated *language* code for *prompt*, yielding partial code.

    Yields progressively longer extracted code snippets as tokens arrive
    from Ollama, or a user-facing error string (❌/⚠️-prefixed) on failure.
    """
    valid, error = validate_input(prompt, "Description")
    if not valid:
        yield error
        return

    if not check_ollama_health():
        # Match the ❌ prefix used by every other error path; the original
        # string started with a stray space instead of the emoji.
        yield "❌ **Ollama not running.**"
        return

    model = MODELS.get(model_name, "qwen2.5-coder:3b")

    # Short, direct prompt keeps prefill time low.
    full_prompt = f"Write clean {language} code with comments for:\n{prompt}\n\nCode only:"

    options = {
        **OLLAMA_OPTIONS,
        "temperature": temperature,
        "num_predict": max_tokens,
    }

    try:
        response = requests.post(
            f"{OLLAMA_URL}/api/generate",
            json={
                "model": model,
                "prompt": full_prompt,
                "stream": True,
                "keep_alive": KEEP_ALIVE,
                "options": options,
            },
            stream=True,
            timeout=TIMEOUT,
        )

        if response.status_code != 200:
            yield f"❌ **Error {response.status_code}**"
            return

        full = ""
        for line in response.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Narrowed from a bare `except:` — skip malformed chunks
                # only, instead of silently swallowing every error.
                continue
            if "response" in data:
                full += data["response"]
                # Extract code as we stream so the UI updates live.
                yield extract_code(full)

    except requests.exceptions.Timeout:
        yield "❌ **Timeout.**"
    except Exception as e:
        yield f"❌ **Error:** {str(e)[:50]}"
340
+
341
+ # ===== OPTIMIZED CORE FUNCTIONS (SHORTER PROMPTS) =====
342
 
343
def explain_code(code, model_name, detail_level, max_tokens):
    """Ask the model to explain *code* at the requested detail level."""
    ok, err = validate_input(code, "Code")
    if not ok:
        return err

    # Choose the prompt wording for the selected level; any unknown
    # level falls back to the "Normal" wording.
    if detail_level == "Brief":
        prompt = f"Explain briefly (2-3 sentences):\n{code}"
    elif detail_level == "Detailed":
        prompt = f"Detailed explanation (purpose, logic, complexity, improvements):\n{code}"
    else:
        prompt = f"Explain this code:\n{code}"

    return call_ollama_with_retry(model_name, prompt, 0.5, max_tokens)
 
355
 
356
def fix_code(code, error_msg, model_name, max_tokens):
    """Ask the model to repair *code*, optionally guided by *error_msg*."""
    valid, validation_error = validate_input(code, "Code")
    if not valid:
        return validation_error

    # Use the caller's error description when given, else a generic one.
    if error_msg:
        described_error = error_msg.strip()
    else:
        described_error = "Not working"

    prompt = (
        f"Fix this code. Error: {described_error}\n\n"
        f"{code}\n\nFixed code and explanation:"
    )
    return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
364
 
365
  def review_code(code, model_name, max_tokens):
 
367
  if not valid:
368
  return error
369
 
370
+ prompt = f"Review for bugs, performance, security, and improvements:\n{code}"
 
 
 
 
 
 
 
 
371
  return call_ollama_with_retry(model_name, prompt, 0.4, max_tokens)
372
 
373
def convert_code(code, source_lang, target_lang, model_name, max_tokens):
    """Translate *code* from *source_lang* to *target_lang* via the model."""
    valid, error = validate_input(code, "Code")
    if not valid:
        return error
    if source_lang == target_lang:
        return "⚠️ Same language selected."

    prompt = f"Convert {source_lang} to {target_lang}. Output only code:\n{code}"
    result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)

    # Error/warning messages pass through untouched; only genuine model
    # output has its code fence stripped.
    if result.startswith(("❌", "⚠️")):
        return result
    return extract_code(result)
 
 
383
 
384
def generate_tests(code, language, framework, model_name, max_tokens):
    """Generate unit tests for *code* with *framework* (or a sensible default)."""
    valid, error = validate_input(code, "Code")
    if not valid:
        return error

    # Fall back to the default framework when the field is empty OR
    # whitespace-only; the original stripped "   " to "" and sent a
    # prompt with no framework name at all.
    fw = framework.strip() if framework and framework.strip() else (
        "pytest" if language == "Python" else "Jest"
    )
    prompt = f"Generate {fw} tests for this {language} code. Output only test code:\n{code}"
    result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
    # Pass error/warning messages through; strip the fence from real output.
    return result if result.startswith(("❌", "⚠️")) else extract_code(result)
 
 
393
 
394
def document_code(code, language, style, model_name, max_tokens):
    """Add documentation (docstrings/comments/README) to *code*."""
    valid, error = validate_input(code, "Code")
    if not valid:
        return error

    prompt = f"Add {style.lower()} to this {language} code:\n{code}"
    result = call_ollama_with_retry(model_name, prompt, 0.4, max_tokens)

    # README output is markdown, so keep it whole. Error AND warning
    # messages also pass through — the original missed the ⚠️ case,
    # inconsistent with convert_code/generate_tests.
    if style == "README" or result.startswith(("❌", "⚠️")):
        return result
    return extract_code(result)
 
 
402
 
403
def optimize_code(code, language, focus, model_name, max_tokens):
    """Ask the model to optimize *code* for the selected *focus* area."""
    valid, error = validate_input(code, "Code")
    if not valid:
        return error

    # Include the language in the prompt — the original accepted a
    # `language` argument from the UI dropdown but never used it.
    prompt = f"Optimize this {language} code for {focus.lower()}. Explain changes:\n{code}"
    return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
410
 
411
  def build_regex(description, model_name, max_tokens):
 
413
  if not valid:
414
  return error
415
 
416
+ prompt = f"Create regex for: {description}\n\nPattern, explanation, examples, Python code:"
 
 
 
 
 
 
 
 
 
417
  return call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
418
 
419
  def build_api(description, framework, model_name, max_tokens):
 
421
  if not valid:
422
  return error
423
 
424
+ prompt = f"Create {framework} REST endpoint:\n{description}\n\nCode with validation and error handling:"
 
 
 
 
 
 
 
 
 
 
 
425
  result = call_ollama_with_retry(model_name, prompt, 0.3, max_tokens)
426
+ return result if result.startswith("❌") else extract_code(result)
 
 
427
 
428
+ # ===== CSS =====
429
 
430
  css = """
 
431
  :root {
432
  --primary: #6366f1;
 
433
  --secondary: #8b5cf6;
 
434
  --bg-dark: #0f172a;
435
  --bg-card: #1e293b;
 
 
 
436
  --border: #334155;
437
+ --text-primary: #f1f5f9;
 
 
438
  --gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 50%, #06b6d4 100%);
439
  }
440
+ .gradio-container { max-width: 1500px !important; margin: auto !important; }
 
 
 
 
 
 
 
 
441
  .header-section {
442
  background: var(--gradient);
443
  border-radius: 20px;
444
  padding: 32px 40px;
445
  margin-bottom: 24px;
 
 
446
  box-shadow: 0 20px 40px rgba(99, 102, 241, 0.3);
447
  }
448
+ .header-title { color: white; margin: 0; font-size: 2.5rem; font-weight: 800; }
449
+ .header-subtitle { color: rgba(255,255,255,0.9); margin: 8px 0 0 0; }
450
+ .header-badges { display: flex; gap: 10px; flex-wrap: wrap; margin-top: 16px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  .badge {
452
  background: rgba(255,255,255,0.2);
 
453
  padding: 8px 16px;
454
  border-radius: 50px;
455
  font-size: 0.85rem;
 
456
  color: white;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  .tab-nav button.selected {
459
  background: var(--gradient) !important;
460
  color: white !important;
 
461
  }
462
+ button.primary {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  background: var(--gradient) !important;
464
  border: none !important;
465
+ border-radius: 10px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  }
467
+ .footer { text-align: center; padding: 24px; color: #94a3b8; font-size: 0.85rem; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  footer { display: none !important; }
 
 
 
 
 
 
 
 
469
  """
470
 
471
+ # ===== UI =====
472
 
473
  with gr.Blocks(title="Axon v6") as demo:
474
 
 
475
  gr.HTML("""
476
  <div class="header-section">
477
+ <h1 class="header-title">🔥 Axon v6</h1>
478
+ <p class="header-subtitle">AI-Powered Coding Assistant • Optimized for Speed</p>
479
+ <div class="header-badges">
480
+ <span class="badge">🤖 7 Models</span>
481
+ <span class="badge">🛠️ 9 Tools</span>
482
+ <span class="badge">⚡ Optimized</span>
483
+ <span class="badge">🔒 100% Local</span>
 
 
 
 
484
  </div>
485
  </div>
486
  """)
487
 
488
+ status = gr.Markdown(value=get_status, every=5)
 
 
489
 
 
490
  with gr.Row():
491
+ model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5 Coder 3B (Fast)", label="🤖 Model", scale=3)
 
 
 
 
 
492
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Creativity", scale=2)
493
  max_tokens = gr.Slider(256, 8192, value=2048, step=256, label="📏 Max Tokens", scale=2)
494
 
495
+ model_info = gr.Markdown(value="⚖️ Balanced ~2GBGreat all-rounder")
496
+ model_dropdown.change(get_model_info, model_dropdown, model_info)
497
 
498
  with gr.Tabs():
499
 
 
500
  with gr.TabItem("💬 Chat"):
501
  chatbot = gr.Chatbot(height=500)
502
  with gr.Row():
503
+ msg = gr.Textbox(placeholder="Ask anything...", show_label=False, scale=8)
 
 
 
504
  send = gr.Button("Send ➤", variant="primary", scale=1)
505
  with gr.Row():
506
+ audio = gr.Audio(sources=["microphone"], type="filepath", label="🎤", scale=2)
507
+ transcribe = gr.Button("🎤 Transcribe", scale=1)
508
  clear = gr.Button("🗑️ Clear", scale=1)
509
+ with gr.Accordion("💡 Examples", open=False):
510
+ gr.Examples(["Write a Python quicksort", "Explain async/await in JS"], inputs=msg)
 
 
 
 
 
511
 
 
512
  with gr.TabItem("⚡ Generate"):
513
  with gr.Row():
514
  with gr.Column(scale=1):
515
+ gen_prompt = gr.Textbox(label="📝 Describe what to build", lines=4)
 
 
 
 
516
  with gr.Row():
517
+ gen_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language", scale=2)
518
  gen_temp = gr.Slider(0, 1, value=0.3, step=0.1, label="🌡️", scale=1)
519
+ gen_btn = gr.Button("⚡ Generate (Streaming)", variant="primary")
520
  with gr.Column(scale=2):
521
+ gen_output = gr.Code(label="Code", language="python", lines=18)
522
 
 
523
  with gr.TabItem("🔍 Explain"):
524
  with gr.Row():
525
  with gr.Column(scale=1):
526
+ explain_input = gr.Code(label="📋 Code", lines=12)
527
+ explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal", label="Detail")
528
+ explain_btn = gr.Button("🔍 Explain", variant="primary")
 
 
 
529
  with gr.Column(scale=1):
530
+ explain_output = gr.Markdown()
531
 
 
532
  with gr.TabItem("🔧 Debug"):
533
  with gr.Row():
534
  with gr.Column(scale=1):
535
+ fix_input = gr.Code(label="🐛 Code", lines=10)
536
+ fix_error = gr.Textbox(label="❌ Error", lines=2)
537
+ fix_btn = gr.Button("🔧 Fix", variant="primary")
 
 
 
 
538
  with gr.Column(scale=1):
539
+ fix_output = gr.Markdown()
540
 
 
541
  with gr.TabItem("📋 Review"):
542
  with gr.Row():
543
  with gr.Column(scale=1):
544
+ review_input = gr.Code(label="📋 Code", lines=14)
545
+ review_btn = gr.Button("📋 Review", variant="primary")
546
  with gr.Column(scale=1):
547
+ review_output = gr.Markdown()
548
 
 
549
  with gr.TabItem("🔄 Convert"):
550
  with gr.Row():
551
  with gr.Column(scale=1):
552
+ convert_input = gr.Code(label="📥 Source", lines=12)
553
  with gr.Row():
554
+ convert_from = gr.Dropdown(LANGUAGES, value="Python", label="From")
555
+ convert_to = gr.Dropdown(LANGUAGES, value="JavaScript", label="To")
556
+ convert_btn = gr.Button("🔄 Convert", variant="primary")
557
  with gr.Column(scale=1):
558
+ convert_output = gr.Code(label="📤 Result", lines=12)
559
 
 
560
  with gr.TabItem("🧪 Test"):
561
  with gr.Row():
562
  with gr.Column(scale=1):
563
+ test_input = gr.Code(label="📋 Code", lines=12)
564
  with gr.Row():
565
+ test_lang = gr.Dropdown(LANGUAGES[:12], value="Python", label="Language")
566
+ test_fw = gr.Textbox(label="Framework", placeholder="pytest")
567
+ test_btn = gr.Button("🧪 Generate Tests", variant="primary")
568
  with gr.Column(scale=1):
569
+ test_output = gr.Code(label="Tests", lines=12)
570
 
 
571
  with gr.TabItem("📝 Document"):
572
  with gr.Row():
573
  with gr.Column(scale=1):
574
+ doc_input = gr.Code(label="📋 Code", lines=12)
575
  with gr.Row():
576
+ doc_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language")
577
+ doc_style = gr.Dropdown(["Docstrings", "Comments", "Both", "README"], value="Both", label="Style")
578
+ doc_btn = gr.Button("📝 Document", variant="primary")
 
 
 
579
  with gr.Column(scale=1):
580
+ doc_output = gr.Code(label="Documented", lines=12)
581
 
 
582
  with gr.TabItem("🚀 Optimize"):
583
  with gr.Row():
584
  with gr.Column(scale=1):
585
+ opt_input = gr.Code(label="📋 Code", lines=12)
586
  with gr.Row():
587
+ opt_lang = gr.Dropdown(LANGUAGES, value="Python", label="Language")
588
+ opt_focus = gr.Dropdown(["All", "Performance", "Readability", "Memory"], value="All", label="Focus")
589
+ opt_btn = gr.Button("🚀 Optimize", variant="primary")
 
 
 
590
  with gr.Column(scale=1):
591
+ opt_output = gr.Markdown()
592
 
 
593
  with gr.TabItem("🛠️ Tools"):
 
 
594
  gr.Markdown("### 🎯 Regex Builder")
595
  with gr.Row():
596
+ with gr.Column():
597
+ regex_desc = gr.Textbox(label="Describe pattern", lines=2)
 
 
 
 
598
  regex_btn = gr.Button("🎯 Build Regex", variant="primary")
599
+ with gr.Column():
600
+ regex_output = gr.Markdown()
601
 
602
+ gr.Markdown("---\n### 🔗 API Builder")
 
 
 
603
  with gr.Row():
604
+ with gr.Column():
605
+ api_desc = gr.Textbox(label="Describe endpoint", lines=2)
606
+ api_fw = gr.Dropdown(["FastAPI", "Express", "Flask", "Gin"], value="FastAPI", label="Framework")
 
 
 
 
 
 
 
607
  api_btn = gr.Button("🔗 Build API", variant="primary")
608
+ with gr.Column():
609
+ api_output = gr.Code(label="API Code", lines=12)
610
 
611
+ gr.HTML('<div class="footer">🔒 100% Local • ⚡ Optimized for Speed • Built with ❤️</div>')
 
 
 
 
 
 
612
 
613
+ # Events
 
614
  def respond(message, history, model, temp, tokens):
615
  history = history or []
616
+ for updated in chat_stream(message, history, model, temp, tokens):
617
+ yield updated, ""
618
 
619
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
620
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
621
  clear.click(lambda: [], None, chatbot)
622
+ transcribe.click(transcribe_audio, audio, msg)
623
+
624
+ # Streaming generate!
625
+ gen_btn.click(generate_code_stream, [gen_prompt, gen_lang, model_dropdown, gen_temp, max_tokens], gen_output)
626
 
 
627
  explain_btn.click(explain_code, [explain_input, model_dropdown, explain_detail, max_tokens], explain_output)
628
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
629
  review_btn.click(review_code, [review_input, model_dropdown, max_tokens], review_output)
630
  convert_btn.click(convert_code, [convert_input, convert_from, convert_to, model_dropdown, max_tokens], convert_output)
631
+ test_btn.click(generate_tests, [test_input, test_lang, test_fw, model_dropdown, max_tokens], test_output)
632
  doc_btn.click(document_code, [doc_input, doc_lang, doc_style, model_dropdown, max_tokens], doc_output)
633
  opt_btn.click(optimize_code, [opt_input, opt_lang, opt_focus, model_dropdown, max_tokens], opt_output)
634
  regex_btn.click(build_regex, [regex_desc, model_dropdown, max_tokens], regex_output)
635
+ api_btn.click(build_api, [api_desc, api_fw, model_dropdown, max_tokens], api_output)
636
+
637
+ # Preload model on startup
638
+ preload_model()
639
 
 
640
  demo.launch(server_name="0.0.0.0", server_port=7860)