mport os import gradio as gr import json import time from datetime import datetime from pathlib import Path from llama_cpp import Llama from faster_whisper import WhisperModel from huggingface_hub import hf_hub_download # Added for auto-download # ===== CONFIG ===== MODELS_DIR = "/data/models" MAX_TOKENS = 2048 CONTEXT_SIZE = 4096 MODEL_REPOS = { # 30B: Unsloth is the most reliable source for Qwen3 GGUFs currently "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF", # 3B: Qwen actually has an official one, but bartowski is safer fallback "qwen2.5-coder-3b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF", # 7B: Official Qwen GGUF is often missing/broken. Bartowski is the go-to here. "qwen2.5-coder-7b-instruct-q4_k_m.gguf": "bartowski/Qwen2.5-Coder-7B-Instruct-GGUF", # 14B: Bartowski is recommended for consistency "qwen2.5-coder-14b-instruct-q4_k_m.gguf": "bartowski/Qwen2.5-Coder-14B-Instruct-GGUF", # DeepSeek: Definitely needs community repo "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf": "bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF", # Tiny models "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF", "qwen2.5-coder-0.5b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF", } MODELS = { "โญ Qwen3 Coder 30B-A3B (Best)": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf", "๐ Qwen2.5 Coder 14B (Premium)": "qwen2.5-coder-14b-instruct-q4_k_m.gguf", "๐ง DeepSeek V2 Lite (Logic)": "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf", "โ๏ธ Qwen2.5 Coder 7B (Balanced)": "qwen2.5-coder-7b-instruct-q4_k_m.gguf", "๐ Qwen2.5 Coder 3B (Fast)": "qwen2.5-coder-3b-instruct-q4_k_m.gguf", "โก DeepSeek Coder 6.7B": "deepseek-coder-6.7b-instruct.Q4_K_M.gguf", "๐จ Qwen2.5 Coder 1.5B (Quick)": "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf", "๐ฌ Qwen2.5 Coder 0.5B (Instant)": "qwen2.5-coder-0.5b-instruct-q4_k_m.gguf", } MODEL_INFO = { "โญ Qwen3 Coder 30B-A3B (Best)": "๐ Best quality โข MoE 30B/3B โข ~10GB", "๐ Qwen2.5 
Coder 14B (Premium)": "๐ Premium โข ~8GB โข Complex tasks", "๐ง DeepSeek V2 Lite (Logic)": "๐ง MoE 16B โข ~9GB โข Algorithms", "โ๏ธ Qwen2.5 Coder 7B (Balanced)": "โ๏ธ Balanced โข ~4.5GB โข Recommended", "๐ Qwen2.5 Coder 3B (Fast)": "๐ Fast โข ~2GB โข Great all-rounder", "โก DeepSeek Coder 6.7B": "โก Logic focused โข ~4GB", "๐จ Qwen2.5 Coder 1.5B (Quick)": "๐จ Quick โข ~1GB โข Simple tasks", "๐ฌ Qwen2.5 Coder 0.5B (Instant)": "๐ฌ Instant โข ~0.3GB โข Lightning fast", } LANGUAGES = [ "Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++", "C#", "C", "PHP", "Ruby", "Swift", "Kotlin", "Scala", "R", "Julia", "Perl", "HTML/CSS", "SQL", "Bash", "PowerShell", "Lua" ] # ===== MODEL CACHE ===== loaded_models = {} current_model_name = None def load_model(model_name): global loaded_models, current_model_name if model_name == current_model_name and model_name in loaded_models: return loaded_models[model_name] if current_model_name and current_model_name != model_name: if current_model_name in loaded_models: del loaded_models[current_model_name] print(f"๐๏ธ Unloaded {current_model_name}") filename = MODELS.get(model_name) if not filename: return None model_path = os.path.join(MODELS_DIR, filename) # --- AUTO DOWNLOAD LOGIC --- if not os.path.exists(model_path): print(f"โฌ๏ธ Model not found. 
Attempting download for {filename}...") repo_id = MODEL_REPOS.get(filename, "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF") # Default fallback try: hf_hub_download( repo_id=repo_id, filename=filename, local_dir=MODELS_DIR, local_dir_use_symlinks=False ) print("โ Download complete!") except Exception as e: print(f"โ Download failed: {e}") return None print(f"๐ฅ Loading {model_name}...") try: llm = Llama( model_path=model_path, n_ctx=CONTEXT_SIZE, n_threads=4, n_batch=512, verbose=False ) loaded_models[model_name] = llm current_model_name = model_name print(f"โ {model_name} loaded!") return llm except Exception as e: print(f"โ Failed to load: {e}") return None # ===== WHISPER ===== whisper_model = None def init_whisper(): global whisper_model try: print("Loading Whisper...") whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8") print("โ Whisper ready!") except Exception as e: print(f"โ Whisper failed: {e}") init_whisper() # ===== HELPERS ===== def get_status(): available = [name for name, file in MODELS.items() if os.path.exists(os.path.join(MODELS_DIR, file))] if current_model_name: short = current_model_name.split('(')[0].strip().split()[-1] return f"๐ข Ready โข {len(available)} models โข Active: {short}" return f"๐ก {len(available)} models available" def get_model_info(model_name): return MODEL_INFO.get(model_name, "") def validate_input(text, name="Input"): if not text or not text.strip(): return False, f"โ ๏ธ {name} cannot be empty." if len(text) > 50000: return False, f"โ ๏ธ {name} too long." return True, None def transcribe_audio(audio): if not audio: return "" if not whisper_model: return "โ Whisper unavailable." try: segments, _ = whisper_model.transcribe(audio) return " ".join([s.text for s in segments]).strip() or "โ ๏ธ No speech detected." 
except Exception as e: return f"โ {str(e)[:50]}" def generate_response(model_name, prompt, temperature=0.7, max_tokens=2048): llm = load_model(model_name) if not llm: return "โ **Model not available.**" try: if "deepseek" in model_name.lower(): formatted = f"### Instruction:\n{prompt}\n\n### Response:\n" stop_tokens = ["### Instruction:", "### Response:"] else: formatted = f"<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n" stop_tokens = ["<|im_end|>", "<|im_start|>"] output = llm( formatted, max_tokens=max_tokens, temperature=temperature, top_p=0.9, top_k=40, repeat_penalty=1.1, stop=stop_tokens, echo=False ) response = output["choices"][0]["text"].strip() return response if response else "โ ๏ธ Empty response." except Exception as e: return f"โ **Error:** {str(e)[:100]}" def extract_code(text): if not text or "```" not in text: return text try: parts = text.split("```") if len(parts) >= 2: code = parts[1] if "\n" in code: code = code.split("\n", 1)[-1] return code.strip() except: pass return text # ===== HISTORY FUNCTIONS ===== def export_chat_history(history): if not history: return None, "โ ๏ธ No chat history to export." export = { "exported_at": datetime.now().isoformat(), "tool": "Axon v6 Chat", "messages": history # Direct dump for Gradio 5 format } filename = f"/tmp/axon_chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" with open(filename, "w") as f: json.dump(export, f, indent=2) return filename, f"โ Exported {len(history)} messages!" def export_code(code, language): if not code or not code.strip(): return None, "โ ๏ธ No code to export." 
ext_map = { "Python": "py", "JavaScript": "js", "TypeScript": "ts", "Go": "go", "Rust": "rs", "Java": "java", "C++": "cpp", "C#": "cs", "C": "c", "PHP": "php", "Ruby": "rb", "Swift": "swift", "Kotlin": "kt", "HTML/CSS": "html", "SQL": "sql", "Bash": "sh", "PowerShell": "ps1", "Lua": "lua" } ext = ext_map.get(language, "txt") filename = f"/tmp/axon_code_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{ext}" with open(filename, "w") as f: f.write(code) return filename, f"โ Exported as .{ext}!" # ===== STREAMING (UPDATED FOR GRADIO 5) ===== def chat_stream(message, history, model_name, temperature, max_tokens): history = history or [] valid, error = validate_input(message, "Message") if not valid: history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": error}) yield history return llm = load_model(model_name) if not llm: history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": "โ Model not available."}) yield history return if "deepseek" in model_name.lower(): conv = "### Instruction:\nYou are an expert coding assistant. Use markdown code blocks.\n\n" for msg in history: if msg['role'] == 'user': conv += f"User: {msg['content']}\n" else: conv += f"Assistant: {msg['content']}\n\n" conv += f"User: {message}\n\n### Response:\n" stop_tokens = ["### Instruction:", "User:"] else: conv = "<|im_start|>system\nYou are an expert coding assistant. 
Use markdown code blocks.<|im_end|>\n" for msg in history: role = msg['role'] content = msg['content'] conv += f"<|im_start|>{role}\n{content}<|im_end|>\n" conv += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n" stop_tokens = ["<|im_end|>", "<|im_start|>"] history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": ""}) try: full = "" for chunk in llm(conv, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stop=stop_tokens, stream=True): text_chunk = chunk["choices"][0]["text"] full += text_chunk history[-1]['content'] = full yield history except Exception as e: history[-1]['content'] = f"โ Error: {str(e)[:100]}" yield history def generate_stream(prompt, language, model_name, temperature, max_tokens): valid, error = validate_input(prompt, "Description") if not valid: yield error return llm = load_model(model_name) if not llm: yield "โ Model not available." return if "deepseek" in model_name.lower(): formatted = f"### Instruction:\nWrite clean {language} code with comments:\n{prompt}\n\nOutput only code:\n\n### Response:\n" stop_tokens = ["### Instruction:"] else: formatted = f"<|im_start|>system\nYou are an expert coder.<|im_end|>\n<|im_start|>user\nWrite clean {language} code with comments:\n{prompt}\n\nOutput only code:<|im_end|>\n<|im_start|>assistant\n" stop_tokens = ["<|im_end|>"] try: full = "" for chunk in llm(formatted, max_tokens=max_tokens, temperature=temperature, stop=stop_tokens, stream=True): full += chunk["choices"][0]["text"] yield extract_code(full) except Exception as e: yield f"โ {str(e)[:50]}" # ===== CORE FEATURES ===== def explain_code(code, model_name, detail, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err prompts = { "Brief": f"Explain briefly (2-3 sentences):\n{code}", "Normal": f"Explain this code:\n{code}", "Detailed": f"Detailed explanation (purpose, logic, complexity, improvements):\n{code}" } return generate_response(model_name, 
prompts.get(detail, prompts["Normal"]), 0.5, max_tokens) def fix_code(code, error_msg, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err e = error_msg.strip() if error_msg else "Not working" return generate_response(model_name, f"Fix this code. Error: {e}\n\n{code}\n\nFixed code and explanation:", 0.3, max_tokens) def review_code(code, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err return generate_response(model_name, f"Review for bugs, performance, security:\n{code}", 0.4, max_tokens) def convert_code(code, from_lang, to_lang, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err if from_lang == to_lang: return "โ ๏ธ Same language." result = generate_response(model_name, f"Convert {from_lang} to {to_lang}. Code only:\n{code}", 0.3, max_tokens) return result if result.startswith("โ") else extract_code(result) def generate_tests(code, language, framework, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err fw = framework.strip() if framework else "pytest" result = generate_response(model_name, f"Generate {fw} tests for {language}. Code only:\n{code}", 0.3, max_tokens) return result if result.startswith("โ") else extract_code(result) def document_code(code, language, style, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err result = generate_response(model_name, f"Add {style.lower()} to this {language} code:\n{code}", 0.4, max_tokens) return result if style == "README" or result.startswith("โ") else extract_code(result) def optimize_code(code, language, focus, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err return generate_response(model_name, f"Optimize {language} for {focus.lower()}. 
Explain:\n{code}", 0.3, max_tokens) def security_scan(code, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err prompt = """Security audit this code. Check for: 1. Injection vulnerabilities (SQL, XSS, Command) 2. Authentication issues 3. Data exposure 4. Input validation 5. Cryptography issues For each issue: Severity (๐ด๐ ๐ก๐ข), Location, Description, Fix. Code: """ + code return generate_response(model_name, prompt, 0.3, max_tokens) def analyze_complexity(code, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err prompt = """Analyze time and space complexity: 1. Time Complexity (Big O) 2. Space Complexity (Big O) 3. Best/Average/Worst cases 4. Bottlenecks 5. Optimization suggestions Code: """ + code return generate_response(model_name, prompt, 0.4, max_tokens) def build_sql(description, db_type, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err result = generate_response(model_name, f"Write optimized {db_type} SQL for:\n{description}\n\nSQL only:", 0.2, max_tokens) return result if result.startswith("โ") else extract_code(result) def build_shell(description, shell_type, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err result = generate_response(model_name, f"Write {shell_type} command for:\n{description}\n\nCommand only:", 0.2, max_tokens) return result if result.startswith("โ") else extract_code(result) def code_diff(code1, code2, model_name, max_tokens): v1, e1 = validate_input(code1, "Code 1") v2, e2 = validate_input(code2, "Code 2") if not v1: return e1 if not v2: return e2 prompt = f"""Compare these code snippets: 1. Key differences 2. Functionality changes 3. Performance impact 4. 
Which is better and why === CODE 1 === {code1} === CODE 2 === {code2}""" return generate_response(model_name, prompt, 0.4, max_tokens) def generate_mock_data(schema, count, format_type, model_name, max_tokens): valid, err = validate_input(schema, "Schema") if not valid: return err result = generate_response(model_name, f"Generate {count} realistic mock entries as {format_type}:\n{schema}", 0.7, max_tokens) return result if result.startswith("โ") else extract_code(result) def interview_challenge(topic, difficulty, language, model_name, max_tokens): valid, err = validate_input(topic, "Topic") if not valid: return err prompt = f"""Create {difficulty} {language} interview challenge about {topic}. Include: 1. Problem statement 2. Examples (2-3) 3. Constraints 4. Hints 5. Solution with explanation""" return generate_response(model_name, prompt, 0.6, max_tokens) def to_pseudocode(code, output_type, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err if output_type == "Pseudocode": prompt = f"Convert to pseudocode:\n{code}" else: prompt = f"Create Mermaid.js flowchart for:\n{code}" return generate_response(model_name, prompt, 0.3, max_tokens) def build_cron(description, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err return generate_response(model_name, f"Create cron expression for: {description}\n\nInclude: expression, breakdown, next 5 runs", 0.2, max_tokens) def build_regex(description, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err return generate_response(model_name, f"Create regex for: {description}\n\nPattern, explanation, examples, Python code:", 0.3, max_tokens) def build_api(description, framework, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err result = generate_response(model_name, f"Create {framework} REST endpoint:\n{description}\n\nCode:", 0.3, 
max_tokens) return result if result.startswith("โ") else extract_code(result) def convert_data_format(data, from_fmt, to_fmt, model_name, max_tokens): valid, err = validate_input(data, "Data") if not valid: return err if from_fmt == to_fmt: return "โ ๏ธ Same format." result = generate_response(model_name, f"Convert {from_fmt} to {to_fmt}:\n{data}\n\nOutput only:", 0.1, max_tokens) return result if result.startswith("โ") else extract_code(result) # ===== THEME ===== light_theme = gr.themes.Soft( primary_hue="indigo", secondary_hue="blue", ) dark_theme = gr.themes.Soft( primary_hue="indigo", secondary_hue="blue", ).set( body_background_fill="#0f172a", body_background_fill_dark="#0f172a", block_background_fill="#1e293b", block_background_fill_dark="#1e293b", border_color_primary="#334155", border_color_primary_dark="#334155", ) # ===== UI ===== # FIX: Title and theme moved here with gr.Blocks(title="Axon v6", theme=dark_theme) as demo: # State for theme is_dark = gr.State(True) # Header gr.HTML("""
AI Coding Assistant • 8 Models • 19 Tools • 100% Local
The ultimate free AI coding assistant - running 100% locally in your browser.
| Model | Size | Best For |
|---|---|---|
| โญ Qwen3 30B-A3B | ~10GB | Best quality (MoE) |
| ๐ Qwen2.5 14B | ~8GB | Premium tasks |
| ๐ง DeepSeek V2 Lite | ~9GB | Complex logic |
| โ๏ธ Qwen2.5 7B | ~4.5GB | Balanced |
| ๐ Qwen2.5 3B | ~2GB | Fast & capable |
| โก DeepSeek 6.7B | ~4GB | Algorithms |
| ๐จ Qwen2.5 1.5B | ~1GB | Quick tasks |
| ๐ฌ Qwen2.5 0.5B | ~0.3GB | Instant |
Core: Chat, Generate, Explain, Debug, Review
Advanced: Security, Complexity, Convert, Test, Document, Optimize, Diff, Pseudo, Interview
Builders: SQL, Shell, Cron, Regex, API
Data: Mock Data, Format Converter
๐ Pre-built Wheels โข ๐ฆ llama.cpp โข ๐ค Qwen Models
๐ฅ Axon v6 โข Built with โค๏ธ by AIencoder
Wheels: AIencoder/llama-cpp-wheels • Powered by llama.cpp