Spaces:

AIencoder
/

Axon

Sleeping

App Files Files Community

AIencoder committed on Jan 25

Commit

ed77a98

verified ·

1 Parent(s): 3a61070

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -13

app.py CHANGED Viewed

@@ -6,12 +6,20 @@ from datetime import datetime
 from pathlib import Path
 from llama_cpp import Llama
 from faster_whisper import WhisperModel
 # ===== CONFIG =====
 MODELS_DIR = "/data/models"
 MAX_TOKENS = 2048
 CONTEXT_SIZE = 4096
 MODELS = {
     "⭐ Qwen3 Coder 30B-A3B (Best)": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
     "🏆 Qwen2.5 Coder 14B (Premium)": "qwen2.5-coder-14b-instruct-q4_k_m.gguf",
@@ -60,9 +68,22 @@ def load_model(model_name):
         return None
     model_path = os.path.join(MODELS_DIR, filename)
     if not os.path.exists(model_path):
-        print(f"❌ Model not found: {model_path}")
-        return None
     print(f"📥 Loading {model_name}...")
     try:
@@ -208,12 +229,10 @@ def export_code(code, language):
 # ===== STREAMING (UPDATED FOR GRADIO 5) =====
 def chat_stream(message, history, model_name, temperature, max_tokens):
-    # Initialize history if None (Gradio 5 sometimes sends None on first load)
     history = history or []
     valid, error = validate_input(message, "Message")
     if not valid:
-        # Append error as assistant message
         history.append({"role": "user", "content": message})
         history.append({"role": "assistant", "content": error})
         yield history
@@ -226,7 +245,6 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
         yield history
         return
-    # Build conversation string from Dict history
     if "deepseek" in model_name.lower():
         conv = "### Instruction:\nYou are an expert coding assistant. Use markdown code blocks.\n\n"
         for msg in history:
@@ -245,9 +263,7 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
         conv += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
         stop_tokens = ["<|im_end|>", "<|im_start|>"]
-    # Add the new user message to history
     history.append({"role": "user", "content": message})
-    # Add a placeholder for the assistant response
     history.append({"role": "assistant", "content": ""})
     try:
@@ -255,7 +271,6 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
         for chunk in llm(conv, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stop=stop_tokens, stream=True):
             text_chunk = chunk["choices"][0]["text"]
             full += text_chunk
-            # Update the last message (assistant's response)
             history[-1]['content'] = full
             yield history
     except Exception as e:
@@ -490,8 +505,8 @@ dark_theme = gr.themes.Soft(
 # ===== UI =====
-# NOTE: Theme and title moved to launch() for Gradio 5+ compatibility
-with gr.Blocks() as demo:
     # State for theme
     is_dark = gr.State(True)
@@ -623,7 +638,7 @@ with gr.Blocks() as demo:
         # ===== EXPLAIN =====
         with gr.TabItem("🔍 Explain"):
             with gr.Row():
-                with gr.Column():
                     explain_input = gr.Code(label="Code", lines=10)
                     explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal")
                     explain_btn = gr.Button("🔍 Explain", variant="primary")
@@ -879,5 +894,5 @@ with gr.Blocks() as demo:
 print("🔥 Preloading model...")
 load_model("🚀 Qwen2.5 Coder 3B (Fast)")
-# Theme and title are now passed here to avoid the UserWarning
-demo.launch(server_name="0.0.0.0", server_port=7860, theme=dark_theme, title="Axon v6")

 from pathlib import Path
 from llama_cpp import Llama
 from faster_whisper import WhisperModel
+from huggingface_hub import hf_hub_download  # Added for auto-download
 # ===== CONFIG =====
 MODELS_DIR = "/data/models"
 MAX_TOKENS = 2048
 CONTEXT_SIZE = 4096
+# Map GGUF filenames to the Hugging Face repos that host them (unlisted filenames fall back to the Qwen 3B repo)
+MODEL_REPOS = {
+    "qwen2.5-coder-3b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
+    "qwen2.5-coder-7b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
+    # You can add others here
+}
 MODELS = {
     "⭐ Qwen3 Coder 30B-A3B (Best)": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
     "🏆 Qwen2.5 Coder 14B (Premium)": "qwen2.5-coder-14b-instruct-q4_k_m.gguf",
         return None
     model_path = os.path.join(MODELS_DIR, filename)
+    # --- AUTO DOWNLOAD LOGIC ---
     if not os.path.exists(model_path):
+        print(f"⬇️ Model not found. Attempting download for {filename}...")
+        repo_id = MODEL_REPOS.get(filename, "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF") # Default fallback
+        try:
+            hf_hub_download(
+                repo_id=repo_id,
+                filename=filename,
+                local_dir=MODELS_DIR,
+                local_dir_use_symlinks=False
+            )
+            print("✅ Download complete!")
+        except Exception as e:
+            print(f"❌ Download failed: {e}")
+            return None
     print(f"📥 Loading {model_name}...")
     try:
 # ===== STREAMING (UPDATED FOR GRADIO 5) =====
 def chat_stream(message, history, model_name, temperature, max_tokens):
     history = history or []
     valid, error = validate_input(message, "Message")
     if not valid:
         history.append({"role": "user", "content": message})
         history.append({"role": "assistant", "content": error})
         yield history
         yield history
         return
     if "deepseek" in model_name.lower():
         conv = "### Instruction:\nYou are an expert coding assistant. Use markdown code blocks.\n\n"
         for msg in history:
         conv += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
         stop_tokens = ["<|im_end|>", "<|im_start|>"]
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": ""})
     try:
         for chunk in llm(conv, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stop=stop_tokens, stream=True):
             text_chunk = chunk["choices"][0]["text"]
             full += text_chunk
             history[-1]['content'] = full
             yield history
     except Exception as e:
 # ===== UI =====
+# FIX: Title and theme moved here
+with gr.Blocks(title="Axon v6", theme=dark_theme) as demo:
     # State for theme
     is_dark = gr.State(True)
         # ===== EXPLAIN =====
         with gr.TabItem("🔍 Explain"):
             with gr.Row():
+                with gr.Column(): # FIXED: used to be Column()
                     explain_input = gr.Code(label="Code", lines=10)
                     explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal")
                     explain_btn = gr.Button("🔍 Explain", variant="primary")
 print("🔥 Preloading model...")
 load_model("🚀 Qwen2.5 Coder 3B (Fast)")
+# Launch (Removed 'title' and 'theme', they are in Blocks)
+demo.launch(server_name="0.0.0.0", server_port=7860)