AIencoder committed on
Commit
97c05be
·
verified ·
1 Parent(s): 1c4e80f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -22
app.py CHANGED
@@ -5,11 +5,12 @@ from faster_whisper import WhisperModel
5
 
6
  OLLAMA_URL = "http://localhost:11434"
7
 
 
8
  MODELS = {
9
  "Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
10
  "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
11
  "Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
12
- "Qwen3-Coder 30B-A3B (Best)": "qwen3-coder-30b-a3b",
13
  }
14
 
15
  print("Loading Whisper...")
@@ -23,6 +24,32 @@ def check_ollama():
23
  except:
24
  return False
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def transcribe_audio(audio):
27
  if audio is None:
28
  return ""
@@ -36,24 +63,24 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
36
  if not check_ollama():
37
  yield "⏳ Ollama starting... wait 30 seconds and try again."
38
  return
39
-
40
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
41
  messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
42
-
43
  for user_msg, assistant_msg in history:
44
  messages.append({"role": "user", "content": user_msg})
45
  if assistant_msg:
46
  messages.append({"role": "assistant", "content": assistant_msg})
47
-
48
  messages.append({"role": "user", "content": message})
49
-
50
  try:
51
  response = requests.post(
52
  f"{OLLAMA_URL}/api/chat",
53
  json={"model": model, "messages": messages, "stream": True, "options": {"temperature": temperature, "num_predict": max_tokens}},
54
  stream=True, timeout=300
55
  )
56
-
57
  full = ""
58
  for line in response.iter_lines():
59
  if line:
@@ -72,10 +99,10 @@ def generate_code(prompt, language, model_name, max_tokens):
72
  return "Please describe what you want."
73
  if not check_ollama():
74
  return "⏳ Ollama starting..."
75
-
76
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
77
  full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
78
-
79
  try:
80
  r = requests.post(
81
  f"{OLLAMA_URL}/api/generate",
@@ -101,9 +128,9 @@ def explain_code(code, model_name, max_tokens):
101
  return "Paste code to explain."
102
  if not check_ollama():
103
  return "⏳ Ollama starting..."
104
-
105
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
106
-
107
  try:
108
  r = requests.post(
109
  f"{OLLAMA_URL}/api/generate",
@@ -119,10 +146,10 @@ def fix_code(code, error, model_name, max_tokens):
119
  return "Paste code to fix."
120
  if not check_ollama():
121
  return "⏳ Ollama starting..."
122
-
123
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
124
  prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
125
-
126
  try:
127
  r = requests.post(
128
  f"{OLLAMA_URL}/api/generate",
@@ -134,14 +161,14 @@ def fix_code(code, error, model_name, max_tokens):
134
  return f"Error: {e}"
135
 
136
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
137
-
138
  gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
139
-
140
  with gr.Row():
141
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
142
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Temperature")
143
  max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens")
144
-
145
  with gr.Tabs():
146
  with gr.TabItem("💬 Chat"):
147
  chatbot = gr.Chatbot(height=400)
@@ -153,7 +180,7 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
153
  clear = gr.Button("Clear")
154
  transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
155
  gr.Examples(["Write a Python quicksort function", "Explain async/await in JavaScript"], inputs=msg)
156
-
157
  with gr.TabItem("⚡ Generate"):
158
  with gr.Row():
159
  with gr.Column():
@@ -161,13 +188,13 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
161
  gen_lang = gr.Dropdown(["Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++"], value="Python", label="Language")
162
  gen_btn = gr.Button("Generate", variant="primary")
163
  gen_output = gr.Code(label="Code", language="python", lines=15)
164
-
165
  with gr.TabItem("🔍 Explain"):
166
  with gr.Row():
167
  explain_input = gr.Code(label="Paste code", lines=10)
168
  explain_output = gr.Markdown()
169
- explain_btn = gr.Button("Explain", variant="primary")
170
-
171
  with gr.TabItem("🔧 Fix"):
172
  with gr.Row():
173
  with gr.Column():
@@ -175,12 +202,12 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
175
  fix_error = gr.Textbox(label="Error message", lines=2)
176
  fix_btn = gr.Button("Fix", variant="primary")
177
  fix_output = gr.Markdown()
178
-
179
  def respond(message, history, model, temp, tokens):
180
  history = history or []
181
  for chunk in chat_stream(message, history, model, temp, tokens):
182
  yield history + [[message, chunk]], ""
183
-
184
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
185
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
186
  clear.click(lambda: [], None, chatbot)
@@ -189,4 +216,4 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
189
  explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
190
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
191
 
192
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
5
 
6
  OLLAMA_URL = "http://localhost:11434"
7
 
8
+ # UPDATED: Pointing to the working bartowski GGUF repo
9
  MODELS = {
10
  "Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
11
  "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
12
  "Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
13
+ "Qwen3-Coder 30B-A3B (Best)": "hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF",
14
  }
15
 
16
  print("Loading Whisper...")
 
24
  except:
25
  return False
26
 
27
+ # NEW FUNCTION: Auto-download the model if missing
28
+ def ensure_model(model_name):
29
+ if not check_ollama():
30
+ print("❌ Ollama not running, skipping model download.")
31
+ return
32
+
33
+ print(f"🔎 Checking for model: {model_name}")
34
+ try:
35
+ # Check if model is already loaded
36
+ check = requests.post(f"{OLLAMA_URL}/api/show", json={"name": model_name})
37
+ if check.status_code == 200:
38
+ print(f"✅ {model_name} is ready!")
39
+ return
40
+
41
+ # If not, pull it
42
+ print(f"📥 Downloading {model_name}... (This may take a few minutes)")
43
+ with requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name}, stream=True) as r:
44
+ for line in r.iter_lines():
45
+ pass
46
+ print(f"🎉 Download complete: {model_name}")
47
+ except Exception as e:
48
+ print(f"⚠️ Error pulling model: {e}")
49
+
50
+ # TRIGGER DOWNLOAD IMMEDIATELY
51
+ ensure_model("hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF")
52
+
53
  def transcribe_audio(audio):
54
  if audio is None:
55
  return ""
 
63
  if not check_ollama():
64
  yield "⏳ Ollama starting... wait 30 seconds and try again."
65
  return
66
+
67
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
68
  messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
69
+
70
  for user_msg, assistant_msg in history:
71
  messages.append({"role": "user", "content": user_msg})
72
  if assistant_msg:
73
  messages.append({"role": "assistant", "content": assistant_msg})
74
+
75
  messages.append({"role": "user", "content": message})
76
+
77
  try:
78
  response = requests.post(
79
  f"{OLLAMA_URL}/api/chat",
80
  json={"model": model, "messages": messages, "stream": True, "options": {"temperature": temperature, "num_predict": max_tokens}},
81
  stream=True, timeout=300
82
  )
83
+
84
  full = ""
85
  for line in response.iter_lines():
86
  if line:
 
99
  return "Please describe what you want."
100
  if not check_ollama():
101
  return "⏳ Ollama starting..."
102
+
103
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
104
  full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
105
+
106
  try:
107
  r = requests.post(
108
  f"{OLLAMA_URL}/api/generate",
 
128
  return "Paste code to explain."
129
  if not check_ollama():
130
  return "⏳ Ollama starting..."
131
+
132
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
133
+
134
  try:
135
  r = requests.post(
136
  f"{OLLAMA_URL}/api/generate",
 
146
  return "Paste code to fix."
147
  if not check_ollama():
148
  return "⏳ Ollama starting..."
149
+
150
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
151
  prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
152
+
153
  try:
154
  r = requests.post(
155
  f"{OLLAMA_URL}/api/generate",
 
161
  return f"Error: {e}"
162
 
163
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
164
+
165
  gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
166
+
167
  with gr.Row():
168
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
169
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Temperature")
170
  max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens")
171
+
172
  with gr.Tabs():
173
  with gr.TabItem("💬 Chat"):
174
  chatbot = gr.Chatbot(height=400)
 
180
  clear = gr.Button("Clear")
181
  transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
182
  gr.Examples(["Write a Python quicksort function", "Explain async/await in JavaScript"], inputs=msg)
183
+
184
  with gr.TabItem("⚡ Generate"):
185
  with gr.Row():
186
  with gr.Column():
 
188
  gen_lang = gr.Dropdown(["Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++"], value="Python", label="Language")
189
  gen_btn = gr.Button("Generate", variant="primary")
190
  gen_output = gr.Code(label="Code", language="python", lines=15)
191
+
192
  with gr.TabItem("🔍 Explain"):
193
  with gr.Row():
194
  explain_input = gr.Code(label="Paste code", lines=10)
195
  explain_output = gr.Markdown()
196
+ explain_btn = gr.Button("Explain", variant="primary")
197
+
198
  with gr.TabItem("🔧 Fix"):
199
  with gr.Row():
200
  with gr.Column():
 
202
  fix_error = gr.Textbox(label="Error message", lines=2)
203
  fix_btn = gr.Button("Fix", variant="primary")
204
  fix_output = gr.Markdown()
205
+
206
  def respond(message, history, model, temp, tokens):
207
  history = history or []
208
  for chunk in chat_stream(message, history, model, temp, tokens):
209
  yield history + [[message, chunk]], ""
210
+
211
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
212
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
213
  clear.click(lambda: [], None, chatbot)
 
216
  explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
217
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
218
 
219
+ demo.launch(server_name="0.0.0.0", server_port=7860)