Spaces:

AIencoder
/

Axon

Sleeping

App Files Community

AIencoder committed on Jan 25

Commit

6fce65f

verified ·

1 Parent(s): 6871109

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -72

app.py CHANGED Viewed

@@ -1,17 +1,15 @@
 import gradio as gr
 import requests
 import json
-import time
 from faster_whisper import WhisperModel
 OLLAMA_URL = "http://localhost:11434"
-# Pointing to the working bartowski GGUF repo
 MODELS = {
-    "Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
     "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
-    "Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
-    "Qwen3-Coder 30B-A3B (Best)" : "hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF:Q4_K_M",
 }
 print("Loading Whisper...")
@@ -20,44 +18,11 @@ print("Whisper ready!")
 def check_ollama():
     try:
-        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
         return r.status_code == 200
     except:
         return False
-# NEW FUNCTION: Robust startup that waits for Ollama
-def ensure_model(model_name):
-    print(f"⏳ Waiting for Ollama to start...")
-    # Wait up to 30 seconds for Ollama to be ready
-    retries = 0
-    while not check_ollama():
-        time.sleep(2)
-        retries += 1
-        if retries > 15:
-            print("❌ Ollama failed to start in time.")
-            return
-    print(f"🔎 Checking for model: {model_name}")
-    try:
-        # Check if model is already loaded
-        check = requests.post(f"{OLLAMA_URL}/api/show", json={"name": model_name})
-        if check.status_code == 200:
-            print(f"✅ {model_name} is ready!")
-            return
-        # If not, pull it
-        print(f"📥 Downloading {model_name}... (This WILL take time for 30B)")
-        with requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name}, stream=True) as r:
-            for line in r.iter_lines():
-                pass
-        print(f"🎉 Download complete: {model_name}")
-    except Exception as e:
-        print(f"⚠️ Error pulling model: {e}")
-# TRIGGER DOWNLOAD IMMEDIATELY
-ensure_model("hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF:Q4_K_M")
 def transcribe_audio(audio):
     if audio is None:
         return ""
@@ -68,10 +33,6 @@ def transcribe_audio(audio):
         return f"[STT Error: {e}]"
 def chat_stream(message, history, model_name, temperature, max_tokens):
-    if not check_ollama():
-        yield "⏳ Ollama starting... wait 30 seconds and try again."
-        return
     model = MODELS.get(model_name, "qwen2.5-coder:3b")
     messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
@@ -103,11 +64,7 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
         yield f"Error: {e}"
 def generate_code(prompt, language, model_name, max_tokens):
-    if not prompt.strip():
-        return "Please describe what you want."
-    if not check_ollama():
-        return "⏳ Ollama starting..."
     model = MODELS.get(model_name, "qwen2.5-coder:3b")
     full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
@@ -122,23 +79,15 @@ def generate_code(prompt, language, model_name, max_tokens):
             if "```" in result:
                 parts = result.split("```")
                 if len(parts) >= 2:
-                    code = parts[1]
-                    if "\n" in code:
-                        code = code.split("\n", 1)[-1]
-                    return code.strip()
             return result
         return f"Error: {r.text}"
     except Exception as e:
         return f"Error: {e}"
 def explain_code(code, model_name, max_tokens):
-    if not code.strip():
-        return "Paste code to explain."
-    if not check_ollama():
-        return "⏳ Ollama starting..."
     model = MODELS.get(model_name, "qwen2.5-coder:3b")
     try:
         r = requests.post(
             f"{OLLAMA_URL}/api/generate",
@@ -150,14 +99,9 @@ def explain_code(code, model_name, max_tokens):
         return f"Error: {e}"
 def fix_code(code, error, model_name, max_tokens):
-    if not code.strip():
-        return "Paste code to fix."
-    if not check_ollama():
-        return "⏳ Ollama starting..."
     model = MODELS.get(model_name, "qwen2.5-coder:3b")
     prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
     try:
         r = requests.post(
             f"{OLLAMA_URL}/api/generate",
@@ -169,13 +113,12 @@ def fix_code(code, error, model_name, max_tokens):
         return f"Error: {e}"
 with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
-    gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
     with gr.Row():
         model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
-        temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Temperature")
-        max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens")
     with gr.Tabs():
         with gr.TabItem("💬 Chat"):
@@ -187,13 +130,13 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
             with gr.Row():
                 clear = gr.Button("Clear")
                 transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
-            gr.Examples(["Write a Python quicksort function", "Explain async/await in JavaScript"], inputs=msg)
         with gr.TabItem("⚡ Generate"):
             with gr.Row():
                 with gr.Column():
-                    gen_prompt = gr.Textbox(label="Describe what you want", lines=3)
-                    gen_lang = gr.Dropdown(["Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++"], value="Python", label="Language")
                     gen_btn = gr.Button("Generate", variant="primary")
                 gen_output = gr.Code(label="Code", language="python", lines=15)

 import gradio as gr
 import requests
 import json
 from faster_whisper import WhisperModel
 OLLAMA_URL = "http://localhost:11434"
+# We just list the models here.
+# They are guaranteed to exist because entrypoint.sh downloaded them first!
 MODELS = {
     "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
+    "Qwen3-Coder 30B-A3B (Best)": "hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF:Q4_K_M",
 }
 print("Loading Whisper...")
 def check_ollama():
     try:
+        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=2)
         return r.status_code == 200
     except:
         return False
 def transcribe_audio(audio):
     if audio is None:
         return ""
         return f"[STT Error: {e}]"
 def chat_stream(message, history, model_name, temperature, max_tokens):
     model = MODELS.get(model_name, "qwen2.5-coder:3b")
     messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
         yield f"Error: {e}"
 def generate_code(prompt, language, model_name, max_tokens):
+    if not prompt.strip(): return "Please describe what you want."
     model = MODELS.get(model_name, "qwen2.5-coder:3b")
     full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
             if "```" in result:
                 parts = result.split("```")
                 if len(parts) >= 2:
+                    return parts[1].split("\n", 1)[-1].strip() if "\n" in parts[1] else parts[1]
             return result
         return f"Error: {r.text}"
     except Exception as e:
         return f"Error: {e}"
 def explain_code(code, model_name, max_tokens):
+    if not code.strip(): return "Paste code to explain."
     model = MODELS.get(model_name, "qwen2.5-coder:3b")
     try:
         r = requests.post(
             f"{OLLAMA_URL}/api/generate",
         return f"Error: {e}"
 def fix_code(code, error, model_name, max_tokens):
+    if not code.strip(): return "Paste code to fix."
     model = MODELS.get(model_name, "qwen2.5-coder:3b")
     prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
     try:
         r = requests.post(
             f"{OLLAMA_URL}/api/generate",
         return f"Error: {e}"
 with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
+    gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder")
     with gr.Row():
         model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
+        temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="Temp")
+        max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="Max Tokens")
     with gr.Tabs():
         with gr.TabItem("💬 Chat"):
             with gr.Row():
                 clear = gr.Button("Clear")
                 transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
+            gr.Examples(["Write a Python quicksort function"], inputs=msg)
         with gr.TabItem("⚡ Generate"):
             with gr.Row():
                 with gr.Column():
+                    gen_prompt = gr.Textbox(label="Describe request", lines=3)
+                    gen_lang = gr.Dropdown(["Python", "JavaScript", "Go", "Rust", "C++"], value="Python", label="Language")
                     gen_btn = gr.Button("Generate", variant="primary")
                 gen_output = gr.Code(label="Code", language="python", lines=15)