Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,11 +5,12 @@ from faster_whisper import WhisperModel
|
|
| 5 |
|
| 6 |
OLLAMA_URL = "http://localhost:11434"
|
| 7 |
|
|
|
|
| 8 |
MODELS = {
|
| 9 |
"Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
|
| 10 |
"Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
|
| 11 |
"Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
|
| 12 |
-
"Qwen3-Coder 30B-A3B (Best)": "
|
| 13 |
}
|
| 14 |
|
| 15 |
print("Loading Whisper...")
|
|
@@ -23,6 +24,32 @@ def check_ollama():
|
|
| 23 |
except:
|
| 24 |
return False
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
def transcribe_audio(audio):
|
| 27 |
if audio is None:
|
| 28 |
return ""
|
|
@@ -36,24 +63,24 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
|
|
| 36 |
if not check_ollama():
|
| 37 |
yield "⏳ Ollama starting... wait 30 seconds and try again."
|
| 38 |
return
|
| 39 |
-
|
| 40 |
model = MODELS.get(model_name, "qwen2.5-coder:3b")
|
| 41 |
messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
|
| 42 |
-
|
| 43 |
for user_msg, assistant_msg in history:
|
| 44 |
messages.append({"role": "user", "content": user_msg})
|
| 45 |
if assistant_msg:
|
| 46 |
messages.append({"role": "assistant", "content": assistant_msg})
|
| 47 |
-
|
| 48 |
messages.append({"role": "user", "content": message})
|
| 49 |
-
|
| 50 |
try:
|
| 51 |
response = requests.post(
|
| 52 |
f"{OLLAMA_URL}/api/chat",
|
| 53 |
json={"model": model, "messages": messages, "stream": True, "options": {"temperature": temperature, "num_predict": max_tokens}},
|
| 54 |
stream=True, timeout=300
|
| 55 |
)
|
| 56 |
-
|
| 57 |
full = ""
|
| 58 |
for line in response.iter_lines():
|
| 59 |
if line:
|
|
@@ -72,10 +99,10 @@ def generate_code(prompt, language, model_name, max_tokens):
|
|
| 72 |
return "Please describe what you want."
|
| 73 |
if not check_ollama():
|
| 74 |
return "⏳ Ollama starting..."
|
| 75 |
-
|
| 76 |
model = MODELS.get(model_name, "qwen2.5-coder:3b")
|
| 77 |
full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
|
| 78 |
-
|
| 79 |
try:
|
| 80 |
r = requests.post(
|
| 81 |
f"{OLLAMA_URL}/api/generate",
|
|
@@ -101,9 +128,9 @@ def explain_code(code, model_name, max_tokens):
|
|
| 101 |
return "Paste code to explain."
|
| 102 |
if not check_ollama():
|
| 103 |
return "⏳ Ollama starting..."
|
| 104 |
-
|
| 105 |
model = MODELS.get(model_name, "qwen2.5-coder:3b")
|
| 106 |
-
|
| 107 |
try:
|
| 108 |
r = requests.post(
|
| 109 |
f"{OLLAMA_URL}/api/generate",
|
|
@@ -119,10 +146,10 @@ def fix_code(code, error, model_name, max_tokens):
|
|
| 119 |
return "Paste code to fix."
|
| 120 |
if not check_ollama():
|
| 121 |
return "⏳ Ollama starting..."
|
| 122 |
-
|
| 123 |
model = MODELS.get(model_name, "qwen2.5-coder:3b")
|
| 124 |
prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
|
| 125 |
-
|
| 126 |
try:
|
| 127 |
r = requests.post(
|
| 128 |
f"{OLLAMA_URL}/api/generate",
|
|
@@ -134,14 +161,14 @@ def fix_code(code, error, model_name, max_tokens):
|
|
| 134 |
return f"Error: {e}"
|
| 135 |
|
| 136 |
with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
|
| 137 |
-
|
| 138 |
gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
|
| 139 |
-
|
| 140 |
with gr.Row():
|
| 141 |
model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
|
| 142 |
temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Temperature")
|
| 143 |
max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens")
|
| 144 |
-
|
| 145 |
with gr.Tabs():
|
| 146 |
with gr.TabItem("💬 Chat"):
|
| 147 |
chatbot = gr.Chatbot(height=400)
|
|
@@ -153,7 +180,7 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
|
|
| 153 |
clear = gr.Button("Clear")
|
| 154 |
transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
|
| 155 |
gr.Examples(["Write a Python quicksort function", "Explain async/await in JavaScript"], inputs=msg)
|
| 156 |
-
|
| 157 |
with gr.TabItem("⚡ Generate"):
|
| 158 |
with gr.Row():
|
| 159 |
with gr.Column():
|
|
@@ -161,13 +188,13 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
|
|
| 161 |
gen_lang = gr.Dropdown(["Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++"], value="Python", label="Language")
|
| 162 |
gen_btn = gr.Button("Generate", variant="primary")
|
| 163 |
gen_output = gr.Code(label="Code", language="python", lines=15)
|
| 164 |
-
|
| 165 |
with gr.TabItem("🔍 Explain"):
|
| 166 |
with gr.Row():
|
| 167 |
explain_input = gr.Code(label="Paste code", lines=10)
|
| 168 |
explain_output = gr.Markdown()
|
| 169 |
-
|
| 170 |
-
|
| 171 |
with gr.TabItem("🔧 Fix"):
|
| 172 |
with gr.Row():
|
| 173 |
with gr.Column():
|
|
@@ -175,12 +202,12 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
|
|
| 175 |
fix_error = gr.Textbox(label="Error message", lines=2)
|
| 176 |
fix_btn = gr.Button("Fix", variant="primary")
|
| 177 |
fix_output = gr.Markdown()
|
| 178 |
-
|
| 179 |
def respond(message, history, model, temp, tokens):
|
| 180 |
history = history or []
|
| 181 |
for chunk in chat_stream(message, history, model, temp, tokens):
|
| 182 |
yield history + [[message, chunk]], ""
|
| 183 |
-
|
| 184 |
msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
|
| 185 |
send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
|
| 186 |
clear.click(lambda: [], None, chatbot)
|
|
@@ -189,4 +216,4 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
|
|
| 189 |
explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
|
| 190 |
fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
|
| 191 |
|
| 192 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 5 |
|
| 6 |
OLLAMA_URL = "http://localhost:11434"
|
| 7 |
|
| 8 |
+
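# UPDATED: the 30B entry now points at the bartowski GGUF repo on hf.co.
# NOTE(review): the repo name below has no "Coder" in it, while the label says "Qwen3-Coder" - confirm this is the intended model.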
|
| 9 |
MODELS = {
|
| 10 |
"Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
|
| 11 |
"Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
|
| 12 |
"Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
|
| 13 |
+
"Qwen3-Coder 30B-A3B (Best)": "hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF",
|
| 14 |
}
|
| 15 |
|
| 16 |
print("Loading Whisper...")
|
|
|
|
| 24 |
except:
|
| 25 |
return False
|
| 26 |
|
| 27 |
+
# NEW FUNCTION: Auto-download the model if missing
|
| 28 |
+
def ensure_model(model_name):
    """Make sure ``model_name`` is available in the local Ollama server.

    Best-effort: every outcome is reported with ``print`` and the function
    never raises, so a failed download does not stop the app from starting.

    Steps:
      1. If Ollama is not reachable (``check_ollama()`` is falsy), give up.
      2. POST ``/api/show``; a 200 response is treated as "model present".
      3. Otherwise POST ``/api/pull`` with ``stream=True`` and read the
         response to the end, which blocks until the server finishes.

    Args:
        model_name: Model identifier sent to Ollama as the ``name`` field.

    Returns:
        None.
    """
    import json  # local import: only needed to decode pull progress lines

    if not check_ollama():
        print("❌ Ollama not running, skipping model download.")
        return

    print(f"🔎 Checking for model: {model_name}")
    try:
        # A 200 from /api/show means the server already knows this model.
        # The timeout keeps a wedged server from hanging start-up forever.
        check = requests.post(
            f"{OLLAMA_URL}/api/show",
            json={"name": model_name},
            timeout=30,
        )
        if check.status_code == 200:
            print(f"✅ {model_name} is ready!")
            return

        # If not, pull it
        print(f"📥 Downloading {model_name}... (This may take a few minutes)")
        pull_error = None
        # Connect timeout only: the download itself may legitimately run for
        # a long time, so no read timeout is imposed on the stream.
        with requests.post(
            f"{OLLAMA_URL}/api/pull",
            json={"name": model_name},
            stream=True,
            timeout=(10, None),
        ) as r:
            if r.status_code != 200:
                pull_error = f"HTTP {r.status_code}"
            # Drain the stream so the call blocks until the pull ends, and
            # remember any error the server reports along the way instead of
            # claiming success unconditionally.
            for line in r.iter_lines():
                if not line:
                    continue
                try:
                    update = json.loads(line)
                except ValueError:
                    # Not a JSON line; nothing to learn from it.
                    continue
                if isinstance(update, dict) and update.get("error"):
                    pull_error = update["error"]

        if pull_error:
            print(f"⚠️ Error pulling model: {pull_error}")
            return
        print(f"🎉 Download complete: {model_name}")
    except Exception as e:
        # Deliberately broad: this runs during start-up and must not crash
        # the app; the failure is reported and the caller carries on.
        print(f"⚠️ Error pulling model: {e}")
|
| 49 |
+
|
| 50 |
+
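# Trigger the download right away. ensure_model() is synchronous, so a first-time pull delays everything after this line until it finishes or fails.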
|
| 51 |
+
ensure_model("hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF")
|
| 52 |
+
|
| 53 |
def transcribe_audio(audio):
|
| 54 |
if audio is None:
|
| 55 |
return ""
|
|
|
|
| 63 |
if not check_ollama():
|
| 64 |
yield "⏳ Ollama starting... wait 30 seconds and try again."
|
| 65 |
return
|
| 66 |
+
|
| 67 |
model = MODELS.get(model_name, "qwen2.5-coder:3b")
|
| 68 |
messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
|
| 69 |
+
|
| 70 |
for user_msg, assistant_msg in history:
|
| 71 |
messages.append({"role": "user", "content": user_msg})
|
| 72 |
if assistant_msg:
|
| 73 |
messages.append({"role": "assistant", "content": assistant_msg})
|
| 74 |
+
|
| 75 |
messages.append({"role": "user", "content": message})
|
| 76 |
+
|
| 77 |
try:
|
| 78 |
response = requests.post(
|
| 79 |
f"{OLLAMA_URL}/api/chat",
|
| 80 |
json={"model": model, "messages": messages, "stream": True, "options": {"temperature": temperature, "num_predict": max_tokens}},
|
| 81 |
stream=True, timeout=300
|
| 82 |
)
|
| 83 |
+
|
| 84 |
full = ""
|
| 85 |
for line in response.iter_lines():
|
| 86 |
if line:
|
|
|
|
| 99 |
return "Please describe what you want."
|
| 100 |
if not check_ollama():
|
| 101 |
return "⏳ Ollama starting..."
|
| 102 |
+
|
| 103 |
model = MODELS.get(model_name, "qwen2.5-coder:3b")
|
| 104 |
full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
|
| 105 |
+
|
| 106 |
try:
|
| 107 |
r = requests.post(
|
| 108 |
f"{OLLAMA_URL}/api/generate",
|
|
|
|
| 128 |
return "Paste code to explain."
|
| 129 |
if not check_ollama():
|
| 130 |
return "⏳ Ollama starting..."
|
| 131 |
+
|
| 132 |
model = MODELS.get(model_name, "qwen2.5-coder:3b")
|
| 133 |
+
|
| 134 |
try:
|
| 135 |
r = requests.post(
|
| 136 |
f"{OLLAMA_URL}/api/generate",
|
|
|
|
| 146 |
return "Paste code to fix."
|
| 147 |
if not check_ollama():
|
| 148 |
return "⏳ Ollama starting..."
|
| 149 |
+
|
| 150 |
model = MODELS.get(model_name, "qwen2.5-coder:3b")
|
| 151 |
prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
|
| 152 |
+
|
| 153 |
try:
|
| 154 |
r = requests.post(
|
| 155 |
f"{OLLAMA_URL}/api/generate",
|
|
|
|
| 161 |
return f"Error: {e}"
|
| 162 |
|
| 163 |
with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
|
| 164 |
+
|
| 165 |
gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
|
| 166 |
+
|
| 167 |
with gr.Row():
|
| 168 |
model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
|
| 169 |
temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Temperature")
|
| 170 |
max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens")
|
| 171 |
+
|
| 172 |
with gr.Tabs():
|
| 173 |
with gr.TabItem("💬 Chat"):
|
| 174 |
chatbot = gr.Chatbot(height=400)
|
|
|
|
| 180 |
clear = gr.Button("Clear")
|
| 181 |
transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
|
| 182 |
gr.Examples(["Write a Python quicksort function", "Explain async/await in JavaScript"], inputs=msg)
|
| 183 |
+
|
| 184 |
with gr.TabItem("⚡ Generate"):
|
| 185 |
with gr.Row():
|
| 186 |
with gr.Column():
|
|
|
|
| 188 |
gen_lang = gr.Dropdown(["Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++"], value="Python", label="Language")
|
| 189 |
gen_btn = gr.Button("Generate", variant="primary")
|
| 190 |
gen_output = gr.Code(label="Code", language="python", lines=15)
|
| 191 |
+
|
| 192 |
with gr.TabItem("🔍 Explain"):
|
| 193 |
with gr.Row():
|
| 194 |
explain_input = gr.Code(label="Paste code", lines=10)
|
| 195 |
explain_output = gr.Markdown()
|
| 196 |
+
explain_btn = gr.Button("Explain", variant="primary")
|
| 197 |
+
|
| 198 |
with gr.TabItem("🔧 Fix"):
|
| 199 |
with gr.Row():
|
| 200 |
with gr.Column():
|
|
|
|
| 202 |
fix_error = gr.Textbox(label="Error message", lines=2)
|
| 203 |
fix_btn = gr.Button("Fix", variant="primary")
|
| 204 |
fix_output = gr.Markdown()
|
| 205 |
+
|
| 206 |
def respond(message, history, model, temp, tokens):
|
| 207 |
history = history or []
|
| 208 |
for chunk in chat_stream(message, history, model, temp, tokens):
|
| 209 |
yield history + [[message, chunk]], ""
|
| 210 |
+
|
| 211 |
msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
|
| 212 |
send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
|
| 213 |
clear.click(lambda: [], None, chatbot)
|
|
|
|
| 216 |
explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
|
| 217 |
fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
|
| 218 |
|
| 219 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|