AIencoder committed on
Commit
97c05be
·
verified ·
1 Parent(s): 1c4e80f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -22
app.py CHANGED
@@ -5,11 +5,12 @@ from faster_whisper import WhisperModel
5
 
6
  OLLAMA_URL = "http://localhost:11434"
7
 
 
8
  MODELS = {
9
  "Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
10
  "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
11
  "Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
12
- "Qwen3-Coder 30B-A3B (Best)": "qwen3-coder-30b-a3b",
13
  }
14
 
15
  print("Loading Whisper...")
@@ -23,6 +24,32 @@ def check_ollama():
23
  except:
24
  return False
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def transcribe_audio(audio):
27
  if audio is None:
28
  return ""
@@ -36,24 +63,24 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
36
  if not check_ollama():
37
  yield "⏳ Ollama starting... wait 30 seconds and try again."
38
  return
39
-
40
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
41
  messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
42
-
43
  for user_msg, assistant_msg in history:
44
  messages.append({"role": "user", "content": user_msg})
45
  if assistant_msg:
46
  messages.append({"role": "assistant", "content": assistant_msg})
47
-
48
  messages.append({"role": "user", "content": message})
49
-
50
  try:
51
  response = requests.post(
52
  f"{OLLAMA_URL}/api/chat",
53
  json={"model": model, "messages": messages, "stream": True, "options": {"temperature": temperature, "num_predict": max_tokens}},
54
  stream=True, timeout=300
55
  )
56
-
57
  full = ""
58
  for line in response.iter_lines():
59
  if line:
@@ -72,10 +99,10 @@ def generate_code(prompt, language, model_name, max_tokens):
72
  return "Please describe what you want."
73
  if not check_ollama():
74
  return "⏳ Ollama starting..."
75
-
76
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
77
  full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
78
-
79
  try:
80
  r = requests.post(
81
  f"{OLLAMA_URL}/api/generate",
@@ -101,9 +128,9 @@ def explain_code(code, model_name, max_tokens):
101
  return "Paste code to explain."
102
  if not check_ollama():
103
  return "⏳ Ollama starting..."
104
-
105
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
106
-
107
  try:
108
  r = requests.post(
109
  f"{OLLAMA_URL}/api/generate",
@@ -119,10 +146,10 @@ def fix_code(code, error, model_name, max_tokens):
119
  return "Paste code to fix."
120
  if not check_ollama():
121
  return "⏳ Ollama starting..."
122
-
123
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
124
  prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
125
-
126
  try:
127
  r = requests.post(
128
  f"{OLLAMA_URL}/api/generate",
@@ -134,14 +161,14 @@ def fix_code(code, error, model_name, max_tokens):
134
  return f"Error: {e}"
135
 
136
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
137
-
138
  gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
139
-
140
  with gr.Row():
141
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
142
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Temperature")
143
  max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens")
144
-
145
  with gr.Tabs():
146
  with gr.TabItem("💬 Chat"):
147
  chatbot = gr.Chatbot(height=400)
@@ -153,7 +180,7 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
153
  clear = gr.Button("Clear")
154
  transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
155
  gr.Examples(["Write a Python quicksort function", "Explain async/await in JavaScript"], inputs=msg)
156
-
157
  with gr.TabItem("⚡ Generate"):
158
  with gr.Row():
159
  with gr.Column():
@@ -161,13 +188,13 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
161
  gen_lang = gr.Dropdown(["Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++"], value="Python", label="Language")
162
  gen_btn = gr.Button("Generate", variant="primary")
163
  gen_output = gr.Code(label="Code", language="python", lines=15)
164
-
165
  with gr.TabItem("🔍 Explain"):
166
  with gr.Row():
167
  explain_input = gr.Code(label="Paste code", lines=10)
168
  explain_output = gr.Markdown()
169
- explain_btn = gr.Button("Explain", variant="primary")
170
-
171
  with gr.TabItem("🔧 Fix"):
172
  with gr.Row():
173
  with gr.Column():
@@ -175,12 +202,12 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
175
  fix_error = gr.Textbox(label="Error message", lines=2)
176
  fix_btn = gr.Button("Fix", variant="primary")
177
  fix_output = gr.Markdown()
178
-
179
  def respond(message, history, model, temp, tokens):
180
  history = history or []
181
  for chunk in chat_stream(message, history, model, temp, tokens):
182
  yield history + [[message, chunk]], ""
183
-
184
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
185
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
186
  clear.click(lambda: [], None, chatbot)
@@ -189,4 +216,4 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
189
  explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
190
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
191
 
192
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
5
 
6
  OLLAMA_URL = "http://localhost:11434"
7
 
8
+ # UPDATED: Pointing to the working bartowski GGUF repo
9
  MODELS = {
10
  "Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
11
  "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
12
  "Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
13
+ "Qwen3-Coder 30B-A3B (Best)": "hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF",
14
  }
15
 
16
  print("Loading Whisper...")
 
24
  except:
25
  return False
26
 
27
+ # NEW FUNCTION: Auto-download the model if missing
28
+ def ensure_model(model_name):
29
+ if not check_ollama():
30
+ print("❌ Ollama not running, skipping model download.")
31
+ return
32
+
33
+ print(f"🔎 Checking for model: {model_name}")
34
+ try:
35
+ # Check if model is already loaded
36
+ check = requests.post(f"{OLLAMA_URL}/api/show", json={"name": model_name})
37
+ if check.status_code == 200:
38
+ print(f"✅ {model_name} is ready!")
39
+ return
40
+
41
+ # If not, pull it
42
+ print(f"📥 Downloading {model_name}... (This may take a few minutes)")
43
+ with requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name}, stream=True) as r:
44
+ for line in r.iter_lines():
45
+ pass
46
+ print(f"🎉 Download complete: {model_name}")
47
+ except Exception as e:
48
+ print(f"⚠️ Error pulling model: {e}")
49
+
50
+ # TRIGGER DOWNLOAD IMMEDIATELY
51
+ ensure_model("hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF")
52
+
53
  def transcribe_audio(audio):
54
  if audio is None:
55
  return ""
 
63
  if not check_ollama():
64
  yield "⏳ Ollama starting... wait 30 seconds and try again."
65
  return
66
+
67
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
68
  messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
69
+
70
  for user_msg, assistant_msg in history:
71
  messages.append({"role": "user", "content": user_msg})
72
  if assistant_msg:
73
  messages.append({"role": "assistant", "content": assistant_msg})
74
+
75
  messages.append({"role": "user", "content": message})
76
+
77
  try:
78
  response = requests.post(
79
  f"{OLLAMA_URL}/api/chat",
80
  json={"model": model, "messages": messages, "stream": True, "options": {"temperature": temperature, "num_predict": max_tokens}},
81
  stream=True, timeout=300
82
  )
83
+
84
  full = ""
85
  for line in response.iter_lines():
86
  if line:
 
99
  return "Please describe what you want."
100
  if not check_ollama():
101
  return "⏳ Ollama starting..."
102
+
103
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
104
  full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
105
+
106
  try:
107
  r = requests.post(
108
  f"{OLLAMA_URL}/api/generate",
 
128
  return "Paste code to explain."
129
  if not check_ollama():
130
  return "⏳ Ollama starting..."
131
+
132
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
133
+
134
  try:
135
  r = requests.post(
136
  f"{OLLAMA_URL}/api/generate",
 
146
  return "Paste code to fix."
147
  if not check_ollama():
148
  return "⏳ Ollama starting..."
149
+
150
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
151
  prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
152
+
153
  try:
154
  r = requests.post(
155
  f"{OLLAMA_URL}/api/generate",
 
161
  return f"Error: {e}"
162
 
163
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
164
+
165
  gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
166
+
167
  with gr.Row():
168
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
169
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Temperature")
170
  max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens")
171
+
172
  with gr.Tabs():
173
  with gr.TabItem("💬 Chat"):
174
  chatbot = gr.Chatbot(height=400)
 
180
  clear = gr.Button("Clear")
181
  transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
182
  gr.Examples(["Write a Python quicksort function", "Explain async/await in JavaScript"], inputs=msg)
183
+
184
  with gr.TabItem("⚡ Generate"):
185
  with gr.Row():
186
  with gr.Column():
 
188
  gen_lang = gr.Dropdown(["Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++"], value="Python", label="Language")
189
  gen_btn = gr.Button("Generate", variant="primary")
190
  gen_output = gr.Code(label="Code", language="python", lines=15)
191
+
192
  with gr.TabItem("🔍 Explain"):
193
  with gr.Row():
194
  explain_input = gr.Code(label="Paste code", lines=10)
195
  explain_output = gr.Markdown()
196
+ explain_btn = gr.Button("Explain", variant="primary")
197
+
198
  with gr.TabItem("🔧 Fix"):
199
  with gr.Row():
200
  with gr.Column():
 
202
  fix_error = gr.Textbox(label="Error message", lines=2)
203
  fix_btn = gr.Button("Fix", variant="primary")
204
  fix_output = gr.Markdown()
205
+
206
  def respond(message, history, model, temp, tokens):
207
  history = history or []
208
  for chunk in chat_stream(message, history, model, temp, tokens):
209
  yield history + [[message, chunk]], ""
210
+
211
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
212
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
213
  clear.click(lambda: [], None, chatbot)
 
216
  explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
217
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
218
 
219
+ demo.launch(server_name="0.0.0.0", server_port=7860)