AIencoder committed on
Commit
6fce65f
·
verified ·
1 Parent(s): 6871109

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -72
app.py CHANGED
@@ -1,17 +1,15 @@
1
  import gradio as gr
2
  import requests
3
  import json
4
- import time
5
  from faster_whisper import WhisperModel
6
 
7
  OLLAMA_URL = "http://localhost:11434"
8
 
9
- # Pointing to the working bartowski GGUF repo
 
10
  MODELS = {
11
- "Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
12
  "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
13
- "Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
14
- "Qwen3-Coder 30B-A3B (Best)" : "hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF:Q4_K_M",
15
  }
16
 
17
  print("Loading Whisper...")
@@ -20,44 +18,11 @@ print("Whisper ready!")
20
 
21
  def check_ollama():
22
  try:
23
- r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
24
  return r.status_code == 200
25
  except:
26
  return False
27
 
28
- # NEW FUNCTION: Robust startup that waits for Ollama
29
- def ensure_model(model_name):
30
- print(f"⏳ Waiting for Ollama to start...")
31
-
32
- # Wait up to 30 seconds for Ollama to be ready
33
- retries = 0
34
- while not check_ollama():
35
- time.sleep(2)
36
- retries += 1
37
- if retries > 15:
38
- print("❌ Ollama failed to start in time.")
39
- return
40
-
41
- print(f"🔎 Checking for model: {model_name}")
42
- try:
43
- # Check if model is already loaded
44
- check = requests.post(f"{OLLAMA_URL}/api/show", json={"name": model_name})
45
- if check.status_code == 200:
46
- print(f"✅ {model_name} is ready!")
47
- return
48
-
49
- # If not, pull it
50
- print(f"📥 Downloading {model_name}... (This WILL take time for 30B)")
51
- with requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name}, stream=True) as r:
52
- for line in r.iter_lines():
53
- pass
54
- print(f"🎉 Download complete: {model_name}")
55
- except Exception as e:
56
- print(f"⚠️ Error pulling model: {e}")
57
-
58
- # TRIGGER DOWNLOAD IMMEDIATELY
59
- ensure_model("hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF:Q4_K_M")
60
-
61
  def transcribe_audio(audio):
62
  if audio is None:
63
  return ""
@@ -68,10 +33,6 @@ def transcribe_audio(audio):
68
  return f"[STT Error: {e}]"
69
 
70
  def chat_stream(message, history, model_name, temperature, max_tokens):
71
- if not check_ollama():
72
- yield "⏳ Ollama starting... wait 30 seconds and try again."
73
- return
74
-
75
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
76
  messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
77
 
@@ -103,11 +64,7 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
103
  yield f"Error: {e}"
104
 
105
  def generate_code(prompt, language, model_name, max_tokens):
106
- if not prompt.strip():
107
- return "Please describe what you want."
108
- if not check_ollama():
109
- return "⏳ Ollama starting..."
110
-
111
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
112
  full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
113
 
@@ -122,23 +79,15 @@ def generate_code(prompt, language, model_name, max_tokens):
122
  if "```" in result:
123
  parts = result.split("```")
124
  if len(parts) >= 2:
125
- code = parts[1]
126
- if "\n" in code:
127
- code = code.split("\n", 1)[-1]
128
- return code.strip()
129
  return result
130
  return f"Error: {r.text}"
131
  except Exception as e:
132
  return f"Error: {e}"
133
 
134
  def explain_code(code, model_name, max_tokens):
135
- if not code.strip():
136
- return "Paste code to explain."
137
- if not check_ollama():
138
- return "⏳ Ollama starting..."
139
-
140
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
141
-
142
  try:
143
  r = requests.post(
144
  f"{OLLAMA_URL}/api/generate",
@@ -150,14 +99,9 @@ def explain_code(code, model_name, max_tokens):
150
  return f"Error: {e}"
151
 
152
  def fix_code(code, error, model_name, max_tokens):
153
- if not code.strip():
154
- return "Paste code to fix."
155
- if not check_ollama():
156
- return "⏳ Ollama starting..."
157
-
158
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
159
  prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
160
-
161
  try:
162
  r = requests.post(
163
  f"{OLLAMA_URL}/api/generate",
@@ -169,13 +113,12 @@ def fix_code(code, error, model_name, max_tokens):
169
  return f"Error: {e}"
170
 
171
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
172
-
173
- gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
174
-
175
  with gr.Row():
176
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
177
- temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="🌡️ Temperature")
178
- max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="📏 Max Tokens")
179
 
180
  with gr.Tabs():
181
  with gr.TabItem("💬 Chat"):
@@ -187,13 +130,13 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
187
  with gr.Row():
188
  clear = gr.Button("Clear")
189
  transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
190
- gr.Examples(["Write a Python quicksort function", "Explain async/await in JavaScript"], inputs=msg)
191
 
192
  with gr.TabItem("⚡ Generate"):
193
  with gr.Row():
194
  with gr.Column():
195
- gen_prompt = gr.Textbox(label="Describe what you want", lines=3)
196
- gen_lang = gr.Dropdown(["Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++"], value="Python", label="Language")
197
  gen_btn = gr.Button("Generate", variant="primary")
198
  gen_output = gr.Code(label="Code", language="python", lines=15)
199
 
 
1
  import gradio as gr
2
  import requests
3
  import json
 
4
  from faster_whisper import WhisperModel
5
 
6
# Base URL of the local Ollama REST API server.
OLLAMA_URL = "http://localhost:11434"

# Display-name -> Ollama model tag mapping for the UI dropdown.
# We just list the models here.
# They are guaranteed to exist because entrypoint.sh downloaded them first!
MODELS = {
    "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
    "Qwen3-Coder 30B-A3B (Best)": "hf.co/bartowski/Qwen_Qwen3-30B-A3B-GGUF:Q4_K_M",
}
14
 
15
  print("Loading Whisper...")
 
18
 
def check_ollama():
    """Return True if the Ollama server answers /api/tags with HTTP 200.

    Uses a short 2-second timeout so UI callers fail fast when the
    server is still starting. Any network/HTTP-layer failure (refused
    connection, timeout, etc.) is treated as "not ready".
    """
    try:
        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=2)
        return r.status_code == 200
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
    # are not swallowed; requests raises RequestException subclasses
    # for all connection/timeout/protocol errors.
    except requests.RequestException:
        return False
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def transcribe_audio(audio):
27
  if audio is None:
28
  return ""
 
33
  return f"[STT Error: {e}]"
34
 
35
  def chat_stream(message, history, model_name, temperature, max_tokens):
 
 
 
 
36
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
37
  messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
38
 
 
64
  yield f"Error: {e}"
65
 
66
  def generate_code(prompt, language, model_name, max_tokens):
67
+ if not prompt.strip(): return "Please describe what you want."
 
 
 
 
68
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
69
  full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
70
 
 
79
  if "```" in result:
80
  parts = result.split("```")
81
  if len(parts) >= 2:
82
+ return parts[1].split("\n", 1)[-1].strip() if "\n" in parts[1] else parts[1]
 
 
 
83
  return result
84
  return f"Error: {r.text}"
85
  except Exception as e:
86
  return f"Error: {e}"
87
 
88
  def explain_code(code, model_name, max_tokens):
89
+ if not code.strip(): return "Paste code to explain."
 
 
 
 
90
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
 
91
  try:
92
  r = requests.post(
93
  f"{OLLAMA_URL}/api/generate",
 
99
  return f"Error: {e}"
100
 
101
  def fix_code(code, error, model_name, max_tokens):
102
+ if not code.strip(): return "Paste code to fix."
 
 
 
 
103
  model = MODELS.get(model_name, "qwen2.5-coder:3b")
104
  prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
 
105
  try:
106
  r = requests.post(
107
  f"{OLLAMA_URL}/api/generate",
 
113
  return f"Error: {e}"
114
 
115
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
116
+ gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder")
117
+
 
118
  with gr.Row():
119
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
120
+ temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="Temp")
121
+ max_tokens = gr.Slider(256, 4096, value=2048, step=256, label="Max Tokens")
122
 
123
  with gr.Tabs():
124
  with gr.TabItem("💬 Chat"):
 
130
  with gr.Row():
131
  clear = gr.Button("Clear")
132
  transcribe_btn = gr.Button("🎤 Transcribe", variant="secondary")
133
+ gr.Examples(["Write a Python quicksort function"], inputs=msg)
134
 
135
  with gr.TabItem("⚡ Generate"):
136
  with gr.Row():
137
  with gr.Column():
138
+ gen_prompt = gr.Textbox(label="Describe request", lines=3)
139
+ gen_lang = gr.Dropdown(["Python", "JavaScript", "Go", "Rust", "C++"], value="Python", label="Language")
140
  gen_btn = gr.Button("Generate", variant="primary")
141
  gen_output = gr.Code(label="Code", language="python", lines=15)
142