AIencoder committed on
Commit
e5c493a
·
verified ·
1 Parent(s): a2c0e36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -85
app.py CHANGED
@@ -1,54 +1,27 @@
1
  import gradio as gr
2
- from ctransformers import AutoModelForCausalLM
3
- from huggingface_hub import hf_hub_download
4
  from faster_whisper import WhisperModel
5
 
 
 
6
  MODELS = {
7
- "Qwen2.5-Coder 3B (Fast)": {
8
- "repo": "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
9
- "file": "qwen2.5-coder-3b-instruct-q4_k_m.gguf",
10
- "type": "qwen2"
11
- },
12
- "Qwen2.5-Coder 7B (Quality)": {
13
- "repo": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
14
- "file": "qwen2.5-coder-7b-instruct-q4_k_m.gguf",
15
- "type": "qwen2"
16
- },
17
- "Qwen3-Coder 30B-A3B (Best)": {
18
- "repo": "Qwen/Qwen3-Coder-30B-A3B-Instruct-GGUF",
19
- "file": "qwen3-coder-30b-a3b-instruct-q4_k_m.gguf",
20
- "type": "qwen2"
21
- },
22
  }
23
 
24
- loaded_models = {}
25
-
26
  print("Loading Whisper...")
27
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
28
  print("Whisper ready!")
29
 
30
- def get_model(model_name):
31
- if model_name in loaded_models:
32
- return loaded_models[model_name]
33
-
34
- info = MODELS.get(model_name)
35
- if not info:
36
- return None
37
-
38
- print(f"Downloading {model_name}...")
39
- path = hf_hub_download(repo_id=info["repo"], filename=info["file"])
40
-
41
- print(f"Loading {model_name}...")
42
- llm = AutoModelForCausalLM.from_pretrained(
43
- path,
44
- model_type=info["type"],
45
- context_length=4096,
46
- threads=4
47
- )
48
- loaded_models[model_name] = llm
49
- print(f"{model_name} ready!")
50
-
51
- return llm
52
 
53
  def transcribe_audio(audio):
54
  if audio is None:
@@ -59,82 +32,110 @@ def transcribe_audio(audio):
59
  except Exception as e:
60
  return f"[STT Error: {e}]"
61
 
62
- def chat(message, history, model_name, temperature, max_tokens):
63
- llm = get_model(model_name)
64
- if llm is None:
65
- return "❌ Model not found"
66
 
67
- prompt = "<|im_start|>system\nYou are an expert coding assistant. Always use markdown code blocks.<|im_end|>\n"
 
68
 
69
  for user_msg, assistant_msg in history:
70
- prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
71
  if assistant_msg:
72
- prompt += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
73
 
74
- prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
75
 
76
  try:
77
- response = llm(prompt, max_new_tokens=max_tokens, temperature=temperature)
78
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  except Exception as e:
80
- return f"Error: {e}"
81
 
82
  def generate_code(prompt, language, model_name, max_tokens):
83
  if not prompt.strip():
84
  return "Please describe what you want."
 
 
85
 
86
- llm = get_model(model_name)
87
- if llm is None:
88
- return "❌ Model not found"
89
-
90
- full_prompt = f"<|im_start|>user\nWrite {language} code for: {prompt}\n\nOutput ONLY code in a markdown block.<|im_end|>\n<|im_start|>assistant\n"
91
 
92
  try:
93
- result = llm(full_prompt, max_new_tokens=max_tokens, temperature=0.3)
94
- if "```" in result:
95
- parts = result.split("```")
96
- if len(parts) >= 2:
97
- code = parts[1]
98
- if "\n" in code:
99
- code = code.split("\n", 1)[-1]
100
- return code.strip()
101
- return result
 
 
 
 
 
 
 
102
  except Exception as e:
103
  return f"Error: {e}"
104
 
105
  def explain_code(code, model_name, max_tokens):
106
  if not code.strip():
107
  return "Paste code to explain."
 
 
108
 
109
- llm = get_model(model_name)
110
- if llm is None:
111
- return "❌ Model not found"
112
-
113
- prompt = f"<|im_start|>user\nExplain this code:\n```\n{code}\n```<|im_end|>\n<|im_start|>assistant\n"
114
 
115
  try:
116
- return llm(prompt, max_new_tokens=max_tokens, temperature=0.5)
 
 
 
 
 
117
  except Exception as e:
118
  return f"Error: {e}"
119
 
120
  def fix_code(code, error, model_name, max_tokens):
121
  if not code.strip():
122
  return "Paste code to fix."
 
 
123
 
124
- llm = get_model(model_name)
125
- if llm is None:
126
- return "❌ Model not found"
127
-
128
- prompt = f"<|im_start|>user\nFix this code:\n```\n{code}\n```\nError: {error or 'Not working'}<|im_end|>\n<|im_start|>assistant\n"
129
 
130
  try:
131
- return llm(prompt, max_new_tokens=max_tokens, temperature=0.3)
 
 
 
 
 
132
  except Exception as e:
133
  return f"Error: {e}"
134
 
135
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
136
 
137
- gr.Markdown("# 🔥 Axon v5.1\n**CTransformers Edition** • Any GGUF • No rate limits!")
138
 
139
  with gr.Row():
140
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
@@ -177,9 +178,8 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
177
 
178
  def respond(message, history, model, temp, tokens):
179
  history = history or []
180
- response = chat(message, history, model, temp, tokens)
181
- history.append([message, response])
182
- return history, ""
183
 
184
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
185
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
@@ -189,7 +189,4 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
189
  explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
190
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
191
 
192
- print("Pre-loading default model...")
193
- get_model("Qwen2.5-Coder 3B (Fast)")
194
-
195
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
+ import requests
3
+ import json
4
  from faster_whisper import WhisperModel
5
 
6
# Base URL of the local Ollama server (default install listens on port 11434).
OLLAMA_URL = "http://localhost:11434"

# Maps the human-readable dropdown label (shown in the Gradio UI) to the
# Ollama model tag sent in API requests. Each tag must correspond to a model
# already pulled into the local Ollama instance.
MODELS = {
    "Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
    "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
    "Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
    "Qwen3-Coder 30B-A3B (Best)": "qwen3-coder-30b-a3b",
}
14
 
 
 
15
# Load a small, CPU-friendly Whisper model once at startup for speech-to-text.
# "tiny" with int8 quantization keeps memory and latency low on CPU-only hosts.
print("Loading Whisper...")
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
print("Whisper ready!")
18
 
19
def check_ollama():
    """Return True if the local Ollama server responds to /api/tags.

    Cheap health check run before every LLM request so the UI can show a
    friendly "still starting" message instead of a raw traceback.
    """
    try:
        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
        return r.status_code == 200
    except requests.RequestException:
        # Server not up yet (connection refused / timeout) -> unhealthy.
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def transcribe_audio(audio):
27
  if audio is None:
 
32
  except Exception as e:
33
  return f"[STT Error: {e}]"
34
 
35
def chat_stream(message, history, model_name, temperature, max_tokens):
    """Stream a chat completion from Ollama, yielding the growing reply.

    Args:
        message: Latest user message.
        history: List of [user, assistant] pairs from the Gradio chatbot.
        model_name: Display name; resolved via MODELS (defaults to the 3B tag).
        temperature: Sampling temperature forwarded to Ollama.
        max_tokens: Maximum tokens to generate (Ollama's ``num_predict``).

    Yields:
        The accumulated assistant text after each streamed chunk, or a single
        status/error string if Ollama is unreachable or returns an error.
    """
    if not check_ollama():
        yield "⏳ Ollama starting... wait 30 seconds and try again."
        return

    model = MODELS.get(model_name, "qwen2.5-coder:3b")
    messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]

    # Replay prior turns so the model sees the full conversation context.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    try:
        response = requests.post(
            f"{OLLAMA_URL}/api/chat",
            json={"model": model, "messages": messages, "stream": True, "options": {"temperature": temperature, "num_predict": max_tokens}},
            stream=True, timeout=300
        )
        if response.status_code != 200:
            # Surface server-side errors (e.g. unknown model) instead of
            # silently yielding nothing.
            yield f"Error: {response.text}"
            return

        full = ""
        # Each streamed line is a standalone JSON object carrying a
        # {"message": {"content": ...}} delta.
        for line in response.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Skip only malformed/partial lines; a bare `except:` here
                # would also hide genuine bugs in the accumulation below.
                continue
            if "message" in data:
                full += data["message"].get("content", "")
                yield full
    except Exception as e:
        yield f"Error: {e}"
69
 
70
def generate_code(prompt, language, model_name, max_tokens):
    """Ask Ollama to write *language* code for *prompt*; return just the code.

    Returns a friendly message when the prompt is empty or Ollama is down,
    the extracted fenced code when the reply contains a markdown block, the
    raw reply otherwise, or an error string on HTTP/network failure.
    """
    if not prompt.strip():
        return "Please describe what you want."
    if not check_ollama():
        return "⏳ Ollama starting..."

    model = MODELS.get(model_name, "qwen2.5-coder:3b")
    full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."

    try:
        payload = {
            "model": model,
            "prompt": full_prompt,
            "stream": False,
            "options": {"temperature": 0.3, "num_predict": max_tokens},
        }
        r = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=300)
        if r.status_code != 200:
            return f"Error: {r.text}"

        result = r.json().get("response", "")
        # Pull the first fenced block out of the markdown reply, dropping
        # the language-tag line if one is present.
        if "```" in result:
            parts = result.split("```")
            if len(parts) >= 2:
                code = parts[1]
                if "\n" in code:
                    code = code.split("\n", 1)[-1]
                return code.strip()
        return result
    except Exception as e:
        return f"Error: {e}"
98
 
99
def explain_code(code, model_name, max_tokens):
    """Return a natural-language explanation of *code* from Ollama.

    Empty input and an unreachable Ollama server each produce a friendly
    message; HTTP and network failures produce an "Error: ..." string.
    """
    if not code.strip():
        return "Paste code to explain."
    if not check_ollama():
        return "⏳ Ollama starting..."

    model = MODELS.get(model_name, "qwen2.5-coder:3b")
    prompt = f"Explain this code:\n```\n{code}\n```"

    try:
        r = requests.post(
            f"{OLLAMA_URL}/api/generate",
            json={"model": model, "prompt": prompt, "stream": False, "options": {"num_predict": max_tokens}},
            timeout=300
        )
        if r.status_code == 200:
            return r.json().get("response", "")
        return f"Error: {r.text}"
    except Exception as e:
        return f"Error: {e}"
116
 
117
def fix_code(code, error, model_name, max_tokens):
    """Ask Ollama to repair *code* given an optional *error* description.

    Empty input and an unreachable Ollama server each produce a friendly
    message; HTTP and network failures produce an "Error: ..." string.
    """
    if not code.strip():
        return "Paste code to fix."
    if not check_ollama():
        return "⏳ Ollama starting..."

    model = MODELS.get(model_name, "qwen2.5-coder:3b")
    # Fall back to a generic symptom when the user left the error box blank.
    symptom = error or 'Not working'
    prompt = f"Fix this code:\n```\n{code}\n```\nError: {symptom}"

    try:
        r = requests.post(
            f"{OLLAMA_URL}/api/generate",
            json={"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.3, "num_predict": max_tokens}},
            timeout=300
        )
        if r.status_code == 200:
            return r.json().get("response", "")
        return f"Error: {r.text}"
    except Exception as e:
        return f"Error: {e}"
135
 
136
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
137
 
138
+ gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder + Qwen3-Coder • No rate limits!")
139
 
140
  with gr.Row():
141
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
 
178
 
179
  def respond(message, history, model, temp, tokens):
180
  history = history or []
181
+ for chunk in chat_stream(message, history, model, temp, tokens):
182
+ yield history + [[message, chunk]], ""
 
183
 
184
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
185
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
 
189
  explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
190
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
191
 
 
 
 
192
  demo.launch(server_name="0.0.0.0", server_port=7860)