AIencoder committed on
Commit
6a5f395
·
verified ·
1 Parent(s): 260c979

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -85
app.py CHANGED
@@ -1,143 +1,140 @@
1
  import gradio as gr
2
- import requests
3
- import json
4
  from faster_whisper import WhisperModel
5
 
6
- OLLAMA_URL = "http://localhost:11434"
7
-
8
  MODELS = {
9
- "Qwen2.5-Coder 1.5B (Fastest)": "qwen2.5-coder:1.5b",
10
- "Qwen2.5-Coder 3B (Fast)": "qwen2.5-coder:3b",
11
- "Qwen2.5-Coder 7B (Quality)": "qwen2.5-coder:7b",
12
- "Qwen3-Coder 30B-A3B (Best)": "qwen3-coder:30b-a3b",
 
 
 
 
 
 
 
 
 
 
 
13
  }
14
 
 
 
15
  print("Loading Whisper...")
16
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
17
  print("Whisper ready!")
18
 
19
- def check_ollama():
20
- try:
21
- r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
22
- return r.status_code == 200
23
- except:
24
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def transcribe_audio(audio):
27
  if audio is None:
28
  return ""
29
-
30
  try:
31
  segments, _ = whisper_model.transcribe(audio)
32
- text = " ".join([seg.text for seg in segments])
33
- return text.strip()
34
  except Exception as e:
35
  return f"[STT Error: {e}]"
36
 
37
- def chat_stream(message, history, model_name, temperature, max_tokens):
38
- if not check_ollama():
39
- yield "⏳ Ollama starting... wait 30 seconds and try again."
40
- return
41
 
42
- model = MODELS.get(model_name, "qwen2.5-coder:3b")
43
- messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
44
 
45
  for user_msg, assistant_msg in history:
46
- messages.append({"role": "user", "content": user_msg})
47
  if assistant_msg:
48
- messages.append({"role": "assistant", "content": assistant_msg})
49
 
50
- messages.append({"role": "user", "content": message})
51
 
52
  try:
53
- response = requests.post(
54
- f"{OLLAMA_URL}/api/chat",
55
- json={"model": model, "messages": messages, "stream": True, "options": {"temperature": temperature, "num_predict": max_tokens}},
56
- stream=True, timeout=300
57
- )
58
-
59
- full = ""
60
- for line in response.iter_lines():
61
- if line:
62
- try:
63
- data = json.loads(line)
64
- if "message" in data:
65
- full += data["message"].get("content", "")
66
- yield full
67
- except:
68
- continue
69
  except Exception as e:
70
- yield f"Error: {e}"
71
 
72
  def generate_code(prompt, language, model_name, max_tokens):
73
  if not prompt.strip():
74
  return "Please describe what you want."
75
- if not check_ollama():
76
- return "⏳ Ollama starting..."
77
 
78
- model = MODELS.get(model_name, "qwen2.5-coder:3b")
79
- full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
 
 
 
80
 
81
  try:
82
- r = requests.post(
83
- f"{OLLAMA_URL}/api/generate",
84
- json={"model": model, "prompt": full_prompt, "stream": False, "options": {"temperature": 0.3, "num_predict": max_tokens}},
85
- timeout=300
86
- )
87
- if r.status_code == 200:
88
- result = r.json().get("response", "")
89
- if "```" in result:
90
- parts = result.split("```")
91
- if len(parts) >= 2:
92
- code = parts[1]
93
- if "\n" in code:
94
- code = code.split("\n", 1)[-1]
95
- return code.strip()
96
- return result
97
- return f"Error: {r.text}"
98
  except Exception as e:
99
  return f"Error: {e}"
100
 
101
  def explain_code(code, model_name, max_tokens):
102
  if not code.strip():
103
  return "Paste code to explain."
104
- if not check_ollama():
105
- return "⏳ Ollama starting..."
106
 
107
- model = MODELS.get(model_name, "qwen2.5-coder:3b")
 
 
 
 
108
 
109
  try:
110
- r = requests.post(
111
- f"{OLLAMA_URL}/api/generate",
112
- json={"model": model, "prompt": f"Explain this code:\n```\n{code}\n```", "stream": False, "options": {"num_predict": max_tokens}},
113
- timeout=300
114
- )
115
- return r.json().get("response", "") if r.status_code == 200 else f"Error: {r.text}"
116
  except Exception as e:
117
  return f"Error: {e}"
118
 
119
  def fix_code(code, error, model_name, max_tokens):
120
  if not code.strip():
121
  return "Paste code to fix."
122
- if not check_ollama():
123
- return "⏳ Ollama starting..."
124
 
125
- model = MODELS.get(model_name, "qwen2.5-coder:3b")
126
- prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
 
 
 
127
 
128
  try:
129
- r = requests.post(
130
- f"{OLLAMA_URL}/api/generate",
131
- json={"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.3, "num_predict": max_tokens}},
132
- timeout=300
133
- )
134
- return r.json().get("response", "") if r.status_code == 200 else f"Error: {r.text}"
135
  except Exception as e:
136
  return f"Error: {e}"
137
 
138
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
139
 
140
- gr.Markdown("# 🔥 Axon v5.1\n**Ollama Edition** • Qwen2.5-Coder running locally • No rate limits!")
141
 
142
  with gr.Row():
143
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
@@ -180,8 +177,9 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
180
 
181
  def respond(message, history, model, temp, tokens):
182
  history = history or []
183
- for chunk in chat_stream(message, history, model, temp, tokens):
184
- yield history + [[message, chunk]], ""
 
185
 
186
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
187
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
@@ -191,4 +189,7 @@ with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as
191
  explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
192
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
193
 
 
 
 
194
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
+ from ctransformers import AutoModelForCausalLM
3
+ from huggingface_hub import hf_hub_download
4
  from faster_whisper import WhisperModel
5
 
 
 
6
# UI label -> GGUF download spec for each selectable coding model.
#   repo: Hugging Face repository holding the quantized weights
#   file: q4_k_m-quantized GGUF file to fetch from that repo
#   type: ctransformers `model_type` passed at load time
# NOTE(review): the Qwen3 entry below reuses model_type "qwen2" — confirm
# ctransformers can actually load Qwen3-Coder GGUF weights under that type.
MODELS = {
    "Qwen2.5-Coder 3B (Fast)": {
        "repo": "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
        "file": "qwen2.5-coder-3b-instruct-q4_k_m.gguf",
        "type": "qwen2"
    },
    "Qwen2.5-Coder 7B (Quality)": {
        "repo": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
        "file": "qwen2.5-coder-7b-instruct-q4_k_m.gguf",
        "type": "qwen2"
    },
    "Qwen3-Coder 30B-A3B (Best)": {
        "repo": "Qwen/Qwen3-Coder-30B-A3B-Instruct-GGUF",
        "file": "qwen3-coder-30b-a3b-instruct-q4_k_m.gguf",
        "type": "qwen2"
    },
}
23
 
24
# Cache of ctransformers models already loaded in this process, keyed by the
# same UI label used in MODELS; lets get_model() skip re-download/re-load.
loaded_models = {}

# Speech-to-text model for voice input: smallest Whisper variant, int8 on
# CPU, loaded eagerly at import time (blocks startup briefly).
print("Loading Whisper...")
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
print("Whisper ready!")
29
 
30
def get_model(model_name):
    """Return the ctransformers model for *model_name*, loading it on first use.

    Unknown names return None. Loaded models are memoized in the module-level
    ``loaded_models`` dict so later calls are instant.
    """
    cached = loaded_models.get(model_name)
    if cached is not None:
        return cached

    spec = MODELS.get(model_name)
    if not spec:
        return None

    # First use: fetch the GGUF weights, then load them on CPU threads.
    print(f"Downloading {model_name}...")
    weights_path = hf_hub_download(repo_id=spec["repo"], filename=spec["file"])

    print(f"Loading {model_name}...")
    model = AutoModelForCausalLM.from_pretrained(
        weights_path,
        model_type=spec["type"],
        context_length=4096,
        threads=4,
    )
    loaded_models[model_name] = model
    print(f"{model_name} ready!")
    return model
52
 
53
def transcribe_audio(audio):
    """Transcribe an audio file with the module-level Whisper model.

    Returns "" when no audio was supplied; on failure returns an
    "[STT Error: ...]" string instead of raising.
    """
    if audio is None:
        return ""
    try:
        pieces, _info = whisper_model.transcribe(audio)
        transcript = " ".join(segment.text for segment in pieces)
        return transcript.strip()
    except Exception as e:
        return f"[STT Error: {e}]"
61
 
62
def chat(message, history, model_name, temperature, max_tokens):
    """Run one chat turn against the selected model.

    Builds a ChatML prompt from the system instruction, the prior
    (user, assistant) pairs in *history*, and the new *message*, then returns
    the model's completion. Errors come back as strings, never raised.
    """
    llm = get_model(model_name)
    if llm is None:
        return "❌ Model not found"

    segments = [
        "<|im_start|>system\nYou are an expert coding assistant. Always use markdown code blocks.<|im_end|>\n"
    ]
    for user_msg, assistant_msg in history:
        segments.append(f"<|im_start|>user\n{user_msg}<|im_end|>\n")
        # Skip empty/None assistant slots (e.g. a turn still in progress).
        if assistant_msg:
            segments.append(f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n")
    segments.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    prompt = "".join(segments)

    try:
        return llm(prompt, max_new_tokens=max_tokens, temperature=temperature)
    except Exception as e:
        return f"Error: {e}"
81
 
82
def generate_code(prompt, language, model_name, max_tokens):
    """Generate *language* code for *prompt* and return just the code.

    Strips the first markdown fence (and its language tag line) from the
    model output when present; otherwise returns the raw response. Errors
    come back as strings, never raised.
    """
    if not prompt.strip():
        return "Please describe what you want."

    llm = get_model(model_name)
    if llm is None:
        return "❌ Model not found"

    full_prompt = f"<|im_start|>user\nWrite {language} code for: {prompt}\n\nOutput ONLY code in a markdown block.<|im_end|>\n<|im_start|>assistant\n"

    try:
        result = llm(full_prompt, max_new_tokens=max_tokens, temperature=0.3)
    except Exception as e:
        return f"Error: {e}"

    # Pull the body of the first ``` fence; drop the language-tag line.
    if "```" not in result:
        return result
    fenced = result.split("```")
    if len(fenced) < 2:
        return result
    code = fenced[1]
    newline_at = code.find("\n")
    if newline_at != -1:
        code = code[newline_at + 1:]
    return code.strip()
104
 
105
def explain_code(code, model_name, max_tokens):
    """Ask the selected model for an explanation of *code*.

    Returns a prompt-the-user message for empty input; errors come back as
    strings, never raised.
    """
    if not code.strip():
        return "Paste code to explain."

    llm = get_model(model_name)
    if llm is None:
        return "❌ Model not found"

    request = (
        "<|im_start|>user\nExplain this code:\n```\n"
        f"{code}"
        "\n```<|im_end|>\n<|im_start|>assistant\n"
    )
    try:
        return llm(request, max_new_tokens=max_tokens, temperature=0.5)
    except Exception as e:
        return f"Error: {e}"
119
 
120
def fix_code(code, error, model_name, max_tokens):
    """Ask the selected model to repair *code*, optionally guided by *error*.

    Falls back to the generic description 'Not working' when no error text
    is given. Errors come back as strings, never raised.
    """
    if not code.strip():
        return "Paste code to fix."

    llm = get_model(model_name)
    if llm is None:
        return "❌ Model not found"

    error_text = error or 'Not working'
    request = f"<|im_start|>user\nFix this code:\n```\n{code}\n```\nError: {error_text}<|im_end|>\n<|im_start|>assistant\n"
    try:
        return llm(request, max_new_tokens=max_tokens, temperature=0.3)
    except Exception as e:
        return f"Error: {e}"
134
 
135
  with gr.Blocks(title="Axon v5.1", theme=gr.themes.Soft(primary_hue="purple")) as demo:
136
 
137
+ gr.Markdown("# 🔥 Axon v5.1\n**CTransformers Edition** • Any GGUF • No rate limits!")
138
 
139
  with gr.Row():
140
  model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-Coder 3B (Fast)", label="🤖 Model")
 
177
 
178
  def respond(message, history, model, temp, tokens):
179
  history = history or []
180
+ response = chat(message, history, model, temp, tokens)
181
+ history.append([message, response])
182
+ return history, ""
183
 
184
  msg.submit(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
185
  send.click(respond, [msg, chatbot, model_dropdown, temperature, max_tokens], [chatbot, msg])
 
189
  explain_btn.click(explain_code, [explain_input, model_dropdown, max_tokens], explain_output)
190
  fix_btn.click(fix_code, [fix_input, fix_error, model_dropdown, max_tokens], fix_output)
191
 
192
# Warm the default model at startup so the first request doesn't pay the
# download/load cost. NOTE(review): this blocks server startup until the
# GGUF file is downloaded and loaded.
print("Pre-loading default model...")
get_model("Qwen2.5-Coder 3B (Fast)")

# Bind all interfaces on port 7860 (the Hugging Face Spaces convention).
demo.launch(server_name="0.0.0.0", server_port=7860)