AIencoder committed on
Commit
ed77a98
·
verified ·
1 Parent(s): 3a61070

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -13
app.py CHANGED
@@ -6,12 +6,20 @@ from datetime import datetime
6
  from pathlib import Path
7
  from llama_cpp import Llama
8
  from faster_whisper import WhisperModel
 
9
 
10
  # ===== CONFIG =====
11
  MODELS_DIR = "/data/models"
12
  MAX_TOKENS = 2048
13
  CONTEXT_SIZE = 4096
14
 
 
 
 
 
 
 
 
15
  MODELS = {
16
  "⭐ Qwen3 Coder 30B-A3B (Best)": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
17
  "🏆 Qwen2.5 Coder 14B (Premium)": "qwen2.5-coder-14b-instruct-q4_k_m.gguf",
@@ -60,9 +68,22 @@ def load_model(model_name):
60
  return None
61
 
62
  model_path = os.path.join(MODELS_DIR, filename)
 
 
63
  if not os.path.exists(model_path):
64
- print(f" Model not found: {model_path}")
65
- return None
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  print(f"📥 Loading {model_name}...")
68
  try:
@@ -208,12 +229,10 @@ def export_code(code, language):
208
  # ===== STREAMING (UPDATED FOR GRADIO 5) =====
209
 
210
  def chat_stream(message, history, model_name, temperature, max_tokens):
211
- # Initialize history if None (Gradio 5 sometimes sends None on first load)
212
  history = history or []
213
 
214
  valid, error = validate_input(message, "Message")
215
  if not valid:
216
- # Append error as assistant message
217
  history.append({"role": "user", "content": message})
218
  history.append({"role": "assistant", "content": error})
219
  yield history
@@ -226,7 +245,6 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
226
  yield history
227
  return
228
 
229
- # Build conversation string from Dict history
230
  if "deepseek" in model_name.lower():
231
  conv = "### Instruction:\nYou are an expert coding assistant. Use markdown code blocks.\n\n"
232
  for msg in history:
@@ -245,9 +263,7 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
245
  conv += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
246
  stop_tokens = ["<|im_end|>", "<|im_start|>"]
247
 
248
- # Add the new user message to history
249
  history.append({"role": "user", "content": message})
250
- # Add a placeholder for the assistant response
251
  history.append({"role": "assistant", "content": ""})
252
 
253
  try:
@@ -255,7 +271,6 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
255
  for chunk in llm(conv, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stop=stop_tokens, stream=True):
256
  text_chunk = chunk["choices"][0]["text"]
257
  full += text_chunk
258
- # Update the last message (assistant's response)
259
  history[-1]['content'] = full
260
  yield history
261
  except Exception as e:
@@ -490,8 +505,8 @@ dark_theme = gr.themes.Soft(
490
 
491
  # ===== UI =====
492
 
493
- # NOTE: Theme and title moved to launch() for Gradio 5+ compatibility
494
- with gr.Blocks() as demo:
495
 
496
  # State for theme
497
  is_dark = gr.State(True)
@@ -623,7 +638,7 @@ with gr.Blocks() as demo:
623
  # ===== EXPLAIN =====
624
  with gr.TabItem("🔍 Explain"):
625
  with gr.Row():
626
- with gr.Column():
627
  explain_input = gr.Code(label="Code", lines=10)
628
  explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal")
629
  explain_btn = gr.Button("🔍 Explain", variant="primary")
@@ -879,5 +894,5 @@ with gr.Blocks() as demo:
879
  print("🔥 Preloading model...")
880
  load_model("🚀 Qwen2.5 Coder 3B (Fast)")
881
 
882
- # Theme and title are now passed here to avoid the UserWarning
883
- demo.launch(server_name="0.0.0.0", server_port=7860, theme=dark_theme, title="Axon v6")
 
6
  from pathlib import Path
7
  from llama_cpp import Llama
8
  from faster_whisper import WhisperModel
9
+ from huggingface_hub import hf_hub_download # Added for auto-download
10
 
11
  # ===== CONFIG =====
12
  MODELS_DIR = "/data/models"
13
  MAX_TOKENS = 2048
14
  CONTEXT_SIZE = 4096
15
 
16
+ # Map models to their likely GGUF repositories (Falling back to Qwen for the 3B)
17
+ MODEL_REPOS = {
18
+ "qwen2.5-coder-3b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
19
+ "qwen2.5-coder-7b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
20
+ # You can add others here
21
+ }
22
+
23
  MODELS = {
24
  "⭐ Qwen3 Coder 30B-A3B (Best)": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
25
  "🏆 Qwen2.5 Coder 14B (Premium)": "qwen2.5-coder-14b-instruct-q4_k_m.gguf",
 
68
  return None
69
 
70
  model_path = os.path.join(MODELS_DIR, filename)
71
+
72
+ # --- AUTO DOWNLOAD LOGIC ---
73
  if not os.path.exists(model_path):
74
+ print(f"⬇️ Model not found. Attempting download for {filename}...")
75
+ repo_id = MODEL_REPOS.get(filename, "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF") # Default fallback
76
+ try:
77
+ hf_hub_download(
78
+ repo_id=repo_id,
79
+ filename=filename,
80
+ local_dir=MODELS_DIR,
81
+ local_dir_use_symlinks=False
82
+ )
83
+ print("✅ Download complete!")
84
+ except Exception as e:
85
+ print(f"❌ Download failed: {e}")
86
+ return None
87
 
88
  print(f"📥 Loading {model_name}...")
89
  try:
 
229
  # ===== STREAMING (UPDATED FOR GRADIO 5) =====
230
 
231
  def chat_stream(message, history, model_name, temperature, max_tokens):
 
232
  history = history or []
233
 
234
  valid, error = validate_input(message, "Message")
235
  if not valid:
 
236
  history.append({"role": "user", "content": message})
237
  history.append({"role": "assistant", "content": error})
238
  yield history
 
245
  yield history
246
  return
247
 
 
248
  if "deepseek" in model_name.lower():
249
  conv = "### Instruction:\nYou are an expert coding assistant. Use markdown code blocks.\n\n"
250
  for msg in history:
 
263
  conv += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
264
  stop_tokens = ["<|im_end|>", "<|im_start|>"]
265
 
 
266
  history.append({"role": "user", "content": message})
 
267
  history.append({"role": "assistant", "content": ""})
268
 
269
  try:
 
271
  for chunk in llm(conv, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stop=stop_tokens, stream=True):
272
  text_chunk = chunk["choices"][0]["text"]
273
  full += text_chunk
 
274
  history[-1]['content'] = full
275
  yield history
276
  except Exception as e:
 
505
 
506
  # ===== UI =====
507
 
508
+ # FIX: Title and theme moved here
509
+ with gr.Blocks(title="Axon v6", theme=dark_theme) as demo:
510
 
511
  # State for theme
512
  is_dark = gr.State(True)
 
638
  # ===== EXPLAIN =====
639
  with gr.TabItem("🔍 Explain"):
640
  with gr.Row():
641
+ with gr.Column(): # FIXED: used to be Column()
642
  explain_input = gr.Code(label="Code", lines=10)
643
  explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal")
644
  explain_btn = gr.Button("🔍 Explain", variant="primary")
 
894
  print("🔥 Preloading model...")
895
  load_model("🚀 Qwen2.5 Coder 3B (Fast)")
896
 
897
+ # Launch (Removed 'title' and 'theme', they are in Blocks)
898
+ demo.launch(server_name="0.0.0.0", server_port=7860)