Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,12 +6,20 @@ from datetime import datetime
|
|
| 6 |
from pathlib import Path
|
| 7 |
from llama_cpp import Llama
|
| 8 |
from faster_whisper import WhisperModel
|
|
|
|
| 9 |
|
| 10 |
# ===== CONFIG =====
|
| 11 |
MODELS_DIR = "/data/models"
|
| 12 |
MAX_TOKENS = 2048
|
| 13 |
CONTEXT_SIZE = 4096
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
MODELS = {
|
| 16 |
"⭐ Qwen3 Coder 30B-A3B (Best)": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
|
| 17 |
"🏆 Qwen2.5 Coder 14B (Premium)": "qwen2.5-coder-14b-instruct-q4_k_m.gguf",
|
|
@@ -60,9 +68,22 @@ def load_model(model_name):
|
|
| 60 |
return None
|
| 61 |
|
| 62 |
model_path = os.path.join(MODELS_DIR, filename)
|
|
|
|
|
|
|
| 63 |
if not os.path.exists(model_path):
|
| 64 |
-
print(f"
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
print(f"📥 Loading {model_name}...")
|
| 68 |
try:
|
|
@@ -208,12 +229,10 @@ def export_code(code, language):
|
|
| 208 |
# ===== STREAMING (UPDATED FOR GRADIO 5) =====
|
| 209 |
|
| 210 |
def chat_stream(message, history, model_name, temperature, max_tokens):
|
| 211 |
-
# Initialize history if None (Gradio 5 sometimes sends None on first load)
|
| 212 |
history = history or []
|
| 213 |
|
| 214 |
valid, error = validate_input(message, "Message")
|
| 215 |
if not valid:
|
| 216 |
-
# Append error as assistant message
|
| 217 |
history.append({"role": "user", "content": message})
|
| 218 |
history.append({"role": "assistant", "content": error})
|
| 219 |
yield history
|
|
@@ -226,7 +245,6 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
|
|
| 226 |
yield history
|
| 227 |
return
|
| 228 |
|
| 229 |
-
# Build conversation string from Dict history
|
| 230 |
if "deepseek" in model_name.lower():
|
| 231 |
conv = "### Instruction:\nYou are an expert coding assistant. Use markdown code blocks.\n\n"
|
| 232 |
for msg in history:
|
|
@@ -245,9 +263,7 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
|
|
| 245 |
conv += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
|
| 246 |
stop_tokens = ["<|im_end|>", "<|im_start|>"]
|
| 247 |
|
| 248 |
-
# Add the new user message to history
|
| 249 |
history.append({"role": "user", "content": message})
|
| 250 |
-
# Add a placeholder for the assistant response
|
| 251 |
history.append({"role": "assistant", "content": ""})
|
| 252 |
|
| 253 |
try:
|
|
@@ -255,7 +271,6 @@ def chat_stream(message, history, model_name, temperature, max_tokens):
|
|
| 255 |
for chunk in llm(conv, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stop=stop_tokens, stream=True):
|
| 256 |
text_chunk = chunk["choices"][0]["text"]
|
| 257 |
full += text_chunk
|
| 258 |
-
# Update the last message (assistant's response)
|
| 259 |
history[-1]['content'] = full
|
| 260 |
yield history
|
| 261 |
except Exception as e:
|
|
@@ -490,8 +505,8 @@ dark_theme = gr.themes.Soft(
|
|
| 490 |
|
| 491 |
# ===== UI =====
|
| 492 |
|
| 493 |
-
#
|
| 494 |
-
with gr.Blocks() as demo:
|
| 495 |
|
| 496 |
# State for theme
|
| 497 |
is_dark = gr.State(True)
|
|
@@ -623,7 +638,7 @@ with gr.Blocks() as demo:
|
|
| 623 |
# ===== EXPLAIN =====
|
| 624 |
with gr.TabItem("🔍 Explain"):
|
| 625 |
with gr.Row():
|
| 626 |
-
with gr.Column():
|
| 627 |
explain_input = gr.Code(label="Code", lines=10)
|
| 628 |
explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal")
|
| 629 |
explain_btn = gr.Button("🔍 Explain", variant="primary")
|
|
@@ -879,5 +894,5 @@ with gr.Blocks() as demo:
|
|
| 879 |
print("🔥 Preloading model...")
|
| 880 |
load_model("🚀 Qwen2.5 Coder 3B (Fast)")
|
| 881 |
|
| 882 |
-
#
|
| 883 |
-
demo.launch(server_name="0.0.0.0", server_port=7860
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
from llama_cpp import Llama
|
| 8 |
from faster_whisper import WhisperModel
|
| 9 |
+
from huggingface_hub import hf_hub_download # Added for auto-download
|
| 10 |
|
| 11 |
# ===== CONFIG =====
|
| 12 |
MODELS_DIR = "/data/models"
|
| 13 |
MAX_TOKENS = 2048
|
| 14 |
CONTEXT_SIZE = 4096
|
| 15 |
|
| 16 |
+
# Map each GGUF filename to its likely Hugging Face repository (filenames not listed here fall back to the Qwen2.5-Coder-3B repo at download time)
|
| 17 |
+
MODEL_REPOS = {
|
| 18 |
+
"qwen2.5-coder-3b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
|
| 19 |
+
"qwen2.5-coder-7b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
|
| 20 |
+
# You can add others here
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
MODELS = {
|
| 24 |
"⭐ Qwen3 Coder 30B-A3B (Best)": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
|
| 25 |
"🏆 Qwen2.5 Coder 14B (Premium)": "qwen2.5-coder-14b-instruct-q4_k_m.gguf",
|
|
|
|
| 68 |
return None
|
| 69 |
|
| 70 |
model_path = os.path.join(MODELS_DIR, filename)
|
| 71 |
+
|
| 72 |
+
# --- AUTO DOWNLOAD LOGIC ---
|
| 73 |
if not os.path.exists(model_path):
|
| 74 |
+
print(f"⬇️ Model not found. Attempting download for {filename}...")
|
| 75 |
+
repo_id = MODEL_REPOS.get(filename, "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF") # Default fallback
|
| 76 |
+
try:
|
| 77 |
+
hf_hub_download(
|
| 78 |
+
repo_id=repo_id,
|
| 79 |
+
filename=filename,
|
| 80 |
+
local_dir=MODELS_DIR,
|
| 81 |
+
local_dir_use_symlinks=False
|
| 82 |
+
)
|
| 83 |
+
print("✅ Download complete!")
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"❌ Download failed: {e}")
|
| 86 |
+
return None
|
| 87 |
|
| 88 |
print(f"📥 Loading {model_name}...")
|
| 89 |
try:
|
|
|
|
| 229 |
# ===== STREAMING (UPDATED FOR GRADIO 5) =====
|
| 230 |
|
| 231 |
def chat_stream(message, history, model_name, temperature, max_tokens):
|
|
|
|
| 232 |
history = history or []
|
| 233 |
|
| 234 |
valid, error = validate_input(message, "Message")
|
| 235 |
if not valid:
|
|
|
|
| 236 |
history.append({"role": "user", "content": message})
|
| 237 |
history.append({"role": "assistant", "content": error})
|
| 238 |
yield history
|
|
|
|
| 245 |
yield history
|
| 246 |
return
|
| 247 |
|
|
|
|
| 248 |
if "deepseek" in model_name.lower():
|
| 249 |
conv = "### Instruction:\nYou are an expert coding assistant. Use markdown code blocks.\n\n"
|
| 250 |
for msg in history:
|
|
|
|
| 263 |
conv += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
|
| 264 |
stop_tokens = ["<|im_end|>", "<|im_start|>"]
|
| 265 |
|
|
|
|
| 266 |
history.append({"role": "user", "content": message})
|
|
|
|
| 267 |
history.append({"role": "assistant", "content": ""})
|
| 268 |
|
| 269 |
try:
|
|
|
|
| 271 |
for chunk in llm(conv, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stop=stop_tokens, stream=True):
|
| 272 |
text_chunk = chunk["choices"][0]["text"]
|
| 273 |
full += text_chunk
|
|
|
|
| 274 |
history[-1]['content'] = full
|
| 275 |
yield history
|
| 276 |
except Exception as e:
|
|
|
|
| 505 |
|
| 506 |
# ===== UI =====
|
| 507 |
|
| 508 |
+
# FIX: title and theme are now passed to gr.Blocks() here instead of to demo.launch()
|
| 509 |
+
with gr.Blocks(title="Axon v6", theme=dark_theme) as demo:
|
| 510 |
|
| 511 |
# State for theme
|
| 512 |
is_dark = gr.State(True)
|
|
|
|
| 638 |
# ===== EXPLAIN =====
|
| 639 |
with gr.TabItem("🔍 Explain"):
|
| 640 |
with gr.Row():
|
| 641 |
+
with gr.Column(): # FIXED: used to be Column()
|
| 642 |
explain_input = gr.Code(label="Code", lines=10)
|
| 643 |
explain_detail = gr.Radio(["Brief", "Normal", "Detailed"], value="Normal")
|
| 644 |
explain_btn = gr.Button("🔍 Explain", variant="primary")
|
|
|
|
| 894 |
print("🔥 Preloading model...")
|
| 895 |
load_model("🚀 Qwen2.5 Coder 3B (Fast)")
|
| 896 |
|
| 897 |
+
# Launch the app ('title' and 'theme' were removed from this call; they are now set on gr.Blocks)
|
| 898 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|