# lumin_haiku / app.py
# nova — Update app.py (commit f9de2ee, verified)
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# ==========================================
# 🚀 LUMIN CODE: 3B SMART (Balanced)
# ==========================================
# The sweet spot: smarter than 1.5B, but still runs on CPU.
# Qwen 2.5 Coder 3B
# 1. "SMART" MODEL (3B)
REPO_ID = "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF"  # HF repo with GGUF quantizations
FILENAME = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"  # 4-bit (q4_k_m) quantized weights

print(f"⬇️ Downloading {FILENAME}...")
# Downloads the GGUF file from the Hub (or reuses the local cache) and
# returns its filesystem path.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

print("🚀 Loading model into RAM...")
llm = Llama(
    model_path=model_path,
    n_ctx=16384,       # 16k-token context window
    n_threads=2,       # 2 CPU cores
    n_batch=512,       # lowered batch size to leave headroom for the 3B model's logic
    f16_kv=False,
    flash_attn=False,  # OFF for CPU safety
    verbose=False
)
# 2. LOGIC
def generate_code(message, history=None):
    """Stream a coding-assistant reply for *message*.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list | None
        Prior turns, either as ``[user, assistant]`` pairs (classic Gradio
        Chatbot format) or as Gradio "messages"-style dicts with
        ``role``/``content`` keys. ``None`` means no prior turns.

    Yields
    ------
    str
        The accumulated assistant reply so far (Gradio streaming style).
    """
    if history is None:
        history = []
    # Standard Qwen ChatML prompt.
    prompt = (
        "<|im_start|>system\n"
        "Eres Lumin Code, experto en programación. Piensa paso a paso y da soluciones correctas.<|im_end|>\n"
    )
    for item in history:
        if isinstance(item, dict):
            # Gradio "messages" format: {"role": ..., "content": ...}
            role = item.get("role")
            content = item.get("content")
            if role in ("user", "assistant") and content:
                prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            # Classic [user, assistant] pair; either side may be None/empty.
            u, b = item[0], item[1]
            if u:
                prompt += f"<|im_start|>user\n{u}<|im_end|>\n"
            if b:
                prompt += f"<|im_start|>assistant\n{b}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    stream = llm.create_completion(
        prompt,
        max_tokens=2048,
        temperature=0.3,  # low temperature: keep code answers focused
        stream=True,
        stop=["<|im_end|>", "<|endoftext|>"]
    )
    partial_text = ""
    for output in stream:
        token = output["choices"][0]["text"]
        partial_text += token
        # Yield the full text accumulated so far, as Gradio streaming expects.
        yield partial_text
# 3. INTERFACE (Blocks API)
with gr.Blocks(title="Lumin Code 3B Smart") as demo:
    # Markdown headings need a space after '#' to render as a title
    # (the original "#Lumin..." rendered as plain text).
    gr.Markdown("# Lumin Haiku (3B Smart)\nSpace")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Escribe aquí...")
    clear = gr.Button("Limpiar")

    # Visible chat wiring
    def user(u, h):
        """Append the user's turn to history and clear the textbox."""
        return "", h + [[u, None]]

    def bot(h):
        """Stream the assistant's reply into the last history slot."""
        u = h[-1][0]      # the just-submitted user message
        h_prev = h[:-1]   # history before this turn, for prompt building
        p = ""
        for chunk in generate_code(u, h_prev):
            p = chunk
            h[-1][1] = p
            yield h

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot], [chatbot]
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    # Hidden "chat" API endpoint (exposed via api_name for programmatic use)
    api_msg = gr.Textbox(visible=False)
    api_hist = gr.State()
    api_out = gr.Textbox(visible=False)
    btn = gr.Button("API", visible=False)
    btn.click(fn=generate_code, inputs=[api_msg, api_hist], outputs=[api_out], api_name="chat")

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard bind address/port for a HF Space container.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)