# Hugging Face Space: Lumin Code (3B Smart)
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# ==========================================
# 🚀 LUMIN CODE: 3B SMART (Balanced)
# ==========================================
# The sweet spot: smarter than 1.5B, yet still runs on CPU.
# Qwen 2.5 Coder 3B

# 1. "SMART" MODEL (3B)
REPO_ID = "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF"
FILENAME = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"

print(f"⬇️ Downloading {FILENAME}...")
# Downloads (or reuses the cached copy of) the GGUF weights from the Hub.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

print("🚀 Loading model into RAM...")
# CPU-only inference settings sized for a small (free-tier) Space.
llm = Llama(
    model_path=model_path,
    n_ctx=16384,       # 16k context window
    n_threads=2,       # 2 CPU cores
    n_batch=512,       # smaller batch leaves headroom for the 3B weights
    f16_kv=False,
    flash_attn=False,  # off: flash attention is not safe on CPU here
    verbose=False
)
# 2. LOGIC
def generate_code(message, history):
    """Stream a completion for *message*, yielding the cumulative text.

    history is a list of (user, assistant) pairs; entries that are not
    2-item sequences are skipped. Yields the partial response after
    every generated token so the UI can render it incrementally.
    """
    if history is None:
        history = []

    # Standard Qwen ChatML prompt, assembled as parts then joined once.
    parts = [
        "<|im_start|>system\n"
        "Eres Lumin Code, experto en programación. Piensa paso a paso y da soluciones correctas.<|im_end|>\n"
    ]
    for turn in history:
        if isinstance(turn, (list, tuple)) and len(turn) >= 2:
            user_msg, bot_msg = turn[0], turn[1]
            if user_msg:
                parts.append(f"<|im_start|>user\n{user_msg}<|im_end|>\n")
            if bot_msg:
                parts.append(f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n")
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    prompt = "".join(parts)

    token_stream = llm.create_completion(
        prompt,
        max_tokens=2048,
        temperature=0.3,
        stream=True,
        stop=["<|im_end|>", "<|endoftext|>"]
    )
    accumulated = ""
    for event in token_stream:
        accumulated += event["choices"][0]["text"]
        yield accumulated
# 3. INTERFACE (Blocks API)
with gr.Blocks(title="Lumin Code 3B Smart") as demo:
    # BUG FIX: markdown needs a space after '#' or the heading never renders.
    gr.Markdown("# Lumin Haiku (3B Smart)\nSpace")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Escribe aquí...")
    clear = gr.Button("Limpiar")

    # Visible chat wiring
    def user(u, h):
        """Append the user turn (bot slot empty) and clear the textbox."""
        return "", h + [[u, None]]

    def bot(h):
        """Stream the assistant reply into the last history entry."""
        u = h[-1][0]
        h_prev = h[:-1]  # generate_code rebuilds the prompt from prior turns
        p = ""
        for chunk in generate_code(u, h_prev):
            p = chunk
            h[-1][1] = p
            yield h

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, [chatbot], [chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

    # Hidden "chat" API endpoint (programmatic access via /api/chat)
    api_msg = gr.Textbox(visible=False)
    api_hist = gr.State()  # defaults to None; generate_code tolerates that
    api_out = gr.Textbox(visible=False)
    btn = gr.Button("API", visible=False)
    btn.click(fn=generate_code, inputs=[api_msg, api_hist], outputs=[api_out], api_name="chat")

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)