import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import os
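# NOTE: loading a GGUF checkpoint through transformers requires the `gguf` package
# to be installed; the quantized weights are dequantized into a full-precision
# PyTorch model at load time, so RAM usage matches the unquantized model.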
model_id = "novapixelentretaiment/Lumin-Haiku-4-Instruct"
# Filename corrected to match the official GGUF file name in the HF repo
gguf_file = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"
token = os.environ.get("HF_TOKEN")
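# HF_TOKEN should be provided as an environment variable (e.g. a Space secret);
# it is only needed if the model repo is private or gated.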
print("Cargando Lumin Nano 2.1 (GGUF Optimized)...")
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    gguf_file=gguf_file,
    token=token,
    torch_dtype=torch.float32,  # CPU friendly
    trust_remote_code=True,
)
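# No device_map is passed, so the model is loaded and run on CPU.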
def parse_thought(text):
    # The reasoning tags were lost in the original source; <think>...</think> is assumed here.
    if "<think>" in text:
        if "</think>" in text:
            parts = text.split("</think>")
            return f"Pensamiento: {parts[0].replace('<think>', '').strip()}\n\nRespuesta: {parts[1].strip()}"
        else:
            return f"Pensamiento: {text.replace('<think>', '').strip()}"
    return text
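# Rough usage sketch (assuming <think>/</think> reasoning tags):
#   parse_thought("<think>analizo la pregunta</think>Hola")
#   -> "Pensamiento: analizo la pregunta\n\nRespuesta: Hola"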
def chat_stream(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    # History arrives as (user, assistant) pairs (Gradio's tuple-style chat history).
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
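    # apply_chat_template renders the ChatML-style prompt Qwen models expect
    # (<|im_start|>role ... <|im_end|> blocks); add_generation_prompt=True appends
    # the opening assistant tag so the model continues as the assistant.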
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        do_sample=True,  # without this, temperature/top_p are ignored (greedy decoding)
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|im_end|>")],
    )
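    # model.generate() blocks until generation finishes, so it runs in a background
    # thread while this function consumes the streamer and yields partial text to the UI.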
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        if "<|im_end|>" in partial_text:
            partial_text = partial_text.split("<|im_end|>")[0]
            yield parse_thought(partial_text)
            break
        yield parse_thought(partial_text)
with gr.Blocks(title="Lumin Nano 2.1") as demo:
    gr.Markdown("Lumin Nano 2.1 - Spanish Only")
    gr.ChatInterface(
        chat_stream,
        additional_inputs=[
            gr.Textbox(value="Eres Lumin Nano 2.1. UNICAMENTE puedes pensar y responder en ESPAÑOL. Tienes PROHIBIDO usar el inglés. Sé directo, conciso y nunca uses emojis.", label="System Message"),
            gr.Slider(1, 1024, 256, label="Max Tokens"),
            gr.Slider(0.01, 1.0, 0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, 0.9, label="Top-p"),
        ],
    )
if __name__ == "__main__":
    demo.launch()