Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
| from threading import Thread | |
| import os | |
# --- Model configuration ---
# GGUF-quantized checkpoint loaded through transformers' gguf_file support.
model_id = "novapixelentretaiment/Lumin-Nano-2.1"
gguf_file = "lumin-q4_k_m.gguf"
# Hugging Face access token from the environment; None if unset (works for public repos).
token = os.environ.get("HF_TOKEN")

print("Cargando Lumin Nano 2.1 (GGUF Optimized)...")
# trust_remote_code=True: the repo may ship custom tokenizer/model code — required here.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    gguf_file=gguf_file,
    token=token,
    torch_dtype=torch.float32,  # CPU friendly
    trust_remote_code=True
)
def parse_thought(text):
    """Format a model reply that may carry a ``<think>…</think>`` block.

    Returns the text untouched when no ``<think>`` marker is present,
    "Pensamiento: …" while the thought is still open, and
    "Pensamiento: …\\n\\nRespuesta: …" once it has been closed.
    """
    if "<think>" not in text:
        return text
    pieces = text.split("</think>")
    thought = pieces[0].replace("<think>", "").strip()
    if len(pieces) > 1:
        # Closed thought: show reasoning and answer separately.
        return f"Pensamiento: {thought}\n\nRespuesta: {pieces[1].strip()}"
    # Thought still streaming in — no closing tag yet.
    return f"Pensamiento: {thought}"
def chat_stream(message: str, history, system_message: str, max_tokens: int, temperature: float, top_p: float):
    """Generator used by gr.ChatInterface: streams the model's reply token by token.

    Yields progressively longer strings (formatted by parse_thought) as the
    background generation thread produces text.
    """
    # Rebuild the whole conversation as chat-template messages.
    messages = [{"role": "system", "content": system_message}]
    # NOTE(review): assumes gradio tuple-style history [(user, assistant), ...];
    # with ChatInterface(type="messages") this indexing would break — confirm.
    for h in history:
        if h[0]: messages.append({"role": "user", "content": h[0]})
        if h[1]: messages.append({"role": "assistant", "content": h[1]})
    messages.append({"role": "user", "content": message})
    # Render the prompt with the tokenizer's chat template, then tokenize.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Streamer hands out decoded text chunks; raises if no token arrives within 20 s.
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
        # Stop on either the tokenizer's EOS or the chat end-of-turn marker.
        eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|im_end|>")],
    )
    # Generation runs in a background thread so we can consume the streamer here.
    # NOTE(review): the thread is never joined; it finishes on its own when
    # generation ends, but errors inside it surface only as a streamer timeout.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Defensive trim in case the end-of-turn marker leaks into the decoded
        # text (skip_special_tokens=True should normally strip it).
        if "<|im_end|>" in partial_text:
            partial_text = partial_text.split("<|im_end|>")[0]
            yield parse_thought(partial_text)
            break
        yield parse_thought(partial_text)
    # NOTE(review): falling out of the loop yields the final text once more;
    # gradio simply replaces the displayed message, so this is harmless.
    yield parse_thought(partial_text)
# --- UI wiring: a gradio Blocks app wrapping ChatInterface around chat_stream ---
with gr.Blocks(title="Lumin Nano 2.1") as demo:
    gr.Markdown("Lumin Nano 2.1 - Spanish Only")
    gr.ChatInterface(
        chat_stream,
        # Extra controls appended to chat_stream's (message, history) arguments,
        # in the same order as the function's remaining parameters.
        additional_inputs=[
            gr.Textbox(value="Eres Lumin Nano 2.1. UNICAMENTE puedes pensar y responder en ESPAÑOL. Tienes PROHIBIDO usar el inglés. Sé directo, conciso y nunca uses emojis.", label="System Message"),
            gr.Slider(1, 1024, 256, label="Max Tokens"),
            gr.Slider(0.01, 1.0, 0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, 0.9, label="Top-p"),
        ],
    )

if __name__ == "__main__":
    demo.launch()