nova committed on
Commit
e678efd
·
verified ·
1 Parent(s): e915224

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library first, then third-party, per PEP 8 grouping.
import os
from threading import Thread

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Hugging Face repo hosting the model, and the quantized GGUF weight file.
model_id = "novapixelentretaiment/Lumin-Nano-2.1"
gguf_file = "lumin-q4_k_m.gguf"
# Optional auth token; None when HF_TOKEN is unset.
token = os.environ.get("HF_TOKEN")

print("Cargando Lumin Nano 2.1 (GGUF Optimized)...")

# Tokenizer and model are module-level globals consumed by chat_stream().
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=token,
    trust_remote_code=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    gguf_file=gguf_file,
    token=token,
    torch_dtype=torch.float32,  # CPU friendly
    trust_remote_code=True,
)
25
def parse_thought(text):
    """Format a model response that may contain a <think>...</think> block.

    Returns ``text`` unchanged when no ``<think>`` tag is present. When the
    tag is present, the reasoning and the final answer are labelled
    separately; an unclosed ``<think>`` block (mid-stream) is rendered as
    thought-in-progress.
    """
    if "<think>" not in text:
        return text
    if "</think>" in text:
        # Split only on the FIRST closing tag so any later literal
        # "</think>" inside the answer is preserved verbatim (the original
        # split-all kept only parts[1] and dropped everything after a
        # second occurrence).
        thought, answer = text.split("</think>", 1)
        return f"Pensamiento: {thought.replace('<think>', '').strip()}\n\nRespuesta: {answer.strip()}"
    # Still streaming inside the <think> block: show the partial thought.
    return f"Pensamiento: {text.replace('<think>', '').strip()}"
33
+
34
def chat_stream(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion from the module-level ``model``/``tokenizer``.

    Yields progressively longer formatted strings (via ``parse_thought``)
    as tokens arrive, stopping early when the "<|im_end|>" marker appears
    in the accumulated output.
    """
    # Rebuild the full conversation: system prompt, prior turns, new message.
    conversation = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            conversation.append({"role": "user", "content": user_turn})
        if assistant_turn:
            conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )

    generation_args = dict(
        **model_inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=[
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|im_end|>"),
        ],
    )

    # Run generation on a worker thread so this generator can consume the
    # streamer concurrently.
    worker = Thread(target=model.generate, kwargs=generation_args)
    worker.start()

    accumulated = ""
    for fragment in streamer:
        accumulated += fragment
        # Defensive cut: drop anything after the end-of-turn marker in case
        # it slips through despite eos_token_id / skip_special_tokens.
        if "<|im_end|>" in accumulated:
            accumulated = accumulated.split("<|im_end|>")[0]
            yield parse_thought(accumulated)
            break
        yield parse_thought(accumulated)
67
+
68
# Gradio front-end: a chat widget wired to `chat_stream`, plus sampling
# controls. Components are created inline inside additional_inputs so that
# ChatInterface (not the Blocks layout) owns their placement.
with gr.Blocks(title="Lumin Nano 2.1") as demo:
    gr.Markdown("Lumin Nano 2.1 - Spanish Only")

    # The additional inputs map, in order, onto chat_stream's trailing
    # parameters: system_message, max_tokens, temperature, top_p.
    gr.ChatInterface(
        chat_stream,
        additional_inputs=[
            gr.Textbox(value="Eres Lumin Nano 2.1. UNICAMENTE puedes pensar y responder en ESPAÑOL. Tienes PROHIBIDO usar el inglés. Sé directo, conciso y nunca uses emojis.", label="System Message"),
            gr.Slider(1, 1024, 256, label="Max Tokens"),
            gr.Slider(0.01, 1.0, 0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, 0.9, label="Top-p"),
        ],
    )

# Start the web server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()