nova committed on
Commit
e8e7d25
·
verified ·
1 Parent(s): 5dd1fc5

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import os

# Model weights are expected in the current working directory (Space repo root).
model_path = "."
# Optional HF access token for gated/private weights; None if the env var is unset.
token = os.environ.get("HF_TOKEN")

print("Cargando Lumin Nano 2.1...")

# trust_remote_code=True lets the checkpoint supply custom tokenizer/model code.
tokenizer = AutoTokenizer.from_pretrained(model_path, token=token, trust_remote_code=True)
device = "cuda" if torch.cuda.is_available() else "cpu"

# float16 on GPU for memory/speed; float32 on CPU (fp16 CPU inference is poorly supported).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    token=token,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    low_cpu_mem_usage=True,
    trust_remote_code=True
).to(device)
23
def parse_thought(text):
    """Format a model response that may contain <think>...</think> reasoning.

    Returns the text unchanged when no "<think>" tag is present. When the
    closing tag has arrived, splits the text into a labelled
    "Pensamiento:" (reasoning) section and a "Respuesta:" (answer) section;
    while the closing tag is still streaming in, only the reasoning part is
    shown.

    Args:
        text: Accumulated (possibly partial) generation output.

    Returns:
        A display string for the chat UI.
    """
    if "<think>" in text:
        if "</think>" in text:
            # maxsplit=1: only the FIRST closing tag delimits the reasoning.
            # Without it, a literal "</think>" inside the answer would make
            # parts[1] truncate everything after the second occurrence.
            parts = text.split("</think>", 1)
            return f"Pensamiento: {parts[0].replace('<think>', '').strip()}\n\nRespuesta: {parts[1].strip()}"
        else:
            # Reasoning still streaming; no answer section yet.
            return f"Pensamiento: {text.replace('<think>', '').strip()}"
    return text
31
+
32
def chat_stream(message, history, system_message, max_tokens, temperature, top_p):
    """Generator for gr.ChatInterface: streams the model's reply token by token.

    Args:
        message: Latest user message.
        history: Prior turns as (user, assistant) pairs (tuple-format history).
        system_message: System prompt injected as the first chat message.
        max_tokens: Cap on newly generated tokens.
        temperature: Sampling temperature (from the UI slider).
        top_p: Nucleus-sampling cutoff (from the UI slider).

    Yields:
        Progressively longer formatted responses (via parse_thought).
    """
    # Rebuild the full conversation in chat-template message format.
    messages = [{"role": "system", "content": system_message}]
    for h in history:
        if h[0]: messages.append({"role": "user", "content": h[0]})
        if h[1]: messages.append({"role": "assistant", "content": h[1]})
    messages.append({"role": "user", "content": message})

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # skip_prompt avoids echoing the input; timeout guards against a hung generate thread.
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        # do_sample=True is required for temperature/top_p to take effect;
        # default greedy decoding silently ignores both sliders.
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )

    # generate() blocks, so run it in a worker thread and consume the streamer here.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield parse_thought(partial_text)
60
+
61
# Build the Gradio UI: a ChatInterface wired to the streaming generator above,
# with sliders exposed as additional inputs that map onto chat_stream's
# (system_message, max_tokens, temperature, top_p) parameters, in order.
with gr.Blocks(title="Lumin Nano 2.1") as demo:
    gr.Markdown("Lumin Nano 2.1")

    gr.ChatInterface(
        chat_stream,
        additional_inputs=[
            gr.Textbox(value="Eres Lumin Nano 2.1. Responde de forma muy concisa y directa. No uses emojis.", label="System Message"),
            # Slider(minimum, maximum, default) for each generation knob.
            gr.Slider(1, 1024, 128, label="Max Tokens"),
            gr.Slider(0.1, 1.0, 0.2, label="Temperature"),
            gr.Slider(0.1, 1.0, 0.9, label="Top-p"),
        ],
    )

if __name__ == "__main__":
    demo.launch()