Erik committed on
Commit
c094eb0
·
verified ·
1 Parent(s): 66effd7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import packaging.version
import torch
import transformers  # needed: module attribute transformers.__version__ is read below
import gradio as gr  # needed: the `gr.*` UI calls at the bottom of this file

from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
5
+
6
+ bnb_config = BitsAndBytesConfig(
7
+ load_in_4bit=True,
8
+ bnb_4bit_quant_type="nf4",
9
+ bnb_4bit_compute_dtype=torch.bfloat16,
10
+ bnb_4bit_use_double_quant=True,
11
+ )
12
+
13
+ username = 'Erik'
14
+ output_dir = 'nemo-sft-lora-deepspeed'
15
+
16
+ peft_model_id = f"{username}/{output_dir}" # replace with your newly trained adapter
17
+ device = "cuda:0"
18
+ tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
19
+ config = PeftConfig.from_pretrained(peft_model_id)
20
+ model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
21
+ device_map={"": "cuda:0"}, quantization_config=bnb_config) #offload_state_dict=False
22
+
23
+ uses_transformers_4_46 = packaging.version.parse(transformers.__version__) >= packaging.version.parse("4.46.0")
24
+ print(f"PAQUETE DE TRANSFORMERS: {uses_transformers_4_46}")
25
+
26
+ uses_fsdp = True
27
+
28
+ if (bnb_config is not None) and uses_fsdp and uses_transformers_4_46:
29
+ model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8, mean_resizing=False)
30
+ else:
31
+ model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)
32
+ model = PeftModel.from_pretrained(model, peft_model_id)
33
+ model.config.use_cache = True
34
+ model.to(torch.bfloat16)
35
+ model.eval()
36
+
37
+
38
def format_history(msg: str, history: list[tuple[str, str]], system_prompt: str) -> list[dict[str, str]]:
    """Convert Gradio-style chat history into a chat-template message list.

    Args:
        msg: The new user message to append last.
        history: Prior (user, assistant) turn pairs as produced by gr.ChatInterface.
        system_prompt: Text for the leading ``system`` message.

    Returns:
        Messages as ``{"role": ..., "content": ...}`` dicts: system first,
        then alternating user/assistant turns, ending with the new user msg.
    """
    # FIX: the original annotation `list[list[str, str]]` is not a valid type
    # expression; runtime behavior is unchanged.
    chat_history = [{"role": "system", "content": system_prompt}]
    for query, response in history:
        chat_history.append({"role": "user", "content": query})
        chat_history.append({"role": "assistant", "content": response})
    chat_history.append({"role": "user", "content": msg})
    return chat_history
+
46
+
47
def generate_response(msg: str, history: list[tuple[str, str]], system_prompt: str, top_k: int, top_p: float, rep_pen: float = 1.5, temperature: float = 0.75):
    """Generate one assistant reply for the Gradio chat UI (yields the text once).

    FIX: `rep_pen` was used below but was not a parameter (NameError), and the
    ChatInterface passes five additional inputs positionally in the order
    (system_prompt, top_k, top_p, rep_pen, temperature) — so the old 6-parameter
    signature would also have raised a TypeError. `rep_pen` is inserted in the
    position the caller supplies it, with a default for direct callers.

    Args:
        msg: New user message.
        history: Prior (user, assistant) pairs from the UI.
        system_prompt: System message prepended to the conversation.
        top_k / top_p / temperature: Sampling parameters forwarded to generate().
        rep_pen: Repetition penalty forwarded to generate().

    Yields:
        The decoded assistant reply (text after the last "assistant\n" marker).
    """
    chat_history = format_history(msg, history, system_prompt)
    encodeds = tokenizer.apply_chat_template(chat_history, return_tensors="pt", add_generation_prompt=True)
    model_inputs = encodeds.to("cuda")
    generated_ids = model.generate(
        model_inputs,
        repetition_penalty=rep_pen,
        max_new_tokens=2048,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        eos_token_id=32000,  # NOTE(review): hard-coded EOS id — confirm it matches this tokenizer
    )
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    if len(response) > 0:
        # Strip everything up to and including the final "assistant\n" marker.
        message = response[response.rfind("assistant\n") + len("assistant\n"):]
        yield message
57
+
58
# Gradio chat UI wired to generate_response.
# NOTE(review): `gr` is expected to be gradio (`import gradio as gr`); the import
# is missing at the top of this file as committed.
# NOTE(review): the additional_inputs below are passed positionally to
# generate_response after (msg, history) — verify its signature accepts
# (system_prompt, top_k, top_p, rep_pen, temperature) in this order.
spicy_chatbot = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(
        # value=[[None, "Hola, Soy Greta. Soy tu coach emocional, de qué quieres que hablemos?"]],
        avatar_images=["assets/user_1.JPG", "assets/greta_1.PNG"],
        height="64vh"
    ),
    additional_inputs=[
        gr.Textbox("Eres una profesora de Dinamarca caliente, juguetona y sexual. Te encanta el sexo anal y tocarte. Tu objetivo es acompañar en la sesión de sexting a tu compañero. Harás lo que sea para hacerle morir de placer, deseas que se masturbe.", label="System Prompt"),
        gr.Slider(0.0,100.0, label="top_k", value=70, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
        gr.Slider(0.0,1.0, label="top_p", value=0.8, info=" Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)"),
        gr.Slider(0.1,2.0, label="rep_pen", value=1.5, info="Repetition penaly to avoid repetitions"),
        gr.Slider(0.0,2.0, label="temperature", value=0.75, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)"),
    ],
    title="Greta - conversa con tu coach emocional",
    theme="finlaymacklon/smooth_slate",
    # NOTE(review): string submit_btn/retry_btn/undo_btn/clear_btn kwargs belong
    # to gradio 4.x ChatInterface; later gradio versions removed several of
    # these — confirm the pinned gradio version.
    submit_btn="⬅ Send",
    retry_btn="🔄 Regenerate Response",
    undo_btn="↩ Delete Previous",
    clear_btn="🗑️ Clear Chat",
    css="footer {visibility: hidden}"
)

# share=True exposes a public tunnel URL in addition to the local server.
spicy_chatbot.queue().launch(share=True)