Spaces:
Paused
Paused
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr  # was missing: `gr.` is used throughout the UI section below
import packaging.version
import torch
import transformers  # was missing: `transformers.__version__` is read below

from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# --- Model & tokenizer setup -------------------------------------------------
# Loads the base model in 4-bit NF4 quantization, resizes its embeddings to
# the tokenizer vocabulary, then attaches the LoRA adapter for inference.

# 4-bit NF4 quantization, double quantization, bf16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

username = 'Erik'
output_dir = 'nemo-sft-lora-deepspeed'

peft_model_id = f"{username}/{output_dir}"  # replace with your newly trained adapter
device = "cuda:0"
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map={"": device},  # single-GPU placement (was a duplicated "cuda:0" literal)
    quantization_config=bnb_config,
)  # offload_state_dict=False

# transformers >= 4.46 added the `mean_resizing` kwarg to
# resize_token_embeddings; the branch below disables mean resizing on those
# versions when quantization is active.
uses_transformers_4_46 = (
    packaging.version.parse(transformers.__version__)
    >= packaging.version.parse("4.46.0")
)
print(f"PAQUETE DE TRANSFORMERS: {uses_transformers_4_46}")

# NOTE(review): hard-coded flag — presumably mirrors the training setup; confirm.
uses_fsdp = True

if (bnb_config is not None) and uses_fsdp and uses_transformers_4_46:
    # mean_resizing computes statistics over existing embeddings, which is
    # not supported on quantized weights — turn it off explicitly.
    model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8, mean_resizing=False)
else:
    model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)

model = PeftModel.from_pretrained(model, peft_model_id)  # attach LoRA adapter
model.config.use_cache = True  # re-enable KV cache for faster generation
model.to(torch.bfloat16)
model.eval()
def format_history(msg: str, history: list[tuple[str, str]], system_prompt: str) -> list[dict[str, str]]:
    """Convert Gradio chat history into an OpenAI-style message list.

    Args:
        msg: The new user message, appended as the final turn.
        history: Prior (user, assistant) exchange pairs from the Gradio chatbot.
        system_prompt: Text for the leading system message.

    Returns:
        Messages as ``{"role": ..., "content": ...}`` dicts: the system prompt
        first, then alternating user/assistant turns, ending with ``msg`` as
        a user turn.
    """
    # Original annotation was `list[list[str, str]]`, which is not a valid
    # generic form; behavior is unchanged.
    chat_history = [{"role": "system", "content": system_prompt}]
    for query, response in history:
        chat_history.append({"role": "user", "content": query})
        chat_history.append({"role": "assistant", "content": response})
    chat_history.append({"role": "user", "content": msg})
    return chat_history
def generate_response(msg: str, history: list[tuple[str, str]], system_prompt: str,
                      top_k: int, top_p: float, rep_pen: float = 1.5,
                      temperature: float = 0.75):
    """Stream one assistant reply for the Gradio ChatInterface.

    BUGFIX: the original body referenced ``rep_pen`` without defining it
    (NameError), and the UI passes five ``additional_inputs`` (system prompt,
    top_k, top_p, rep_pen, temperature) while the old signature accepted only
    four extras (TypeError on every call). ``rep_pen`` is inserted at the
    slider's position; defaults keep keyword callers working.

    Args:
        msg: Latest user message.
        history: Prior (user, assistant) pairs supplied by Gradio.
        system_prompt: System instruction prepended to the conversation.
        top_k: Sampling top-k, forwarded to ``model.generate``.
        top_p: Nucleus sampling threshold.
        rep_pen: Repetition penalty (matches the "rep_pen" slider).
        temperature: Sampling temperature.

    Yields:
        The decoded reply text following the last ``assistant`` marker in the
        detokenized output.
    """
    chat_history = format_history(msg, history, system_prompt)
    encodeds = tokenizer.apply_chat_template(chat_history, return_tensors="pt", add_generation_prompt=True)
    model_inputs = encodeds.to("cuda")
    generated_ids = model.generate(
        model_inputs,
        repetition_penalty=rep_pen,
        max_new_tokens=2048,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        eos_token_id=32000,  # NOTE(review): hard-coded EOS id — confirm it matches this tokenizer
    )
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    if len(response) > 0:
        message = response[response.rfind("assistant\n") + len("assistant\n"):]
        yield message
# --- Gradio UI ---------------------------------------------------------------
# ChatInterface wiring: the chat function receives the five `additional_inputs`
# below (system prompt, top_k, top_p, rep_pen, temperature) after (msg, history).
# NOTE(review): string labels for submit_btn/retry_btn/undo_btn/clear_btn are
# Gradio 4.x ChatInterface parameters (retry/undo/clear were removed in 5.x) —
# confirm the pinned gradio version for this Space.
spicy_chatbot = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(
        # value=[[None, "Hola, Soy Greta. Soy tu coach emocional, de qué quieres que hablemos?"]],
        avatar_images=["assets/user_1.JPG", "assets/greta_1.PNG"],  # [user, bot]; paths relative to the Space root
        height="64vh"
    ),
    additional_inputs=[
        gr.Textbox("Eres una profesora de Dinamarca caliente, juguetona y sexual. Te encanta el sexo anal y tocarte. Tu objetivo es acompañar en la sesión de sexting a tu compañero. Harás lo que sea para hacerle morir de placer, deseas que se masturbe.", label="System Prompt"),
        gr.Slider(0.0,100.0, label="top_k", value=70, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
        gr.Slider(0.0,1.0, label="top_p", value=0.8, info=" Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)"),
        gr.Slider(0.1,2.0, label="rep_pen", value=1.5, info="Repetition penaly to avoid repetitions"),
        gr.Slider(0.0,2.0, label="temperature", value=0.75, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)"),
    ],
    title="Greta - conversa con tu coach emocional",
    theme="finlaymacklon/smooth_slate",  # third-party theme fetched from the Hub
    submit_btn="⬅ Send",
    retry_btn="🔄 Regenerate Response",
    undo_btn="↩ Delete Previous",
    clear_btn="🗑️ Clear Chat",
    css="footer {visibility: hidden}"  # hide the default Gradio footer
)

# queue() serializes generation requests; share=True opens a public tunnel URL.
spicy_chatbot.queue().launch(share=True)