# epfl-tutor / app.py
# Author: matteosz — commit 206fd23 ("revert unquantized")
import gradio as gr
from transformers import AutoTokenizer, pipeline
from peft import AutoPeftModelForCausalLM
import torch
# DPO-fine-tuned Phi-3-mini adapter checkpoint hosted on the Hugging Face Hub.
checkpoint = 'ernestoBocini/Phi3-mini-DPO-Tuned'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
# Cap prompt length; inputs longer than 256 tokens will be truncated.
tokenizer.model_max_length = 256
# Phi-3's tokenizer ships without a pad token; reuse <unk> so padded batches work.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
# Left-padding is required for correct decoder-only (causal LM) generation.
tokenizer.padding_side = 'left'
# Load base model + PEFT adapter, fold the adapter weights into the base
# (merge_and_unload), then cast to fp16 for inference and freeze with eval().
# NOTE(review): loads in bf16 then converts to fp16 — presumably intentional
# (bf16 for safe loading, fp16 for runtime); confirm against deployment target.
model = AutoPeftModelForCausalLM.from_pretrained(
checkpoint,
trust_remote_code=True,
device_map='auto',
torch_dtype=torch.bfloat16
).merge_and_unload().to(torch.float16).eval()
# Shared text-generation pipeline reused by every chat() call.
pipe = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
)
# Decoding configuration passed to every pipeline call.
generation_args = {
"max_new_tokens": 256,          # hard cap on generated length
"return_full_text": False,      # return only the completion, not the prompt
"temperature": 1.0,             # no sharpening/flattening of the distribution
"top_k": 0,                     # 0 disables top-k filtering (nucleus only)
"top_p": 0.9,                   # nucleus sampling: keep top 90% probability mass
"repetition_penalty": 1.1,      # mildly discourage repeated tokens
"num_return_sequences": 1,
"pad_token_id": tokenizer.eos_token_id,  # silence the missing-pad-token warning
"do_sample": True,              # sampling (temperature/top_p active) vs greedy
"length_penalty": 1.0,          # neutral; only meaningful with beam search
"no_repeat_ngram_size": 0,      # 0 = no n-gram blocking
"early_stopping": True,         # NOTE(review): only affects beam search; no-op here
"use_cache": True,              # reuse KV cache across decoding steps
}
def chat(user_input):
    """Generate a model completion for a single user prompt.

    Args:
        user_input: Raw prompt text from the Gradio textbox.

    Returns:
        The generated continuation as a string, stripped of leading and
        trailing whitespace.
    """
    print('Generating response for input:', user_input)
    # Bug fix: Python strings have no .trim() (that is JavaScript) — the
    # original raised AttributeError on every call. Use .strip() instead.
    return pipe(user_input, **generation_args)[0]['generated_text'].strip()
def main():
    """Build and launch a minimal text-in/text-out Gradio chat UI."""
    interface = gr.Interface(
        fn=chat,
        inputs="text",
        outputs="text",
        title="Chatbot",
    )
    interface.launch()


if __name__ == '__main__':
    main()