Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoTokenizer, pipeline | |
| from peft import AutoPeftModelForCausalLM | |
| import torch | |
# Hugging Face Hub checkpoint of the DPO-tuned Phi-3-mini adapter.
checkpoint = 'ernestoBocini/Phi3-mini-DPO-Tuned'

# Tokenizer: cap sequence length and left-pad with the unk token
# (left padding is required for decoder-only generation with batching).
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
tokenizer.model_max_length = 256
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'left'

# Load the PEFT adapter in bf16, fold it into the base weights, then
# cast to fp16 for inference and switch to eval mode.
model = AutoPeftModelForCausalLM.from_pretrained(
    checkpoint,
    trust_remote_code=True,
    device_map='auto',
    torch_dtype=torch.bfloat16,
).merge_and_unload().to(torch.float16).eval()

# Shared text-generation pipeline used by chat().
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
# Decoding parameters applied to every chat() call.
generation_args = {
    "max_new_tokens": 256,       # cap on generated tokens per response
    "return_full_text": False,   # return only the completion, not the prompt
    "do_sample": True,
    "temperature": 1.0,
    "top_k": 0,                  # 0 disables top-k filtering
    "top_p": 0.9,                # nucleus sampling threshold
    "repetition_penalty": 1.1,
    "no_repeat_ngram_size": 0,   # 0 disables the n-gram repetition constraint
    "num_return_sequences": 1,
    "length_penalty": 1.0,
    # NOTE(review): generation pads with eos here, while the tokenizer above
    # is configured to pad with unk — confirm this mismatch is intentional.
    "pad_token_id": tokenizer.eos_token_id,
    "early_stopping": True,
    "use_cache": True,
}
def chat(user_input):
    """Generate a model response for *user_input*.

    Runs the shared text-generation pipeline with the module-level
    ``generation_args`` and returns the completion text with
    surrounding whitespace removed.
    """
    print('Generating response for input:', user_input)
    # Bug fix: Python str has no .trim() (that's Java/JS), so the original
    # raised AttributeError on every request. str.strip() is the equivalent.
    return pipe(user_input, **generation_args)[0]['generated_text'].strip()
def main():
    """Build and launch a minimal Gradio text-in/text-out UI around chat()."""
    demo = gr.Interface(
        fn=chat,
        inputs="text",
        outputs="text",
        title="Chatbot",
    )
    demo.launch()


if __name__ == '__main__':
    main()