import re

import torch
import gradio as gr
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel
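
# Tokenizer from the adapter repo; it presumably extends the base LLaMA
# vocabulary with Persian tokens, which is why the embeddings are resized below.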
tokenizer = LlamaTokenizer.from_pretrained("mostafaamiri/persian_llama_7b")
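
# Base Llama-2 weights. With no torch_dtype or device_map given, this loads in
# full precision on CPU; pass e.g. device_map="auto" to place it on a GPU.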
base_model = LlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    load_in_8bit=False,
)
base_model.resize_token_embeddings(len(tokenizer))
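
# Attach the Persian PEFT adapter weights on top of the resized base model.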
model = PeftModel.from_pretrained(
    base_model,
    "mostafaamiri/persian_llama_7b",
)
model.eval()  # inference only: disables dropout
prompt_input = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n\n{instruction}\n\n### Response:\n\n"
)
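
# Fill the template with the user's instruction (plus optional extra input).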
def generate_prompt(instruction, input=None):
    if input:
        instruction = instruction + "\n" + input
    return prompt_input.format_map({"instruction": instruction})
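
# Decoding settings passed straight to model.generate().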
config = dict(
    temperature=0.2,
    top_k=40,
    top_p=0.9,
    do_sample=True,
    num_beams=1,
    repetition_penalty=1.2,
    max_new_tokens=300,
)
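
# Run one inference: build the prompt, generate, and return only the response text.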
def launch_model(text):
    # The original wrapped `text` in a one-element list, which leaked the list
    # repr ("['...']") into the prompt; pass the string directly instead.
    prompt = generate_prompt(text)
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(**inputs, **config)
    output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only what follows the response marker.
    output = re.sub(r"(?s)^.*?### Response:\n\n", "", output)

    return output
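
# Minimal Gradio UI: a text box in, the model's response out.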
iface = gr.Interface(fn=launch_model, inputs="text", outputs="text")
iface.launch()