| import gradio as gr |
| import time |
| from ctransformers import AutoModelForCausalLM |
| from huggingface_hub import hf_hub_download |
|
|
# Text placed in front of every formatted user message.
# This has to be a string: a pair of empty parentheses evaluates to an empty
# tuple, and concatenating a tuple with the per-message prompt string raises
# TypeError. The parentheses are kept so that a longer template can be written
# as several adjacent string literals, one per line.
PROMPT_TEMPLATE = (
    ""
)
|
|
# Model handle shared by all load_llm() calls; filled in on the first call.
_LLM_INSTANCE = None


def load_llm():
    """Return the TinyLlama chat model, loading it only on the first call.

    The model is created with ``AutoModelForCausalLM.from_pretrained`` from
    the ``s3nh/PY007-TinyLlama-1.1B-Chat-v0.2-GGUF`` repository using the
    ``Q4_K_M`` quantised file. Later calls return the same object instead of
    loading the weights again, so callers that invoke this once per chat
    message no longer pay the load cost on every turn.

    Returns:
        The object returned by ``AutoModelForCausalLM.from_pretrained``.
    """
    global _LLM_INSTANCE
    if _LLM_INSTANCE is None:
        # NOTE(review): the cached instance is shared by every caller; confirm
        # the serving layer does not run generations on it concurrently.
        _LLM_INSTANCE = AutoModelForCausalLM.from_pretrained(
            "s3nh/PY007-TinyLlama-1.1B-Chat-v0.2-GGUF",
            model_file="PY007-TinyLlama-1.1B-Chat-v0.2.Q4_K_M.gguf",
            model_type="llama",
            gpu_layers=0,  # request zero GPU layers
            max_new_tokens=1096,
            repetition_penalty=1.13,
            temperature=0.1,
        )
    return _LLM_INSTANCE
|
|
def llm_function(message, chat_history):
    """Produce the model's reply to a single chat message.

    Args:
        message: Text of the latest user message.
        chat_history: Earlier conversation turns passed in by the caller;
            this function does not read it.

    Returns:
        Whatever the loaded model returns when called with the formatted
        prompt.
    """
    # The model is obtained before the prompt is built, as in the original.
    model = load_llm()
    # Shared template first, then the user text wrapped in instruction markers.
    prompt = PROMPT_TEMPLATE + f"<s>[INST]{message}[/INST]</s>"
    return model(prompt)
|
|
# Heading passed to the chat interface as its title.
title = "这里是小兮辞"

# Sample prompts passed to the chat interface as its examples.
examples = ["What is yellow fever."]

# Build the chat UI around llm_function, then launch it.
demo = gr.ChatInterface(fn=llm_function, title=title, examples=examples)
demo.launch()
|
|