Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from ctransformers import AutoModelForCausalLM, AutoConfig, Config # import for GGUF/GGML models | |
| import datetime | |
| # modelfile = "models/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf" | |
| # modelfile="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" | |
| modelfile="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" | |
| i_temperature = 0.30 | |
| i_max_new_tokens=1100 | |
| i_repetitionpenalty = 1.2 | |
| i_contextlength=12048 | |
| logfile = 'TinyLlama.1B.txt' | |
| print("loading model...") | |
| stt = datetime.datetime.now() | |
| conf = AutoConfig(Config(temperature=i_temperature, | |
| repetition_penalty=i_repetitionpenalty, | |
| batch_size=64, | |
| max_new_tokens=i_max_new_tokens, | |
| context_length=i_contextlength)) | |
| llm = AutoModelForCausalLM.from_pretrained(modelfile, | |
| model_type="llama", | |
| config=conf) | |
| dt = datetime.datetime.now() - stt | |
| print(f"Model loaded in {dt}") | |
| def writehistory(text): | |
| with open(logfile, 'a', encoding='utf-8') as f: | |
| f.write(text) | |
| f.write('\n') | |
| f.close() | |
| with gr.Blocks(theme='ParityError/Interstellar') as demo: | |
| # TITLE SECTION | |
| with gr.Row(): | |
| with gr.Column(scale=12): | |
| gr.HTML("<center>" | |
| + "<h1>π¦ TinyLlama 1.1B π 4K context window</h2></center>") | |
| gr.Markdown(""" | |
| **Currently Running**: [TinyLlama/TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6) **Chat History Log File**: *TinyLlama.1B.txt* | |
| - **Base Model**: TinyLlama/TinyLlama-1.1B-Chat-v0.6, Fine tuned on OpenOrca GPT4 subset for 1 epoch, Using CHATML format. | |
| - **License**: Apache 2.0, following the TinyLlama base model. | |
| The model output is not censored and the authors do not endorse the opinions in the generated content. Use at your own risk. | |
| """) | |
| gr.Image(value='imgs/TinyLlama_logo.png', width=70) | |
| # chat and parameters settings | |
| with gr.Row(): | |
| with gr.Column(scale=4): | |
| chatbot = gr.Chatbot(height = 350, show_copy_button=True, avatar_images = ["imgs/user_logo.png","imgs/TinyLlama_logo.png"]) | |
| with gr.Row(): | |
| with gr.Column(scale=14): | |
| msg = gr.Textbox(show_label=False, placeholder="Enter text", lines=2) | |
| submitBtn = gr.Button("\n㪠Send\n", size="lg", variant="primary", min_width=140) | |
| with gr.Column(min_width=50, scale=1): | |
| with gr.Tab(label="Parameter Setting"): | |
| gr.Markdown("# Parameters") | |
| top_p = gr.Slider(minimum=-0, | |
| maximum=1.0, | |
| value=0.95, | |
| step=0.05, | |
| interactive=True, | |
| label="Top-p") | |
| temperature = gr.Slider(minimum=0.1, | |
| maximum=1.0, | |
| value=0.30, | |
| step=0.01, | |
| interactive=True, | |
| label="Temperature") | |
| max_length_tokens = gr.Slider(minimum=0, | |
| maximum=4096, | |
| value=1060, | |
| step=4, | |
| interactive=True, | |
| label="Max Generation Tokens") | |
| rep_pen = gr.Slider(minimum=0, | |
| maximum=5, | |
| value=1.2, | |
| step=0.05, | |
| interactive=True, | |
| label="Repetition Penalty") | |
| clear = gr.Button("ποΈ Clear All Messages", variant='secondary') | |
| def user(user_message, history): | |
| writehistory(f"USER: {user_message}") | |
| return "", history + [[user_message, None]] | |
| def bot(history, t, p, m, r): | |
| # SYSTEM_PROMPT = """<|im_start|>system | |
| # You are a helpful bot. Your answers are clear and concise. | |
| # <|im_end|> | |
| # """ | |
| # prompt = f"<|im_start|>system<|im_end|><|im_start|>user\n{history[-1][0]}<|im_end|>\n<|im_start|>assistant\n" | |
| SYSTEM_PROMPT = """<|im_start|>system | |
| You are a customer support chatbot for an online platform. | |
| Your purpose is to assist users with their inquiries and provide accurate information. | |
| You have been trained with a knowledge base that includes rules and limitations regarding chargebacks. | |
| The knowledge base consists of the following information: | |
| 1. Chargebacks beyond 90 days are not possible. | |
| 2. Chargebacks above $1000 are not allowed. | |
| 3. Chargebacks for transactions with a valid 3D secure are not allowed. | |
| Use the provided conversation example as a starting point for training. | |
| Your goal is to respond to user queries in a helpful and informative manner, ensuring that you adhere to the platform's chargeback policies. | |
| <|im_end|> | |
| """ | |
| prompt = f"<|im_start|>system<|im_end|><|im_start|>user\n{history[-1][0]}<|im_end|>\n<|im_start|>assistant\n" | |
| print(f"history lenght: {len(history)}") | |
| if len(history) == 1: | |
| print("this is the first round") | |
| else: | |
| print("here we should pass more conversations") | |
| history[-1][1] = "" | |
| for character in llm(prompt, | |
| temperature = t, | |
| top_p = p, | |
| repetition_penalty = r, | |
| max_new_tokens=m, | |
| stop = ['<|im_end|>'], | |
| stream = True): | |
| history[-1][1] += character | |
| yield history | |
| writehistory(f"temperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history}\n\n") | |
| # Log in the terminal the messages | |
| print(f"USER: {history[-1][0]}\n---\ntemperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history[-1][1]}\n\n") | |
| # Clicking the submitBtn will call the generation with Parameters in the slides | |
| submitBtn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, [chatbot,temperature,top_p,max_length_tokens,rep_pen], chatbot) | |
| clear.click(lambda: None, None, chatbot, queue=False) | |
| demo.queue() # required to yield the streams from the text generation | |
| demo.launch(inbrowser=True, share=True) |