import time

import gradio as gr
from transformers import pipeline

# Base model behind the Dutch finetune: "meta-llama/Llama-2-13b-chat-hf"
my_config = {
    'model_name': "BramVanroy/Llama-2-13b-chat-dutch",
    'do_sample': True,
    'temperature': 0.1,
    'repetition_penalty': 1.1,
    'max_new_tokens': 500,
}

print(f"Loading the model: {my_config['model_name']}....")
time_load_model_start = time.time()

# Load the model and tokenizer once, at import time, so they are not
# reloaded on every request; the sampling settings from my_config are
# passed later, at generation time.
llm = pipeline("text-generation", model=my_config['model_name'])
time_load_model_end = time.time()
elapsed_time = time_load_model_end - time_load_model_start
print(f"Elapsed time to load the model: {elapsed_time:.2f} sec")
def get_answer(message):
    # Generate a completion for the user's message, applying the sampling
    # settings from my_config at call time.
    outputs = llm(
        message,
        do_sample=my_config['do_sample'],
        temperature=my_config['temperature'],
        repetition_penalty=my_config['repetition_penalty'],
        max_new_tokens=my_config['max_new_tokens'],
    )
    return outputs[0]['generated_text']

# Alternative chat UI: gr.ChatInterface(...).launch()
# (note that ChatInterface expects fn to accept (message, history)).
demo = gr.Interface(fn=get_answer, inputs="text", outputs="text")
demo.launch(share=True)
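
# Note: BramVanroy/Llama-2-13b-chat-dutch is a chat-tuned model, so raw text
# prompts often work better when wrapped in the Llama-2 chat template. Below
# is a minimal, optional sketch (an assumption, not part of the original
# app) using tokenizer.apply_chat_template from transformers:
#
# from transformers import AutoTokenizer
#
# tokenizer = AutoTokenizer.from_pretrained(my_config['model_name'])
#
# def get_answer_chat(message):
#     # Render a single-turn conversation into the model's chat format.
#     prompt = tokenizer.apply_chat_template(
#         [{"role": "user", "content": message}],
#         tokenize=False,
#         add_generation_prompt=True,
#     )
#     outputs = llm(prompt, max_new_tokens=my_config['max_new_tokens'])
#     # The pipeline echoes the prompt; strip it to return only the answer.
#     return outputs[0]['generated_text'][len(prompt):]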