Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import re
|
| 3 |
+
import time
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from transformers import pipeline, Conversation, AutoTokenizer
|
| 6 |
+
#"meta-llama/Llama-2-13b-chat-hf"
|
| 7 |
+
# Model checkpoint plus generation settings. In the visible code only
# "model_name" is read; the other keys are referenced solely from
# commented-out pipeline arguments.
my_config = {
    "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
    "do_sample": True,
    "temperature": 0.1,
    "repetition_penalty": 1.1,
    "max_new_tokens": 500,
}
|
| 8 |
+
|
| 9 |
+
# Load the text-generation pipeline once at module import (outside any
# function) and report how long loading took.
print(f"Loading the model: {my_config['model_name']}....")
time_load_model_start = time.time()

# Raw epoch timestamp of the moment loading started.
print(time_load_model_start)

# Load the model and tokenizer outside of the functions.
# Only the checkpoint name is passed. The remaining settings in `my_config`
# were commented out in the original call and are therefore not applied:
#   tokenizer=AutoTokenizer.from_pretrained(my_config['model_name']),
#   do_sample=my_config['do_sample'],
#   temperature=my_config['temperature'],
#   repetition_penalty=my_config['repetition_penalty'],
#   max_new_tokens=my_config['max_new_tokens'],
llm = pipeline("text-generation", model=my_config['model_name'])

time_load_model_end = time.time()
elapsed_time = time_load_model_end - time_load_model_start
print(f"Elapsed time to load the model: {elapsed_time:.2f} sec")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_answer(llm):
    """Return the fixed placeholder reply.

    Args:
        llm: Value supplied by the caller; it is not used. NOTE(review): this
            function is registered as the ``fn`` of a ``gr.Interface`` with a
            text input, so this presumably receives the user's text rather
            than the module-level ``llm`` pipeline whose name it shadows --
            confirm before wiring in real generation.

    Returns:
        The constant string ``"tekst output"``.
    """
    return "tekst output"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
#gr.ChatInterface(get_llama_response).launch()
|
| 34 |
+
# Expose `get_answer` through a single text-in / text-out Gradio interface.
demo = gr.Interface(fn=get_answer, inputs="text", outputs="text")

# share=True asks Gradio for a public share link in addition to the local
# server. NOTE(review): confirm this is wanted when hosted on Spaces.
demo.launch(share=True)
|