import time

import gradio as gr
from transformers import pipeline

# Base model behind the Dutch finetune: "meta-llama/Llama-2-13b-chat-hf"
my_config = {
    'model_name': "BramVanroy/Llama-2-13b-chat-dutch",
    'do_sample': True,
    'temperature': 0.1,
    'repetition_penalty': 1.1,
    'max_new_tokens': 500,
}

print(f"Loading the model: {my_config['model_name']}....")
time_load_model_start = time.time()

# Load the model and tokenizer once, at import time, so they are not
# reloaded on every request; the sampling settings from my_config are
# passed later, at generation time.
llm = pipeline("text-generation", model=my_config['model_name'])
time_load_model_end = time.time()
elapsed_time = time_load_model_end - time_load_model_start
print(f"Elapsed time to load the model: {elapsed_time:.2f} sec")
def get_answer(message):
    # Generate a completion for the user's message, applying the sampling
    # settings from my_config at call time.
    outputs = llm(
        message,
        do_sample=my_config['do_sample'],
        temperature=my_config['temperature'],
        repetition_penalty=my_config['repetition_penalty'],
        max_new_tokens=my_config['max_new_tokens'],
    )
    return outputs[0]['generated_text']

# Alternative chat UI: gr.ChatInterface(...).launch()
# (note that ChatInterface expects fn to accept (message, history)).
demo = gr.Interface(fn=get_answer, inputs="text", outputs="text")
demo.launch(share=True)
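
# Note: BramVanroy/Llama-2-13b-chat-dutch is a chat-tuned model, so raw text
# prompts often work better when wrapped in the Llama-2 chat template. Below
# is a minimal, optional sketch (an assumption, not part of the original
# app) using tokenizer.apply_chat_template from transformers:
#
# from transformers import AutoTokenizer
#
# tokenizer = AutoTokenizer.from_pretrained(my_config['model_name'])
#
# def get_answer_chat(message):
#     # Render a single-turn conversation into the model's chat format.
#     prompt = tokenizer.apply_chat_template(
#         [{"role": "user", "content": message}],
#         tokenize=False,
#         add_generation_prompt=True,
#     )
#     outputs = llm(prompt, max_new_tokens=my_config['max_new_tokens'])
#     # The pipeline echoes the prompt; strip it to return only the answer.
#     return outputs[0]['generated_text'][len(prompt):]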