Spaces:

KeithCu
/

SolarHelperGradio

Sleeping

SolarHelperGradio / app.py

Update app.py

9b5ecd1 almost 2 years ago

1.76 kB

	import gradio as gr
	import litellm

	# Human-readable model label; interpolated into the ChatInterface description text.
	model_name = "OpenHermes 2.5"


	def inference(message, history):
	    """Stream a model reply for ``message``, given the prior ``history``.

	    The conversation is collapsed into a single user turn: every string in
	    ``history`` (an iterable of sequences, flattened in order) is joined with
	    single spaces and ``message`` is appended last.

	    Args:
	        message: The newest user input.
	        history: Prior exchanges as an iterable of sequences of strings.

	    Yields:
	        The response text accumulated so far, once per streamed chunk. If any
	        exception is raised, the exception is printed and one generic error
	        string is yielded instead.
	    """
	    try:
	        # str.join raises TypeError on None, so drop unfilled history slots
	        # rather than failing the whole request.
	        prior_turns = [
	            item for exchange in history for item in exchange if item is not None
	        ]
	        full_message = " ".join(prior_turns + [message])
	        messages_litellm = [{"role": "user", "content": full_message}]  # litellm message format

	        # NOTE(review): litellm's standard length parameter is `max_tokens`;
	        # confirm that `max_new_tokens` is honoured by this provider.
	        stream = litellm.completion(
	            model="together_ai/teknium/OpenHermes-2p5-Mistral-7B",
	            messages=messages_litellm,
	            max_new_tokens=4096,
	            temperature=.7,
	            top_k=100,
	            top_p=.9,
	            repetition_penalty=1.18,
	            stream=True,
	        )

	        partial_message = ""
	        for chunk in stream:
	            # A streamed delta may carry no text (content is None, which
	            # litellm's own streaming examples guard against). Concatenating
	            # None would raise TypeError and replace the reply with the
	            # generic error below, so treat it as an empty string.
	            delta_text = chunk['choices'][0]['delta']['content'] or ""
	            partial_message += delta_text
	            yield partial_message
	    except Exception as e:
	        # Top-level boundary for the UI callback: report the failure on the
	        # console and show the user a generic message instead of a traceback.
	        print("Exception encountered:", str(e))
	        yield "An Error occured please 'Clear' the error and try your question again"


	# Build the UI pieces first, then assemble and launch the chat application.
	chat_window = gr.Chatbot(height=400)
	input_box = gr.Textbox(placeholder="Enter text here...", container=False, scale=5)

	# Banner text shown in the UI; includes the model label defined above.
	banner_text = f"""
	CURRENT PROMPT TEMPLATE: {model_name}.
	An incorrect prompt template will cause performance to suffer.
	Check the API specifications to ensure this format matches the target LLM."""

	# Button labels for the three chat controls.
	button_labels = {
	    "retry_btn": "Retry",
	    "undo_btn": "Undo",
	    "clear_btn": "Clear",
	}

	demo = gr.ChatInterface(
	    inference,
	    chatbot=chat_window,
	    textbox=input_box,
	    description=banner_text,
	    title="Simple Chatbot Test Application",
	    examples=["Define 'deep learning' in once sentence."],
	    theme=None,
	    **button_labels,
	)

	# Enable request queuing, then start the server.
	demo.queue().launch()