import os
import gradio as gr
from ctransformers import AutoModelForCausalLM

model_repo = os.getenv('HF_MODEL_REPO')
model_bin = os.getenv('HF_MODEL_BIN')

# Load the quantized model on CPU; lib="avx2" selects the AVX2 build of ctransformers.
llm = AutoModelForCausalLM.from_pretrained(
    model_repo,
    model_file=model_bin,
    threads=2,
    seed=42,
    context_length=16384,
    lib="avx2",
)

def response(prompt):
    # Generate a completion; stop on common end-of-sequence / end-of-turn tokens.
    txt = llm(prompt, max_new_tokens=8192, temperature=0.8, top_p=0.5,
              repetition_penalty=1.1, reset=False, stop=["</s>", "<|im_end|>"])
    return txt

if __name__ == '__main__':
    title = "Chat"
    demo_status = "Demo is running on CPU"
    gr.Interface(response, inputs="text", outputs="text",
                 title=title,
                 description=demo_status,  # show the CPU notice under the title
                 ).launch()
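
# Usage note: the script reads the model location from two environment variables,
# HF_MODEL_REPO (a Hugging Face repo that contains a GGUF/GGML weight file) and
# HF_MODEL_BIN (the weight file's name inside that repo). With both set, running
# the script starts the Gradio demo locally (on http://127.0.0.1:7860 by default).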