Spaces:

FGOTYT
/

Crazy_Reasoning_1.5B

Sleeping

App Files Files Community

Crazy_Reasoning_1.5B / app.py

FGOTYT

Update app.py

03791ce verified over 1 year ago

raw

history blame contribute delete

2.27 kB

	import gradio as gr
	from llama_cpp import Llama

	# Settings for the local GGUF checkpoint.
	MODEL_PATH = "Crazy_Reasoning_1.5B.Q4_K_M.gguf"  # Replace with your model path
	CONTEXT_WINDOW = 32768  # Passed to Llama as n_ctx
	CPU_THREADS = 2  # Passed to Llama as n_threads

	# Create the Llama instance once at import time; respond() uses this object.
	model = Llama(
	    model_path=MODEL_PATH,
	    n_ctx=CONTEXT_WINDOW,
	    n_threads=CPU_THREADS,
	)

	def format_response(text):
	    """Return *text* with every newline character replaced by ``<br>``."""
	    # Splitting on "\n" and re-joining with "<br>" swaps each line break
	    # for its HTML equivalent; all other characters are left untouched.
	    lines = text.split('\n')
	    return '<br>'.join(lines)

	def _history_to_messages(history):
	    """Convert Gradio chat history into a list of role/content dicts.

	    Two history layouts are accepted:

	    * pairs ``(user_msg, assistant_msg)`` -- each non-empty half becomes
	      one message with the matching role;
	    * dicts carrying ``"role"`` and ``"content"`` keys -- copied through
	      when both values are non-empty.
	    """
	    messages = []
	    for turn in history or []:
	        if isinstance(turn, dict):
	            # Unpacking a dict as a pair would iterate over its *keys*,
	            # so role/content dicts must be handled before the pair case.
	            role = turn.get("role")
	            content = turn.get("content")
	            if role and content:
	                messages.append({"role": role, "content": content})
	            continue

	        user_msg, assistant_msg = turn
	        if user_msg:
	            messages.append({"role": "user", "content": user_msg})
	        if assistant_msg:
	            messages.append({"role": "assistant", "content": assistant_msg})
	    return messages


	def _chunk_text(chunk):
	    """Return the text carried by one streamed chunk, or ``None`` if absent.

	    The first choice is inspected: a ``"text"`` field wins, otherwise
	    ``delta["content"]`` is used when present.
	    """
	    choices = chunk.get("choices") if isinstance(chunk, dict) else None
	    if not choices:
	        return None

	    first = choices[0]
	    if "text" in first:
	        return first["text"]
	    # A missing or empty delta simply means this chunk has no text.
	    return (first.get("delta") or {}).get("content")


	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Stream the model's reply to *message* as progressively longer strings.

	    Args:
	        message: The new user message.
	        history: Earlier conversation turns, either as
	            ``(user_msg, assistant_msg)`` pairs or as dicts with
	            ``"role"`` and ``"content"`` keys.
	        system_message: Content of the leading system message.
	        max_tokens: Forwarded to ``model.create_chat_completion``.
	        temperature: Forwarded to ``model.create_chat_completion``.
	        top_p: Forwarded to ``model.create_chat_completion``.

	    Yields:
	        The reply accumulated so far, passed through ``format_response``,
	        once for every streamed chunk that carries text.
	    """
	    # Conversation layout: system prompt, prior turns, then the new message.
	    messages = [{"role": "system", "content": system_message}]
	    messages.extend(_history_to_messages(history))
	    messages.append({"role": "user", "content": message})

	    stream = model.create_chat_completion(
	        messages=messages,
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	    )

	    response = ""
	    for chunk in stream:
	        content = _chunk_text(chunk)
	        if content is None:
	            # Chunks without usable text are skipped explicitly.
	            continue
	        response += content
	        yield format_response(response)

	# Extra controls for the chat UI. They are listed in the same order as the
	# trailing parameters of respond(): system_message, max_tokens,
	# temperature, top_p.
	system_prompt_box = gr.Textbox(value="", label="System prompt")
	max_tokens_slider = gr.Slider(
	    minimum=1, maximum=2048, value=1024, step=1, label="Maximum new tokens"
	)
	temperature_slider = gr.Slider(
	    minimum=0.1, maximum=2.0, value=1, step=0.01, label="Temperature"
	)
	top_p_slider = gr.Slider(
	    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"
	)

	# Chat interface driven by respond(), with the controls above attached.
	demo = gr.ChatInterface(
	    respond,
	    additional_inputs=[
	        system_prompt_box,
	        max_tokens_slider,
	        temperature_slider,
	        top_p_slider,
	    ],
	)

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()