Chainlit chat app backed by an Ollama server (deployed as a Hugging Face Space).
import os

import chainlit as cl
from langchain_community.llms import Ollama

# Ollama endpoint; overridable via the environment, with a hosted default.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "https://thanthamky-ollama-api-analytic.hf.space")

# Model served by Ollama (it must already be pulled on that server).
MODEL_NAME = "qwen3:0.6b"
@cl.on_chat_start
async def on_chat_start():
    """Set up the session when a new chat begins.

    Creates the Ollama LLM client, stores it in the user session for
    ``on_message`` to reuse, and sends a greeting.

    Fix: the handler must be decorated with ``@cl.on_chat_start`` —
    without the decorator Chainlit never registers or invokes it, so no
    LLM is ever placed in the session.
    """
    # Build the LLM client pointed at the configured Ollama server.
    # (No callback_manager: it is not accepted here.)
    llm = Ollama(
        base_url=OLLAMA_BASE_URL,
        model=MODEL_NAME,
    )

    # Store the LLM in the per-user session so later messages can use it.
    cl.user_session.set("llm", llm)

    await cl.Message(
        content=f"Hello! I am connected to Ollama running **{MODEL_NAME}**. How can I help you today?"
    ).send()
@cl.on_message
async def on_message(message: cl.Message):
    """Stream the model's reply, routing ``<think>…</think>`` text to a step.

    Text emitted between ``<think>`` tags is streamed into a collapsible
    "Thinking" ``cl.Step``; everything after ``</think>`` is streamed into
    the main chat message.

    Fixes:
    - The handler must be decorated with ``@cl.on_message``; without it
      Chainlit never dispatches user messages here.
    - A partially received *opening* tag (e.g. ``<thi``) is now held back,
      mirroring the existing holdback for partial closing tags, so tag
      fragments never leak into the visible answer.
    """
    llm = cl.user_session.get("llm")

    # cl.Step renders the collapsible "Thinking..." box in the UI.
    think_step = cl.Step(name="Thinking")
    msg = cl.Message(content="")

    is_thinking = False
    buffer = ""

    # If the buffer ends with one of these prefixes, a tag may still be
    # forming across chunks, so we hold the stream back until it resolves.
    open_partials = ("<", "<t", "<th", "<thi", "<thin", "<think")
    close_partials = ("<", "</", "</t", "</th", "</thi", "</thin", "</think")

    async for chunk in llm.astream(message.content):
        buffer += chunk

        # 1. Opening tag: switch routing to the thinking step.
        if "<think>" in buffer:
            buffer = buffer.replace("<think>", "").lstrip("\n")
            is_thinking = True
            await think_step.send()

        # 2. Closing tag: flush the remaining thought, finalize the step,
        #    and route the rest of the text to the main answer.
        if "</think>" in buffer:
            thought, _, remainder = buffer.partition("</think>")
            await think_step.stream_token(thought)
            await think_step.update()
            buffer = remainder.lstrip("\n")
            is_thinking = False
            await msg.send()

        # 3. Stream to the active UI element, holding back anything that
        #    could be the start of a tag.
        if is_thinking:
            if not buffer.endswith(close_partials):
                await think_step.stream_token(buffer)
                buffer = ""
        elif "<think>" not in buffer:
            if not buffer.endswith(open_partials):
                # Ensure the message exists in the UI before streaming to it.
                if not msg.id:
                    await msg.send()
                await msg.stream_token(buffer)
                buffer = ""

    # Flush any leftover text once generation stops.
    if buffer:
        if is_thinking:
            await think_step.stream_token(buffer)
            await think_step.update()
        else:
            await msg.stream_token(buffer)

    await msg.update()