import os
import chainlit as cl
from langchain_community.llms import Ollama
# Ollama server URL from the environment; falls back to a hosted HF Space
# endpoint (NOT localhost) when OLLAMA_BASE_URL is unset.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "https://thanthamky-ollama-api-analytic.hf.space")
# Model name passed to Ollama (make sure you pull it first!). qwen3 emits a
# <think>...</think> reasoning trace that on_message parses out of the stream.
MODEL_NAME = "qwen3:0.6b"
@cl.on_chat_start
async def on_chat_start():
    """Create the Ollama client for this session and greet the user.

    The client is stored in the Chainlit user session so that the
    on_message handler can retrieve it for every incoming message.
    """
    # No callback_manager here — the constructor rejects it.
    ollama_llm = Ollama(base_url=OLLAMA_BASE_URL, model=MODEL_NAME)

    # Stash the client in the per-user session for later handlers.
    cl.user_session.set("llm", ollama_llm)

    greeting = (
        f"Hello! I am connected to Ollama running **{MODEL_NAME}**. "
        "How can I help you today?"
    )
    await cl.Message(content=greeting).send()
@cl.on_message
async def on_message(message: cl.Message):
    """Stream the model's reply, routing <think>…</think> content into a
    collapsible "Thinking" step and everything else into the main answer.

    qwen3 interleaves a reasoning trace wrapped in <think> tags with its
    final answer. Either tag may arrive split across stream chunks, so text
    accumulates in a buffer and is held back whenever the buffer's tail
    could still be a partially-formed tag.

    Args:
        message: The incoming Chainlit user message; its text content is
            forwarded to the LLM.
    """

    def ends_with_partial_tag(text: str, tag: str) -> bool:
        # True when text ends with a non-empty proper prefix of tag,
        # e.g. "...</thi" for "</think>" — the rest may be in flight.
        return any(text.endswith(tag[:i]) for i in range(1, len(tag)))

    llm = cl.user_session.get("llm")

    # cl.Step renders the collapsible "Thinking..." box in the UI.
    think_step = cl.Step(name="Thinking")
    msg = cl.Message(content="")
    # Track send state explicitly: cl.Message may assign an id at
    # construction time, so `msg.id` is not a reliable "already sent"
    # check. NOTE(review): confirm against the installed Chainlit version.
    msg_sent = False

    is_thinking = False
    buffer = ""

    async for chunk in llm.astream(message.content):
        buffer += chunk

        # 1. Start of the thinking trace: strip the tag, open the step.
        if "<think>" in buffer:
            buffer = buffer.replace("<think>", "").lstrip("\n")
            is_thinking = True
            await think_step.send()

        # 2. End of the thinking trace: flush the remaining thought into
        #    the step, close it, and keep the tail for the main answer.
        if "</think>" in buffer:
            thought, _, remainder = buffer.partition("</think>")
            await think_step.stream_token(thought)
            await think_step.update()
            buffer = remainder.lstrip("\n")
            is_thinking = False
            await msg.send()
            msg_sent = True

        # 3. Stream buffered text to the correct UI element, holding back
        #    whenever the buffer's tail could be a partial tag. (The
        #    original only held back partial closing tags, so a split
        #    "<thi" + "nk>" leaked into the main answer.)
        if is_thinking:
            if not ends_with_partial_tag(buffer, "</think>"):
                await think_step.stream_token(buffer)
                buffer = ""
        elif buffer and not ends_with_partial_tag(buffer, "<think>"):
            if not msg_sent:
                await msg.send()
                msg_sent = True
            await msg.stream_token(buffer)
            buffer = ""

    # Flush any leftover text in the buffer when generation stops.
    if buffer:
        if is_thinking:
            await think_step.stream_token(buffer)
            await think_step.update()
        else:
            if not msg_sent:
                await msg.send()
            await msg.stream_token(buffer)
    await msg.update()