import os

import chainlit as cl
from langchain_community.llms import Ollama

# Ollama endpoint, overridable via environment; falls back to a hosted default.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "https://thanthamky-ollama-api-analytic.hf.space")

# Model served by Ollama (must already be pulled on the server).
MODEL_NAME = "qwen3:0.6b"

# Prefixes of "</think>" used to hold back streaming while a closing tag may
# still be forming across chunk boundaries.
_PARTIAL_CLOSE_TAGS = ("<", "</", "</t", "</th", "</thi", "</thin", "</think")


@cl.on_chat_start
async def on_chat_start():
    """Create the Ollama LLM client and greet the user.

    Stores the LLM in the chainlit user session so `on_message` can reuse it.
    """
    llm = Ollama(
        base_url=OLLAMA_BASE_URL,
        model=MODEL_NAME,
    )
    cl.user_session.set("llm", llm)

    await cl.Message(
        content=f"Hello! I am connected to Ollama running **{MODEL_NAME}**. How can I help you today?"
    ).send()


@cl.on_message
async def on_message(message: cl.Message):
    """Stream the model's reply, routing `<think>...</think>` content to a
    collapsible "Thinking" step and everything else to the main message.

    Args:
        message: The incoming chainlit user message.
    """
    llm = cl.user_session.get("llm")

    # cl.Step renders the collapsible "Thinking..." box in the UI.
    think_step = cl.Step(name="Thinking")
    msg = cl.Message(content="")

    is_thinking = False
    buffer = ""

    async for chunk in llm.astream(message.content):
        buffer += chunk

        # 1. Detect the start of the thinking process.
        if "<think>" in buffer:
            buffer = buffer.replace("<think>", "").lstrip("\n")
            is_thinking = True
            await think_step.send()

        # 2. Detect the end of the thinking process.
        if "</think>" in buffer:
            before_close, _, after_close = buffer.partition("</think>")
            # Send the remaining thought to the step and finalize it.
            await think_step.stream_token(before_close)
            await think_step.update()
            # Keep the rest of the text for the main answer.
            buffer = after_close.lstrip("\n")
            is_thinking = False
            await msg.send()

        # 3. Stream the text to the correct UI element.
        if is_thinking:
            # Hold the stream back if it looks like a closing tag
            # ("</think>") is forming at the end of the buffer.
            if not buffer.endswith(_PARTIAL_CLOSE_TAGS):
                await think_step.stream_token(buffer)
                buffer = ""
        elif "<think>" not in buffer:
            # Not thinking: send text to the main message.
            if not msg.id:
                await msg.send()
            await msg.stream_token(buffer)
            buffer = ""

    # Flush any leftover text in the buffer when generation stops.
    if buffer:
        if is_thinking:
            await think_step.stream_token(buffer)
            await think_step.update()
        else:
            await msg.stream_token(buffer)
            await msg.update()