Chainlit chat app backed by an Ollama server (deployed as a Hugging Face Space).
import os

import chainlit as cl
from langchain_community.llms import Ollama

# Ollama endpoint; overridable via the environment, with a hosted default.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "https://thanthamky-ollama-api-analytic.hf.space")

# Model served by Ollama (it must already be pulled on that server).
MODEL_NAME = "qwen3:0.6b"
@cl.on_chat_start
async def on_chat_start():
    """Set up the session when a new chat begins.

    Creates the Ollama LLM client, stores it in the user session for
    ``on_message`` to reuse, and sends a greeting.

    Fix: the handler must be decorated with ``@cl.on_chat_start`` —
    without the decorator Chainlit never registers or invokes it, so no
    LLM is ever placed in the session.
    """
    # Build the LLM client pointed at the configured Ollama server.
    # (No callback_manager: it is not accepted here.)
    llm = Ollama(
        base_url=OLLAMA_BASE_URL,
        model=MODEL_NAME,
    )

    # Store the LLM in the per-user session so later messages can use it.
    cl.user_session.set("llm", llm)

    await cl.Message(
        content=f"Hello! I am connected to Ollama running **{MODEL_NAME}**. How can I help you today?"
    ).send()
@cl.on_message
async def on_message(message: cl.Message):
    """Stream the model's reply, routing ``<think>…</think>`` text to a step.

    Text emitted between ``<think>`` tags is streamed into a collapsible
    "Thinking" ``cl.Step``; everything after ``</think>`` is streamed into
    the main chat message.

    Fixes:
    - The handler must be decorated with ``@cl.on_message``; without it
      Chainlit never dispatches user messages here.
    - A partially received *opening* tag (e.g. ``<thi``) is now held back,
      mirroring the existing holdback for partial closing tags, so tag
      fragments never leak into the visible answer.
    """
    llm = cl.user_session.get("llm")

    # cl.Step renders the collapsible "Thinking..." box in the UI.
    think_step = cl.Step(name="Thinking")
    msg = cl.Message(content="")

    is_thinking = False
    buffer = ""

    # If the buffer ends with one of these prefixes, a tag may still be
    # forming across chunks, so we hold the stream back until it resolves.
    open_partials = ("<", "<t", "<th", "<thi", "<thin", "<think")
    close_partials = ("<", "</", "</t", "</th", "</thi", "</thin", "</think")

    async for chunk in llm.astream(message.content):
        buffer += chunk

        # 1. Opening tag: switch routing to the thinking step.
        if "<think>" in buffer:
            buffer = buffer.replace("<think>", "").lstrip("\n")
            is_thinking = True
            await think_step.send()

        # 2. Closing tag: flush the remaining thought, finalize the step,
        #    and route the rest of the text to the main answer.
        if "</think>" in buffer:
            thought, _, remainder = buffer.partition("</think>")
            await think_step.stream_token(thought)
            await think_step.update()
            buffer = remainder.lstrip("\n")
            is_thinking = False
            await msg.send()

        # 3. Stream to the active UI element, holding back anything that
        #    could be the start of a tag.
        if is_thinking:
            if not buffer.endswith(close_partials):
                await think_step.stream_token(buffer)
                buffer = ""
        elif "<think>" not in buffer:
            if not buffer.endswith(open_partials):
                # Ensure the message exists in the UI before streaming to it.
                if not msg.id:
                    await msg.send()
                await msg.stream_token(buffer)
                buffer = ""

    # Flush any leftover text once generation stops.
    if buffer:
        if is_thinking:
            await think_step.stream_token(buffer)
            await think_step.update()
        else:
            await msg.stream_token(buffer)

    await msg.update()