File size: 2,798 Bytes
7342418
 
 
 
 
 
 
 
1f34084
 
7342418
 
1f34084
7342418
 
1f34084
7342418
 
 
 
 
 
 
 
 
dfc0ea8
7342418
 
 
 
dfc0ea8
 
7342418
 
dfc0ea8
 
 
7342418
dfc0ea8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7342418
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import chainlit as cl
from langchain_community.llms import Ollama

# Get the Ollama URL from the environment, falling back to a hosted
# Hugging Face Space endpoint when OLLAMA_BASE_URL is not set.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "https://thanthamky-ollama-api-analytic.hf.space")
# Name of the Ollama model to use (it must already be pulled on the server!)
MODEL_NAME = "qwen3:0.6b" 


@cl.on_chat_start
async def on_chat_start():
    """Create the Ollama client for this session and greet the user."""
    # Build the LLM client pointed at the configured Ollama server.
    # (No callback_manager: it is not accepted here.)
    ollama_client = Ollama(base_url=OLLAMA_BASE_URL, model=MODEL_NAME)

    # Keep the client in the user session so on_message can reuse it.
    cl.user_session.set("llm", ollama_client)

    greeting = (
        f"Hello! I am connected to Ollama running **{MODEL_NAME}**. "
        "How can I help you today?"
    )
    await cl.Message(content=greeting).send()

    
@cl.on_message
async def on_message(message: cl.Message):
    """Stream the model's reply, routing <think>...</think> content into a
    collapsible Chainlit step and everything else into the main chat message.

    Fixes over the previous version:
    - partial *opening* tags ("<", "<t", ...) are now held back too, so tag
      fragments can no longer leak into the main answer;
    - the thinking phase can only open once (a stray "<think>" inside the
      answer no longer reopens the step);
    - the final flush sends the main message first if it was never sent.
    """
    llm = cl.user_session.get("llm")

    # cl.Step creates the collapsible "Thinking..." box in the UI
    think_step = cl.Step(name="Thinking")
    msg = cl.Message(content="")

    is_thinking = False
    buffer = ""

    OPEN_TAG, CLOSE_TAG = "<think>", "</think>"
    # Every proper prefix of both tags; while the buffer ends with one of
    # these, a tag may still be forming, so we hold the stream back.
    partial_tags = tuple(
        tag[:i] for tag in (OPEN_TAG, CLOSE_TAG) for i in range(1, len(tag))
    )

    async for chunk in llm.astream(message.content):
        buffer += chunk

        # 1. Detect the start of the thinking process (only when not already
        #    thinking, so the step cannot be reopened by answer text).
        if not is_thinking and OPEN_TAG in buffer:
            buffer = buffer.replace(OPEN_TAG, "").lstrip("\n")
            is_thinking = True
            await think_step.send()

        # 2. Detect the end of the thinking process
        if is_thinking and CLOSE_TAG in buffer:
            thought, _, remainder = buffer.partition(CLOSE_TAG)
            # Send the remaining thought to the step and finalize it
            await think_step.stream_token(thought)
            await think_step.update()

            # Keep the rest of the text for the main answer
            buffer = remainder.lstrip("\n")
            is_thinking = False
            await msg.send()

        # 3. Stream the text to the correct UI element, holding back anything
        #    that could be the beginning of an opening OR closing tag.
        if buffer and not buffer.endswith(partial_tags):
            if is_thinking:
                await think_step.stream_token(buffer)
                buffer = ""
            elif OPEN_TAG not in buffer:
                # If we aren't thinking, send text to the main message
                if not msg.id:
                    await msg.send()
                await msg.stream_token(buffer)
                buffer = ""

    # Flush any leftover text in the buffer when generation stops
    if buffer:
        if is_thinking:
            await think_step.stream_token(buffer)
            await think_step.update()
        else:
            # Ensure the message exists before streaming the tail into it.
            if not msg.id:
                await msg.send()
            await msg.stream_token(buffer)

    await msg.update()