# Spaces: Sleeping (Hugging Face Spaces status banner captured during scraping —
# kept as a comment so the file remains valid Python)
| import os | |
| import chainlit as cl | |
| from langchain_community.llms import Ollama | |
| from langchain.callbacks.manager import CallbackManager | |
| from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
# Get the Ollama URL from the environment. NOTE(review): despite the original
# comment saying "localhost", the fallback is a remote Hugging Face Space
# endpoint — confirm which default is intended for this deployment.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "https://thanthamky-ollama-api-analytic.hf.space")
# Define the model you want to use (make sure you pull it on the Ollama server first!)
MODEL_NAME = "qwen3:0.6b"
@cl.on_chat_start
async def on_chat_start():
    """Initialize the Ollama LLM for this session and greet the user.

    The ``@cl.on_chat_start`` decorator is required — without it Chainlit
    never invokes this handler and the session has no LLM configured.
    """
    llm = Ollama(
        base_url=OLLAMA_BASE_URL,
        model=MODEL_NAME,
        # Stream tokens to the server's stdout as they are generated.
        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    )
    # Store per-session so concurrent users each get their own LLM handle.
    cl.user_session.set("llm", llm)
    await cl.Message(
        content=f"Hello! I am connected to Ollama running **{MODEL_NAME}**. How can I help you today?"
    ).send()
@cl.on_message
async def on_message(message: cl.Message):
    """Stream the model's reply to an incoming user message.

    The ``@cl.on_message`` decorator is required — without it Chainlit
    never routes user messages to this handler.

    Args:
        message: The incoming Chainlit message; its ``content`` is sent
            verbatim as the prompt to the Ollama model.
    """
    llm = cl.user_session.get("llm")
    # Guard: if on_chat_start never ran (e.g. server restart mid-session),
    # fail with a clear message instead of an AttributeError on None.
    if llm is None:
        await cl.Message(
            content="Session not initialized. Please refresh the page."
        ).send()
        return
    # Send an empty message first so tokens can be streamed into it.
    msg = cl.Message(content="")
    await msg.send()
    # Stream the response from Ollama token by token.
    async for chunk in llm.astream(message.content):
        if chunk:
            await msg.stream_token(chunk)
    # Finalize the streamed message in the UI.
    await msg.update()