"""Gradio chat UI for the Nebulos-Distill-Qwen3-0.6B reasoning model.

Runs the GGUF model locally via llama-cpp-python, generating a visible
"thinking" step followed by a final answer for each user message.
"""

import gradio as gr
from llama_cpp import Llama

# 1. Load Nebulos (downloads the GGUF from the Hugging Face Hub on first run).
llm = Llama.from_pretrained(
    repo_id="Erik22TY/Nebulos-Distill-Qwen3-0.6B",
    filename="Nebulos-Distill-Qwen3-0.6B.gguf",
    n_ctx=2048,
)


def chat_with_nebulos(message, history):
    """Generate a reasoning step and a final answer for *message*.

    Appends three messages to *history*: the user's turn, a collapsible
    "Thinking..." assistant message, and the final assistant answer.
    Returns the updated history for the Chatbot component.
    """
    # Prompt format for reasoning.
    full_prompt = f"User: {message}\nAssistant: "

    # Generate the chain of thought.
    # NOTE(review): the original had stop=[""] and `thought + ""`, almost
    # certainly HTML-stripped <think>/</think> tags (the Qwen3 reasoning
    # delimiters) — reconstructed here; confirm against the model card.
    output = llm(full_prompt, max_tokens=512, stop=["</think>"], echo=False)
    thought = output["choices"][0]["text"].strip()

    # Generate the final answer, conditioning on the closed thought block.
    final_prompt = full_prompt + thought + "</think>"
    final_output = llm(final_prompt, max_tokens=512, stop=["User:"], echo=False)
    answer = final_output["choices"][0]["text"].strip()

    # Fix: the user's own message must be appended too, or the chat display
    # would show only assistant bubbles.
    history.append(gr.ChatMessage(role="user", content=message))
    history.append(
        gr.ChatMessage(
            role="assistant",
            content=thought,
            metadata={"title": "🧠 Thinking..."},
        )
    )
    history.append(gr.ChatMessage(role="assistant", content=answer))
    return history


# 2. The UI with the custom placeholder and examples.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 🌌 Nebulos-Distill-Qwen3-0.6B Chat")
    chatbot = gr.Chatbot(type="messages", label="Nebulos Chat")
    # 'Ask anything...' is the placeholder text.
    msg = gr.Textbox(placeholder="Ask anything...", container=False, scale=7)
    gr.Examples(
        examples=[
            "Hi! 😊",
            "Give me a Sigma building tip for a Roblox obby.",
            "How do I optimize Minecraft performance on Linux Mint?",
            "Explain why 32GB RAM is the best 'Sigma' choice for 2026.",
            "Write a simple Luau script to make a Part change color on touch.",
            "Tell me a joke about a Python programmer stuck in a loop.",
        ],
        inputs=msg,
    )
    # Run the chat, then clear the textbox for the next turn.
    msg.submit(chat_with_nebulos, [msg, chatbot], [chatbot]).then(
        lambda: "", None, msg
    )

if __name__ == "__main__":
    demo.launch()