import gradio as gr
import spaces
import torch
from transformers import pipeline

MODEL_ID = "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive"

# Lazily-initialized text-generation pipeline (created once per process).
pipe = None


def load_model():
    """Create the text-generation pipeline on first use.

    Deferred so the heavy model download/load happens inside the GPU-decorated
    request handler rather than at import time (required on ZeroGPU Spaces).
    """
    global pipe
    if pipe is None:
        pipe = pipeline(
            "text-generation",
            model=MODEL_ID,
            device_map="auto",
        )


@spaces.GPU
def chat_fn(message, history):
    """Generate an assistant reply to *message*, conditioned on *history*.

    Fixes over the naive version: the conversation history is actually sent to
    the model (previously ignored), and the input is passed in chat-messages
    format so the tokenizer's chat template is applied for this chat-tuned
    model instead of raw untemplated text.

    Args:
        message: The latest user message (str).
        history: Prior turns from gr.ChatInterface — either a list of
            (user, assistant) tuples, or a list of {"role", "content"} dicts
            when ChatInterface(type="messages") is used; both are handled.

    Returns:
        The assistant's generated reply as a plain string.
    """
    load_model()

    # Normalize history into the chat-messages format the pipeline expects.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # type="messages" form: already {"role": ..., "content": ...}
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuple form: (user_text, assistant_text); assistant may be None
            # for an in-progress turn.
            user_text, assistant_text = turn
            messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    outputs = pipe(
        messages,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )

    reply = outputs[0]["generated_text"]
    # Depending on the transformers version, chat input may yield either the
    # generated string directly or a list of message dicts — handle both.
    if isinstance(reply, list):
        reply = reply[-1]["content"]
    return reply


demo = gr.ChatInterface(fn=chat_fn)

if __name__ == "__main__":
    demo.launch()