import gradio as gr
import spaces
import torch
from transformers import pipeline

# Hugging Face Hub ID of the chat model served by this Space.
MODEL_ID = "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive"

# Lazily-initialized text-generation pipeline; populated by load_model()
# on first request so importing this module stays cheap.
pipe = None
|
def load_model():
    """Lazily construct the text-generation pipeline on first use.

    The pipeline is created once and cached in the module-global ``pipe``
    so the (slow) model download/load happens inside the first GPU-backed
    request rather than at import time.
    """
    global pipe
    if pipe is None:
        pipe = pipeline(
            "text-generation",
            model=MODEL_ID,
            # Load weights in the checkpoint's native dtype (bf16/fp16)
            # instead of the float32 default: a 9B model in fp32 needs
            # roughly 36 GB and would not fit a typical Space GPU.
            torch_dtype="auto",
            device_map="auto",
        )
|
@spaces.GPU
def chat_fn(message, history):
    """Generate a reply to *message*, conditioning on the chat *history*.

    Args:
        message: The user's latest input string.
        history: Prior turns as provided by ``gr.ChatInterface`` — either
            a list of ``{"role", "content"}`` dicts (messages format) or a
            list of ``(user, assistant)`` pairs (legacy tuple format).

    Returns:
        The assistant's reply as a plain string.
    """
    load_model()

    # Rebuild the full conversation for the model. The previous
    # implementation passed only `message`, silently dropping `history`,
    # so the bot had no memory of earlier turns.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format — already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy (user, assistant) tuple format.
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    outputs = pipe(
        messages,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )

    # With chat-style input, recent transformers versions return the new
    # text as a string; some return the message list. Handle both.
    reply = outputs[0]["generated_text"]
    if isinstance(reply, list):
        reply = reply[-1]["content"]
    return reply
|
# Wire the chat handler into Gradio's standard chat UI.
demo = gr.ChatInterface(chat_fn)

# Only start the server when run as a script (Spaces imports the module).
if __name__ == "__main__":
    demo.launch()