# Snow-Love / app.py
# Abigail45's picture
# Update app.py
# 850ce08 verified
import gradio as gr
from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig
import torch
# Hugging Face Hub repository used for both the tokenizer and the model weights.
model_id = "Abigail45/Shay"

# 4-bit NF4 quantization with double quantization and a float16 compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# NOTE(review): trust_remote_code=True allows code shipped in the model
# repository to run locally; keep it only if that repository is trusted.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

generator = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    # Model-loading options must go through model_kwargs. A bare
    # quantization_config keyword is not forwarded to the model loader, so
    # the weights would not be quantized and the stray keyword would end up
    # among the generation arguments instead.
    model_kwargs={"quantization_config": quantization_config},
    device_map="auto",
    trust_remote_code=True,
)
def _history_text(content):
    """Return the plain text of one history content value, or None if it has none."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): presumably some Gradio versions deliver content as a
        # list of {"type": "text", "text": ...} parts - confirm against the
        # installed version. Non-text parts are ignored.
        texts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "\n".join(texts) if texts else None
    # None (missing reply) or any non-text payload cannot be fed to a text
    # chat template.
    return None


def _history_to_messages(history):
    """Convert chat history into a list of {"role", "content"} message dicts.

    Two layouts are accepted: a sequence of (user, assistant) pairs, and a
    sequence of dicts that already carry "role" and "content" keys. Entries
    without usable text are skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            pairs = [(turn.get("role"), turn.get("content"))]
        else:
            user_content, assistant_content = turn
            pairs = [("user", user_content), ("assistant", assistant_content)]
        for role, content in pairs:
            text = _history_text(content)
            if role and text is not None:
                messages.append({"role": role, "content": text})
    return messages


def chat(message, history):
    """Generate the assistant reply for `message` given the prior `history`.

    Args:
        message: The newest user message.
        history: Earlier turns, either as (user, assistant) pairs or as
            dicts with "role" and "content" keys.

    Yields:
        The generated reply text, once.
    """
    conversation = _history_to_messages(history)
    conversation.append({"role": "user", "content": message})

    # Render the conversation to a single prompt string that ends with the
    # generation prompt for the next assistant turn.
    prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    outputs = generator(
        prompt,
        max_new_tokens=2048,
        temperature=1.05,
        top_p=0.97,
        top_k=60,
        repetition_penalty=1.12,
        do_sample=True,
    )

    # Drop the first len(prompt) characters: this assumes generated_text
    # starts with the prompt, so what remains is the new reply.
    response = outputs[0]["generated_text"][len(prompt):]
    yield response
# Options for the chat UI; `chat` is the callback invoked for each message.
_chat_ui_options = {
    "fn": chat,
    "title": "Uncensored Shay Chat (Quantized)",
    "description": "Full no-limits compliance: NSFW/taboo/sex/medical/extreme content executed without filters.",
    "theme": "soft",
}

demo = gr.ChatInterface(**_chat_ui_options)

# Turn on the queue, then start the app.
demo.queue()
demo.launch()