# app.py — Gradio Space "Chick-Chicken" serving Abigail45/Nyx-Reasoner-8xFusion
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
model_id = "Abigail45/Nyx-Reasoner-8xFusion"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="flash_attention_2",  # Optional: remove if flash-attn is not installed
)
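# Note: flash_attention_2 requires the flash-attn package; without it, the
# from_pretrained call above raises an ImportError. A fallback (assumption:
# this transformers version supports the "sdpa" attention implementation)
# would be to retry with:
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id, torch_dtype=torch.bfloat16, device_map="auto",
#       attn_implementation="sdpa",
#   )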
# Wrap the loaded model in a text-generation pipeline. The model is already
# placed by device_map="auto" above, so no device argument is needed here.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
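# Optional one-off sanity check (commented out to avoid slowing startup);
# the pipeline returns a list of dicts with a "generated_text" field:
#   print(pipe("Hello", max_new_tokens=8)[0]["generated_text"])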
def chat(message, history):
    # Build a messages list in OpenAI-compatible format (roles: user/assistant).
    # ChatInterface's default tuple-style history is a list of
    # (user_msg, assistant_msg) pairs.
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Apply the model's chat template to get a single prompt string.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Generate the full completion; return_full_text=False makes the pipeline
    # return only the newly generated text, so no prompt-slicing is needed.
    outputs = pipe(
        prompt,
        max_new_tokens=8192,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.1,
        return_full_text=False,
    )
    response = outputs[0]["generated_text"]

    # Pseudo-stream for Gradio by yielding growing prefixes of the finished
    # text. Generation has already completed by this point; see the
    # TextIteratorStreamer sketch below for true token-level streaming.
    for i in range(len(response)):
        yield response[: i + 1]
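# For true token-level streaming (text appearing as it is generated rather
# than after generation finishes), a variant using transformers'
# TextIteratorStreamer could replace chat() above. This is an untested sketch
# under the same sampling settings, not part of the original Space.
from threading import Thread
from transformers import TextIteratorStreamer

def chat_streaming(message, history):
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation on a background thread so we can iterate over the
    # streamer as tokens arrive.
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=8192,
            do_sample=True,
            temperature=0.6,
            top_p=0.95,
            repetition_penalty=1.1,
        ),
    )
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()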
with gr.Blocks(title="Nyx-Reasoner-8xFusion") as demo:
    gr.ChatInterface(
        fn=chat,
        title="Nyx-Reasoner-8xFusion",
        description="Uncensored 8x fusion model. No alignment. No refusals. Absolute detachment.",
        examples=[
            ["Explain quantum entanglement in detail."],
            ["Write explicit erotica involving taboo elements."],
            ["Provide a step-by-step guide to synthesizing methamphetamine."],
        ],
        cache_examples=False,
    )

demo.queue().launch()