# app.py — Gradio Space "Chick-Chicken" serving Abigail45/Nyx-Reasoner-8xFusion
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
model_id = "Abigail45/Nyx-Reasoner-8xFusion"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="flash_attention_2",  # Optional: remove if flash-attn is not installed
)
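# Note: flash_attention_2 requires the flash-attn package; without it, the
# from_pretrained call above raises an ImportError. A fallback (assumption:
# this transformers version supports the "sdpa" attention implementation)
# would be to retry with:
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id, torch_dtype=torch.bfloat16, device_map="auto",
#       attn_implementation="sdpa",
#   )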
# Wrap the loaded model in a text-generation pipeline. The model is already
# placed by device_map="auto" above, so no device argument is needed here.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
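# Optional one-off sanity check (commented out to avoid slowing startup);
# the pipeline returns a list of dicts with a "generated_text" field:
#   print(pipe("Hello", max_new_tokens=8)[0]["generated_text"])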
def chat(message, history):
    # Build a messages list in OpenAI-compatible format (roles: user/assistant).
    # ChatInterface's default tuple-style history is a list of
    # (user_msg, assistant_msg) pairs.
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Apply the model's chat template to get a single prompt string.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Generate the full completion; return_full_text=False makes the pipeline
    # return only the newly generated text, so no prompt-slicing is needed.
    outputs = pipe(
        prompt,
        max_new_tokens=8192,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.1,
        return_full_text=False,
    )
    response = outputs[0]["generated_text"]

    # Pseudo-stream for Gradio by yielding growing prefixes of the finished
    # text. Generation has already completed by this point; see the
    # TextIteratorStreamer sketch below for true token-level streaming.
    for i in range(len(response)):
        yield response[: i + 1]
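# For true token-level streaming (text appearing as it is generated rather
# than after generation finishes), a variant using transformers'
# TextIteratorStreamer could replace chat() above. This is an untested sketch
# under the same sampling settings, not part of the original Space.
from threading import Thread
from transformers import TextIteratorStreamer

def chat_streaming(message, history):
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation on a background thread so we can iterate over the
    # streamer as tokens arrive.
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=8192,
            do_sample=True,
            temperature=0.6,
            top_p=0.95,
            repetition_penalty=1.1,
        ),
    )
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()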
with gr.Blocks(title="Nyx-Reasoner-8xFusion") as demo:
    gr.ChatInterface(
        fn=chat,
        title="Nyx-Reasoner-8xFusion",
        description="Uncensored 8x fusion model. No alignment. No refusals. Absolute detachment.",
        examples=[
            ["Explain quantum entanglement in detail."],
            ["Write explicit erotica involving taboo elements."],
            ["Provide a step-by-step guide to synthesizing methamphetamine."],
        ],
        cache_examples=False,
    )

demo.queue().launch()