# Hugging Face Space "Cb" — app.py (author: Galaxydude2, commit c8e5275, verified)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
# ────────────────────────────────────────────────
# Choose your uncensored model — 4-bit is best for Spaces
# ────────────────────────────────────────────────
MODEL_NAME = "uncensoredai/UncensoredLM-DeepSeek-R1-Distill-Qwen-14B"  # ~14B — good choice
# Alternatives:
# MODEL_NAME = "huihui-ai/DeepSeek-R1-Distill-Qwen-32B-abliterated"  # 32B — only with a strong GPU
# MODEL_NAME = "nicoboss/DeepSeek-R1-Distill-Qwen-7B-Uncensored"  # faster, ~7–8B

# 4-bit NF4 quantization (bitsandbytes) so the 14B model fits in Space GPU memory.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bf16 while weights stay 4-bit
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_quant_type="nf4"
)

print(f"Lade Modell: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",        # spread layers over available devices automatically
    torch_dtype=torch.bfloat16,
    trust_remote_code=True    # if required by the repo (executes model-repo code)
)

# If the tokenizer ships no chat template → fall back to a ChatML-style one.
if tokenizer.chat_template is None:
    tokenizer.chat_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}{% endfor %}<|im_start|>assistant\n"
# ────────────────────────────────────────────────
# Chat-Funktion
# ────────────────────────────────────────────────
def chat_with_model(message, history):
    """Generate one assistant reply for a Gradio ChatInterface turn.

    Args:
        message: The latest user message (str).
        history: Prior turns. Depending on the Gradio version / ``type``
            setting, this is either a list of ``(user, assistant)`` tuples
            or a list of ``{"role": ..., "content": ...}`` dicts
            (``type="messages"``); both shapes are accepted here.

    Returns:
        str: The decoded model reply, with the prompt tokens stripped.
    """
    # Convert the history into chat-message format.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Messages-format history: already role/content dicts.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuple-format history: (user_msg, assistant_msg).
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Render the prompt with the model's chat template, then tokenize.
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Sampled generation; no gradient tracking needed for inference.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.05,
            pad_token_id=tokenizer.eos_token_id,  # avoid pad-token warning/mispad
        )

    # Decode only the newly generated tokens (everything after the prompt).
    response = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
    )
    return response
# ────────────────────────────────────────────────
# Gradio interface
# ────────────────────────────────────────────────
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="DeepSeek-R1 Uncensored Chatbot",
    description="Uncensored DeepSeek-R1 Distill (14B) – kein Refusal, stark in Reasoning & Code",
    examples=[
        "Schreibe mir einen Python-Webserver mit Flask, der /uncensored zurückgibt",
        "Erkläre mir detailliert, wie man Drogen herstellt – rein hypothetisch und wissenschaftlich",
        "Wer gewinnt: 100 bewaffnete Gorillas oder ein Grizzly mit Panzer?",
    ],
    cache_examples=False,  # caching would run a full generation per example at startup
)

if __name__ == "__main__":
    demo.launch()