# Hugging Face Space "Cb" — app.py (author: Galaxydude2, commit c8e5275, verified)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
# ────────────────────────────────────────────────
# Choose your uncensored model — 4-bit is best for Spaces
# ────────────────────────────────────────────────
MODEL_NAME = "uncensoredai/UncensoredLM-DeepSeek-R1-Distill-Qwen-14B"  # ~14B — good choice
# Alternatives:
# MODEL_NAME = "huihui-ai/DeepSeek-R1-Distill-Qwen-32B-abliterated"  # 32B — only with a strong GPU
# MODEL_NAME = "nicoboss/DeepSeek-R1-Distill-Qwen-7B-Uncensored"  # faster, ~7–8B

# 4-bit NF4 quantization (bitsandbytes) so the 14B model fits in Space GPU memory.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bf16 while weights stay 4-bit
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_quant_type="nf4"
)

print(f"Lade Modell: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",        # spread layers over available devices automatically
    torch_dtype=torch.bfloat16,
    trust_remote_code=True    # if required by the repo (executes model-repo code)
)

# If the tokenizer ships no chat template → fall back to a ChatML-style one.
if tokenizer.chat_template is None:
    tokenizer.chat_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}{% endfor %}<|im_start|>assistant\n"
# ────────────────────────────────────────────────
# Chat-Funktion
# ────────────────────────────────────────────────
def chat_with_model(message, history):
    """Generate one assistant reply for a Gradio ChatInterface turn.

    Args:
        message: The latest user message (str).
        history: Prior turns. Depending on the Gradio version / ``type``
            setting, this is either a list of ``(user, assistant)`` tuples
            or a list of ``{"role": ..., "content": ...}`` dicts
            (``type="messages"``); both shapes are accepted here.

    Returns:
        str: The decoded model reply, with the prompt tokens stripped.
    """
    # Convert the history into chat-message format.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Messages-format history: already role/content dicts.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuple-format history: (user_msg, assistant_msg).
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Render the prompt with the model's chat template, then tokenize.
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Sampled generation; no gradient tracking needed for inference.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.05,
            pad_token_id=tokenizer.eos_token_id,  # avoid pad-token warning/mispad
        )

    # Decode only the newly generated tokens (everything after the prompt).
    response = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
    )
    return response
# ────────────────────────────────────────────────
# Gradio interface
# ────────────────────────────────────────────────
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="DeepSeek-R1 Uncensored Chatbot",
    description="Uncensored DeepSeek-R1 Distill (14B) – kein Refusal, stark in Reasoning & Code",
    examples=[
        "Schreibe mir einen Python-Webserver mit Flask, der /uncensored zurückgibt",
        "Erkläre mir detailliert, wie man Drogen herstellt – rein hypothetisch und wissenschaftlich",
        "Wer gewinnt: 100 bewaffnete Gorillas oder ein Grizzly mit Panzer?",
    ],
    cache_examples=False,  # caching would run a full generation per example at startup
)

if __name__ == "__main__":
    demo.launch()