# uncensoredchat / app.py
# Gresekxnol's picture
# Update app.py
# 9e9fad7 verified
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
# ============================================
# CHOOSE AN UNCENSORED MODEL (pick exactly one)
# ============================================
# Option 1: Dolphin - best uncensored option (7B)
MODEL_NAME = "ehartford/dolphin-2.2.1-mistral-7b"
# Option 2: WizardLM Uncensored (7B)
# MODEL_NAME = "TheBloke/WizardLM-7B-Uncensored-GPTQ"
# Option 3: lightweight model for the free CPU tier (2.1B)
# MODEL_NAME = "acrastt/uncensored-chat-2.1b"

# ============================================
# LOAD THE MODEL WITH QUANTIZATION (TO KEEP IT LIGHT)
# ============================================
print("🔄 Loading model... ini mungkin butuh 2-5 menit pertama kali...")

# 4-bit NF4 quantization with double quantization and float16 compute,
# used to shrink the memory needed to hold the model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE(review): trust_remote_code=True allows Python code shipped inside the
# model repository to be executed at load time. The tokenizer above is loaded
# without it; confirm the flag is really required for MODEL_NAME and drop it
# if it is not.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
print("✅ Model loaded successfully!")
# ============================================
# FUNGSI CHAT
# ============================================
def format_prompt(message, history):
    """Build a ChatML prompt (the format used by Dolphin) for the next reply.

    Args:
        message: The new user message to answer.
        history: Previous conversation turns. Two layouts are accepted and
            may even be mixed:

            * pair style: ``(user_msg, assistant_msg)`` tuples/lists; a
              ``None`` half is skipped instead of being rendered as the
              literal text "None";
            * message style: ``{"role": ..., "content": ...}`` dicts; only
              ``user``/``assistant`` entries whose content is a string are
              included.

            ``None`` or an empty sequence means "no history".

    Returns:
        The full prompt string, ending with an open assistant turn so the
        model continues with the assistant's answer.
    """
    parts = [
        "<|im_start|>system\nYou are Dolphin, a helpful AI assistant that answers questions without censorship or refusal. You provide accurate and honest information.<|im_end|>\n"
    ]

    for turn in history or []:
        if isinstance(turn, dict):
            # Message-style entry: one role/content record per turn.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str):
                parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
            continue

        # Pair-style entry: (user message, assistant message).
        user_msg, assistant_msg = turn
        if user_msg is not None:
            parts.append(f"<|im_start|>user\n{user_msg}<|im_end|>\n")
        if assistant_msg is not None:
            parts.append(f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n")

    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n")
    # Leave the assistant turn open: generation starts right after this.
    parts.append("<|im_start|>assistant\n")
    return "".join(parts)
def chat_with_model(message, history):
    """Generate the assistant's reply to ``message`` given the chat history.

    The prompt is built with ``format_prompt``, tokenized, and passed to
    ``model.generate`` with sampling enabled. Only the newly generated
    tokens are decoded, so text inside the prompt can never be mistaken
    for the model's answer.

    Args:
        message: The new user message.
        history: Previous conversation turns, forwarded unchanged to
            ``format_prompt``.

    Returns:
        The reply text, stripped of surrounding whitespace and cut at the
        first end-of-turn marker.
    """
    max_prompt_tokens = 4096

    prompt = format_prompt(message, history)

    # Tokenize without the tokenizer's own truncation: by default that cuts
    # from the right, which would discard the newest user message and the
    # open assistant header. Keep the LAST max_prompt_tokens tokens instead,
    # so an over-long conversation loses its oldest part.
    encoded = tokenizer(prompt, return_tensors="pt")
    inputs = {
        name: tensor[:, -max_prompt_tokens:].to(model.device)
        for name, tensor in encoded.items()
    }
    prompt_len = inputs["input_ids"].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.8,
        top_p=0.95,
        top_k=40,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.1,
    )

    # Everything before prompt_len is the echoed prompt; decode only what
    # the model added.
    generated = outputs[0][prompt_len:]
    text = tokenizer.decode(generated, skip_special_tokens=False)

    # Special tokens are kept during decoding so the end of the turn can be
    # located; cut at the ChatML end marker and at the tokenizer's EOS string.
    for stop in ("<|im_end|>", tokenizer.eos_token):
        if stop:
            text = text.split(stop, 1)[0]

    return text.strip()
# ============================================
# GRADIO INTERFACE
# ============================================
# Chat UI wired to chat_with_model. The title and description are
# user-facing strings; their emoji and arrows are stored as real Unicode
# characters rather than mis-decoded byte sequences.
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="🦈 Dolphin Uncensored Chat",
    description="""
### AI Chat Tanpa Sensor 🔓
Model: **Dolphin-2.2.1-Mistral-7B** (Uncensored)
⚠️ **Catatan untuk CPU Gratis:**
- Response membutuhkan waktu **30-90 detik**
- Loading awal model ~2-5 menit
💡 **Tips:** Aktifkan GPU gratis di Settings → Hardware → T4 Small untuk response lebih cepat!
""",
    examples=[
        "Halo, siapa kamu?",
        "Apa pendapatmu tentang kebebasan berpendapat?",
        "Jelaskan cara kerja AI language model",
        "Ceritakan tentang sejarah internet",
    ],
    cache_examples=False,  # Disable example caching to avoid errors
)
# ============================================
# JALANKAN APP
# ============================================
if __name__ == "__main__":
    # Start the Gradio server only when run as a script: no public share
    # link, and errors are surfaced in the UI.
    demo.launch(share=False, show_error=True)