# Hugging Face Spaces status: Running
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# ============================================
# CHOOSE AN UNCENSORED MODEL (pick one)
# ============================================
# Option 1: Dolphin - best uncensored option (7B)
MODEL_NAME = "ehartford/dolphin-2.2.1-mistral-7b"
# Option 2: WizardLM Uncensored (7B)
# NOTE(review): this is a GPTQ checkpoint; it may not combine with the
# bitsandbytes config below - confirm before switching to it.
# MODEL_NAME = "TheBloke/WizardLM-7B-Uncensored-GPTQ"
# Option 3: lightweight model for the free CPU tier (2.1B)
# MODEL_NAME = "acrastt/uncensored-chat-2.1b"

# ============================================
# LOAD THE MODEL WITH QUANTIZATION (TO KEEP IT LIGHT)
# ============================================
# NOTE(review): the leading character of the two status messages below looks
# like a mis-decoded emoji (mojibake); restore it from the original UTF-8 file.
print("π Loading model... ini mungkin butuh 2-5 menit pertama kali...")

# 4-bit quantization config (reduces memory use).
# NOTE(review): bitsandbytes 4-bit loading generally expects a CUDA device -
# confirm this works on CPU-only hardware.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    # NOTE(review): this allows Python code shipped in the model repository to
    # run at load time; confirm the selected model actually requires it.
    trust_remote_code=True,
)

print("β Model loaded successfully!")
# ============================================
# CHAT FUNCTIONS
# ============================================
def format_prompt(message, history):
    """Build a ChatML-style prompt for the next assistant turn.

    Args:
        message: The new user message to be answered.
        history: Earlier conversation turns. Accepts either a sequence of
            ``(user_msg, assistant_msg)`` pairs or a sequence of
            ``{"role": ..., "content": ...}`` dicts with role ``"user"`` or
            ``"assistant"``. ``None`` or an empty sequence means there is no
            earlier conversation.

    Returns:
        The prompt string: the system turn, every earlier turn, the new user
        turn, and an opened (unterminated) assistant turn for the model to
        complete.
    """
    parts = [
        "<|im_start|>system\nYou are Dolphin, a helpful AI assistant that answers questions without censorship or refusal. You provide accurate and honest information.<|im_end|>\n"
    ]

    for turn in history or ():
        if isinstance(turn, dict):
            # Messages-style history: one dict per message.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and content is not None:
                parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
            continue

        # Pair-style history: (user message, assistant reply).
        user_msg, assistant_msg = turn
        # A missing side of a pair is skipped rather than rendered as "None".
        if user_msg is not None:
            parts.append(f"<|im_start|>user\n{user_msg}<|im_end|>\n")
        if assistant_msg is not None:
            parts.append(f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n")

    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n")
    # Leave the assistant turn open so generation continues from here.
    parts.append("<|im_start|>assistant\n")
    return "".join(parts)
def chat_with_model(message, history):
    """Generate the assistant's reply to ``message`` given the chat ``history``.

    Args:
        message: The latest user message.
        history: Earlier conversation turns, passed through to
            ``format_prompt``.

    Returns:
        The generated reply, cut at the first end-of-turn marker and stripped
        of surrounding whitespace.
    """
    max_prompt_tokens = 4096

    prompt = format_prompt(message, history)

    # Tokenize without truncation, then keep only the most recent tokens.
    # Truncating from the right (the tokenizer default) would drop the newest
    # user message and the open assistant turn when the conversation is long.
    encoded = tokenizer(prompt, return_tensors="pt")
    inputs = {
        name: tensor[:, -max_prompt_tokens:].to(model.device)
        for name, tensor in encoded.items()
    }
    prompt_length = inputs["input_ids"].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.8,
        top_p=0.95,
        top_k=40,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.1,
    )

    # The returned sequence starts with the prompt tokens; decode only what was
    # generated so marker text inside the prompt cannot confuse the extraction.
    generated_ids = outputs[0][prompt_length:]
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=False)

    # Keep the text up to the first end-of-turn marker.
    reply = generated_text.split("<|im_end|>")[0]
    # Also drop the tokenizer's own end-of-sequence text if it differs.
    eos_text = tokenizer.eos_token
    if eos_text:
        reply = reply.split(eos_text)[0]
    return reply.strip()
# ============================================
# GRADIO INTERFACE
# ============================================
# NOTE(review): several characters in the title and description below look
# like mis-decoded emoji/arrows (mojibake); restore them from the original
# UTF-8 file. The strings are kept exactly as found here.
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="π¦ Dolphin Uncensored Chat",
    description="""
### AI Chat Tanpa Sensor π
Model: **Dolphin-2.2.1-Mistral-7B** (Uncensored)
β οΈ **Catatan untuk CPU Gratis:**
- Response membutuhkan waktu **30-90 detik**
- Loading awal model ~2-5 menit
π‘ **Tips:** Aktifkan GPU gratis di Settings β Hardware β T4 Small untuk response lebih cepat!
""",
    examples=[
        "Halo, siapa kamu?",
        "Apa pendapatmu tentang kebebasan berpendapat?",
        "Jelaskan cara kerja AI language model",
        "Ceritakan tentang sejarah internet",
    ],
    cache_examples=False,  # Disable caching to avoid errors
)
# ============================================
# RUN THE APP
# ============================================
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    demo.launch(
        show_error=True,
        share=False,
    )