Spaces:
Sleeping
Sleeping
| import gradio as ui | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
| from threading import Thread | |
# Smallest and fastest checkpoint choice for a CPU-only Space.
model_id = "HuggingFaceTB/SmolLM2-135M-Instruct"

print("Memuat Model Paling Kecil di Dunia (SmolLM2-135M)...")

# The tokenizer and the weights come from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# float32 weights, placed explicitly on the CPU.
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float32, device_map="cpu")

print("Model Terkecil Siap Beraksi Tanpa Delay!")
def _content_to_text(content):
    """Return one chat message's content as plain text ("" when there is none)."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): some Gradio releases appear to deliver content as a
        # list of typed parts (e.g. {"type": "text", "text": ...}); only the
        # textual parts are kept here -- confirm against the installed version.
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces)
    return str(content)


def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Accepts both history layouts: ``(user, assistant)`` pairs and
    role/content dicts. Entries whose text is empty are dropped so the chat
    template never receives ``None`` or blank content.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            turns = [(entry.get("role"), entry.get("content"))]
        else:
            user_msg, ai_msg = entry
            turns = [("user", user_msg), ("assistant", ai_msg)]
        for role, content in turns:
            text = _content_to_text(content)
            if role and text:
                messages.append({"role": role, "content": text})
    return messages


def chat_smol(message, history):
    """Stream the model's reply to ``message``, yielding the growing text.

    Args:
        message: The new user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            dicts with ``"role"`` and ``"content"`` keys.

    Yields:
        The reply accumulated so far, once per chunk produced by the streamer.
    """
    conversation = _history_to_messages(history)
    conversation.append({"role": "user", "content": message})

    # return_dict=True gives input_ids *and* attention_mask, so generate() is
    # not left to guess the mask, and the result has the same shape regardless
    # of the library's default return type.
    inputs = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cpu")

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=250,  # capped so replies come back quickly
        # temperature/top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )

    # generate() blocks until done, so it runs in a worker thread while this
    # generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # The streamer is exhausted only once generation has ended; reap the worker.
    thread.join()
# Gradio chat UI wired to the streaming handler.
demo = ui.ChatInterface(
    chat_smol,
    title="⚡ Ultra Micro Chatbot (SmolLM2)",
    description=(
        "Menggunakan model 135M Parameter. "
        "Ini adalah spek paling ringan, dijamin langsung merespon "
        "secepat kilat tanpa loading lama!"
    ),
)

# Start the web app only when executed as a script.
if __name__ == "__main__":
    demo.launch()