# ai / app.py
# FarelDeveloper's picture
# Update app.py
# 53d54ab verified
# Raw
# History Blame Contribute Delete
# 1.86 kB
import gradio as ui
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
# The very smallest model, picked because it is the fastest on a CPU Space.
model_id = "HuggingFaceTB/SmolLM2-135M-Instruct"

print("Memuat Model Paling Kecil di Dunia (SmolLM2-135M)...")

# Tokenizer and weights come from the same checkpoint; the weights are
# loaded as float32 and mapped onto the CPU.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float32, device_map="cpu")

print("Model Terkecil Siap Beraksi Tanpa Delay!")
def _message_text(content):
    """Return the plain text of one history entry, or None if it has none.

    Accepts a plain string, or a list of content parts where text parts are
    dicts carrying a string under the "text" key. Anything else (None, file
    attachments, unknown objects) yields None so the caller can skip it.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        texts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "".join(texts) or None
    return None


def _history_to_conversation(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Supports both history layouts: "messages" style entries (dicts with
    "role" and "content") and legacy pair entries (user_msg, ai_msg).
    Entries without usable text are dropped.
    """
    conversation = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _message_text(entry.get("content"))
            if role in ("system", "user", "assistant") and text is not None:
                conversation.append({"role": role, "content": text})
            continue
        # Legacy format: one (user, assistant) pair per turn.
        user_msg, ai_msg = entry
        for role, content in (("user", user_msg), ("assistant", ai_msg)):
            text = _message_text(content)
            if text is not None:
                conversation.append({"role": role, "content": text})
    return conversation


def chat_smol(message, history):
    """Stream the model's reply to `message`, given the prior chat `history`.

    Args:
        message: The new user message.
        history: Previous turns, either as role/content dicts or as
            (user_msg, ai_msg) pairs.

    Yields:
        The reply accumulated so far, growing with every streamed chunk.
    """
    # Add the chat history, then the new user turn.
    conversation = _history_to_conversation(history)
    conversation.append({"role": "user", "content": message})

    # return_dict=True gives a mapping with input_ids and attention_mask,
    # so the result can be unpacked straight into generate().
    inputs = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cpu")

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=250,  # capped so replies come back quickly
        # temperature/top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )

    # generate() blocks until done, so it runs in a worker thread while this
    # generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # The streamer is exhausted, so generation has finished; reap the worker.
    thread.join()
# Gradio chatbot UI: the chat handler plus the heading text shown on the page.
demo = ui.ChatInterface(
    chat_smol,
    title="⚡ Ultra Micro Chatbot (SmolLM2)",
    description="Menggunakan model 135M Parameter. Ini adalah spek paling ringan, dijamin langsung merespon secepat kilat tanpa loading lama!",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()