Mlaana commited on
Commit
aa5931c
·
1 Parent(s): fab03e7

Upload Gdown

Browse files
app.py CHANGED
@@ -1,2 +1,70 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
- import gradio as gd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gdown
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
5
+ import gradio as gr
6
+
7
# ==== STEP 1: Download the model weights (skipped if already present) ====
os.makedirs("model", exist_ok=True)

MODEL_URL = "https://drive.google.com/uc?id=1pcEj5kQcdq2YkjLU-KyYz8qcD4VkJhKy"  # <- replace with your own file ID
MODEL_PATH = "model/model.safetensors"

# Only hit Google Drive when the safetensors file is missing locally.
if os.path.exists(MODEL_PATH):
    print("βœ… Model file already exists")
else:
    print("⬇ Downloading model weights...")
    gdown.download(MODEL_URL, MODEL_PATH, quiet=False)
18
+
19
# ==== STEP 2: Load tokenizer & model ====
print("πŸ”§ Loading model & tokenizer...")

# Pick the device first so the weight dtype can match it: float16 is only
# reliably supported on CUDA — on CPU many ops lack half-precision kernels
# (slow fallbacks or runtime errors), so load float32 weights there instead.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Tokenizer/config files live alongside the downloaded weights in ./model.
tokenizer = AutoTokenizer.from_pretrained("model")
model = AutoModelForCausalLM.from_pretrained("model", torch_dtype=dtype)
model.to(device)

# Optional: stream generated tokens to stdout as they are produced.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
31
# ==== STEP 3: Define response logic ====
def respond(message, history, max_tokens, temperature, top_p):
    """Generate a chat reply for *message*, conditioned on prior turns.

    Args:
        message: The latest user message (str).
        history: List of (user, assistant) string pairs from earlier turns,
            or an empty/None value on the first turn.
        max_tokens: Maximum number of new tokens to sample.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The model's answer for the current turn (str).
    """
    # NOTE: the original code also ran `tokenizer.encode(message, ...)` here,
    # but that tensor was never used — the real prompt is built from
    # `full_input` below. The dead (and wasted) encode call was removed.

    # Rebuild the conversation in the model's chat-tag format.
    history_text = ""
    if history:
        for user, bot in history:
            history_text += f"<|user|>{user}<|assistant|>{bot}"

    # Open a fresh assistant turn for the model to complete.
    full_input = history_text + f"<|user|>{message}<|assistant|>"

    inputs = tokenizer(full_input, return_tensors="pt").to(device)
    output = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )

    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    # Keep only the text after the last assistant tag.
    # NOTE(review): this relies on "<|assistant|>" surviving decoding; if the
    # tokenizer treats it as a special token, skip_special_tokens=True strips
    # it and the split returns the whole transcript — confirm against the
    # tokenizer config.
    answer = output_text.split("<|assistant|>")[-1].strip()
    return answer
56
+
57
# ==== STEP 4: Gradio UI ====
# Generation controls exposed below the chat box.
max_tokens_slider = gr.Slider(64, 1024, value=256, label="Max Tokens")
temperature_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")

chat = gr.ChatInterface(
    fn=respond,
    additional_inputs=[max_tokens_slider, temperature_slider, top_p_slider],
    title="πŸ¦™ TinyLLaMA Chatbot",
    description="Fine-tuned TinyLLaMA using QLoRA.",
)

if __name__ == "__main__":
    chat.launch()
chat_template.jinja β†’ model/chat_template.jinja RENAMED
File without changes
config.json β†’ model/config.json RENAMED
File without changes
special_tokens_map.json β†’ model/special_tokens_map.json RENAMED
File without changes
tokenizer.json β†’ model/tokenizer.json RENAMED
File without changes
tokenizer_config.json β†’ model/tokenizer_config.json RENAMED
File without changes
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio
2
- transformers
 
 
 
1
  gradio
2
+ transformers
3
+ gdown
4
+ torch