Mlaana commited on
Commit
aa5931c
·
1 Parent(s): fab03e7

Upload Gdown

Browse files
app.py CHANGED
@@ -1,2 +1,70 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
- import gradio as gd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gdown
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
5
+ import gradio as gr
6
+
7
# ==== STEP 1: Download the model weights (skipped if already present) ====
os.makedirs("model", exist_ok=True)

MODEL_URL = "https://drive.google.com/uc?id=1pcEj5kQcdq2YkjLU-KyYz8qcD4VkJhKy"  # <- replace with your own file ID
MODEL_PATH = "model/model.safetensors"

# Only hit Google Drive when the safetensors file is missing locally.
if os.path.exists(MODEL_PATH):
    print("βœ… Model file already exists")
else:
    print("⬇ Downloading model weights...")
    gdown.download(MODEL_URL, MODEL_PATH, quiet=False)
18
+
19
# ==== STEP 2: Load tokenizer & model ====
print("πŸ”§ Loading model & tokenizer...")

# Pick the device first so the weight dtype can match it: float16 is only
# reliably supported on CUDA — on CPU many ops lack half-precision kernels
# (slow fallbacks or runtime errors), so load float32 weights there instead.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Tokenizer/config files live alongside the downloaded weights in ./model.
tokenizer = AutoTokenizer.from_pretrained("model")
model = AutoModelForCausalLM.from_pretrained("model", torch_dtype=dtype)
model.to(device)

# Optional: stream generated tokens to stdout as they are produced.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
31
# ==== STEP 3: Define response logic ====
def respond(message, history, max_tokens, temperature, top_p):
    """Generate a chat reply for *message*, conditioned on prior turns.

    Args:
        message: The latest user message (str).
        history: List of (user, assistant) string pairs from earlier turns,
            or an empty/None value on the first turn.
        max_tokens: Maximum number of new tokens to sample.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The model's answer for the current turn (str).
    """
    # NOTE: the original code also ran `tokenizer.encode(message, ...)` here,
    # but that tensor was never used — the real prompt is built from
    # `full_input` below. The dead (and wasted) encode call was removed.

    # Rebuild the conversation in the model's chat-tag format.
    history_text = ""
    if history:
        for user, bot in history:
            history_text += f"<|user|>{user}<|assistant|>{bot}"

    # Open a fresh assistant turn for the model to complete.
    full_input = history_text + f"<|user|>{message}<|assistant|>"

    inputs = tokenizer(full_input, return_tensors="pt").to(device)
    output = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )

    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    # Keep only the text after the last assistant tag.
    # NOTE(review): this relies on "<|assistant|>" surviving decoding; if the
    # tokenizer treats it as a special token, skip_special_tokens=True strips
    # it and the split returns the whole transcript — confirm against the
    # tokenizer config.
    answer = output_text.split("<|assistant|>")[-1].strip()
    return answer
56
+
57
# ==== STEP 4: Gradio UI ====
# Generation controls exposed below the chat box.
max_tokens_slider = gr.Slider(64, 1024, value=256, label="Max Tokens")
temperature_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")

chat = gr.ChatInterface(
    fn=respond,
    additional_inputs=[max_tokens_slider, temperature_slider, top_p_slider],
    title="πŸ¦™ TinyLLaMA Chatbot",
    description="Fine-tuned TinyLLaMA using QLoRA.",
)

if __name__ == "__main__":
    chat.launch()
chat_template.jinja β†’ model/chat_template.jinja RENAMED
File without changes
config.json β†’ model/config.json RENAMED
File without changes
special_tokens_map.json β†’ model/special_tokens_map.json RENAMED
File without changes
tokenizer.json β†’ model/tokenizer.json RENAMED
File without changes
tokenizer_config.json β†’ model/tokenizer_config.json RENAMED
File without changes
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio
2
- transformers
 
 
 
1
  gradio
2
+ transformers
3
+ gdown
4
+ torch