Mlaana committed on
Commit
09c5fbc
·
1 Parent(s): aa5931c
Files changed (1) hide show
  1. app.py +28 -21
app.py CHANGED
@@ -21,24 +21,19 @@ print("🔧 Loading model & tokenizer...")
21
  tokenizer = AutoTokenizer.from_pretrained("model")
22
  model = AutoModelForCausalLM.from_pretrained("model", torch_dtype=torch.float16)
23
 
24
- # Gunakan CUDA kalau tersedia
25
  device = "cuda" if torch.cuda.is_available() else "cpu"
26
  model.to(device)
27
 
28
- # Optional: streaming token
29
  streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
30
 
31
  # ==== STEP 3: Define response logic ====
32
  def respond(message, history, max_tokens, temperature, top_p):
33
- input_ids = tokenizer.encode(message, return_tensors="pt").to(device)
34
  history_text = ""
35
-
36
  if history:
37
  for user, bot in history:
38
  history_text += f"<|user|>{user}<|assistant|>{bot}"
39
-
40
  full_input = history_text + f"<|user|>{message}<|assistant|>"
41
-
42
  inputs = tokenizer(full_input, return_tensors="pt").to(device)
43
  output = model.generate(
44
  **inputs,
@@ -46,25 +41,37 @@ def respond(message, history, max_tokens, temperature, top_p):
46
  do_sample=True,
47
  temperature=temperature,
48
  top_p=top_p,
49
- pad_token_id=tokenizer.eos_token_id
50
  )
51
-
52
  output_text = tokenizer.decode(output[0], skip_special_tokens=True)
53
- # Ambil jawaban terakhir saja
54
  answer = output_text.split("<|assistant|>")[-1].strip()
55
  return answer
56
 
57
- # ==== STEP 4: Gradio UI ====
58
- chat = gr.ChatInterface(
59
- fn=respond,
60
- additional_inputs=[
61
- gr.Slider(64, 1024, value=256, label="Max Tokens"),
62
- gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
63
- gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
64
- ],
65
- title="🦙 TinyLLaMA Chatbot",
66
- description="Fine-tuned TinyLLaMA using QLoRA.",
67
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  if __name__ == "__main__":
70
- chat.launch()
 
21
# ==== Load tokenizer & model weights from the local "model" directory ====
tokenizer = AutoTokenizer.from_pretrained("model")
model = AutoModelForCausalLM.from_pretrained("model", torch_dtype=torch.float16)

# Prefer the GPU when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)

# Optional token-by-token streaming helper (prompt tokens are suppressed).
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
28
 
29
# ==== STEP 3: Define response logic ====
def respond(message, history, max_tokens, temperature, top_p):
    """Generate one chat reply for `message` given prior (user, bot) turns.

    history is a list of (user, bot) string tuples; max_tokens bounds the
    number of newly generated tokens; temperature/top_p control sampling.
    Returns the assistant's answer as a plain string.
    """
    # Serialize prior turns with the model's chat markers, then add the
    # new user message and an open assistant marker for the model to fill.
    turns = [f"<|user|>{u}<|assistant|>{b}" for u, b in (history or [])]
    prompt = "".join(turns) + f"<|user|>{message}<|assistant|>"

    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    generated = model.generate(
        **encoded,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        # Avoid the "pad token not set" warning on models without one.
        pad_token_id=tokenizer.eos_token_id,
    )

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only the text after the last assistant marker (the new answer).
    return decoded.split("<|assistant|>")[-1].strip()
49
 
50
# ==== STEP 4: Gradio UI glue (avoids the deprecated tuple-history warning) ====
def gradio_respond(message, history, max_tokens, temperature, top_p):
    """Bridge Gradio's "messages" history format to respond()'s tuple format.

    With gr.Chatbot(type="messages") the history is a flat list of
    {"role": "user"|"assistant", "content": ...} dicts — NOT
    {"user": ..., "assistant": ...} dicts, so indexing h["user"] would
    raise KeyError. Pair consecutive user/assistant messages into
    (user, bot) tuples for respond(), then append the new exchange back
    in messages format. Returns ("", updated_history) so the textbox is
    cleared and the chat window refreshed.
    """
    history = history or []

    # Fold the flat role/content stream into (user, bot) pairs.
    history_tuples = []
    pending_user = None
    for msg in history:
        if msg["role"] == "user":
            pending_user = msg["content"]
        elif msg["role"] == "assistant" and pending_user is not None:
            history_tuples.append((pending_user, msg["content"]))
            pending_user = None

    bot_response = respond(message, history_tuples, max_tokens, temperature, top_p)

    # Append the new exchange as two messages so the Chatbot can render it.
    history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": bot_response},
    ]
    return "", history
60
+
61
# Build the UI. Gradio renders widgets top-to-bottom in creation order.
with gr.Blocks() as demo:
    chat_window = gr.Chatbot(label="🦙 TinyLLaMA Chatbot", type="messages", value=[])
    max_tokens_slider = gr.Slider(64, 1024, value=256, label="Max Tokens")
    temperature_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
    top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
    message_box = gr.Textbox(placeholder="Ketik pesanmu...", show_label=False)

    # Enter in the textbox: generate a reply, clear the box, refresh the chat.
    generation_inputs = [
        message_box,
        chat_window,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
    ]
    message_box.submit(
        gradio_respond,
        inputs=generation_inputs,
        outputs=[message_box, chat_window],
    )

    gr.Markdown("Fine-tuned TinyLLaMA menggunakan QLoRA.")

if __name__ == "__main__":
    demo.launch()