Mlaana committed on
Commit
2431f8a
·
1 Parent(s): 6b8b9bf
Files changed (1) hide show
  1. app.py +21 -29
app.py CHANGED
@@ -21,19 +21,23 @@ print("🔧 Loading model & tokenizer...")
21
  tokenizer = AutoTokenizer.from_pretrained("model")
22
  model = AutoModelForCausalLM.from_pretrained("model", torch_dtype=torch.float16)
23
 
 
24
  device = "cuda" if torch.cuda.is_available() else "cpu"
25
  model.to(device)
26
-
27
  streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
28
 
29
  # ==== STEP 3: Define response logic ====
30
  def respond(message, history, max_tokens, temperature, top_p):
 
31
  history_text = ""
 
32
  if history:
33
  for user, bot in history:
34
  history_text += f"<|user|>{user}<|assistant|>{bot}"
 
35
  full_input = history_text + f"<|user|>{message}<|assistant|>"
36
-
37
  inputs = tokenizer(full_input, return_tensors="pt").to(device)
38
  output = model.generate(
39
  **inputs,
@@ -41,37 +45,25 @@ def respond(message, history, max_tokens, temperature, top_p):
41
  do_sample=True,
42
  temperature=temperature,
43
  top_p=top_p,
44
- pad_token_id=tokenizer.eos_token_id,
45
  )
 
46
  output_text = tokenizer.decode(output[0], skip_special_tokens=True)
 
47
  answer = output_text.split("<|assistant|>")[-1].strip()
48
  return answer
49
 
50
- # ==== STEP 4: Gradio UI (fix supaya gak ada warning) ====
51
-
52
- def gradio_respond(message, history, max_tokens, temperature, top_p):
53
- # history di Gradio tipe baru: list of dict {"user":..., "assistant":...}
54
- # tapi fungsi respond kamu pake tuple, jadi convert dulu
55
- history_tuples = [(h["user"], h["assistant"]) for h in history] if history else []
56
- bot_response = respond(message, history_tuples, max_tokens, temperature, top_p)
57
- # update history dengan format baru
58
- history = history + [{"user": message, "assistant": bot_response}]
59
- return "", history
60
-
61
- with gr.Blocks() as demo:
62
- chatbot = gr.Chatbot(label="🦙 TinyLLaMA Chatbot", type="messages", value=[])
63
- max_tokens = gr.Slider(64, 1024, value=256, label="Max Tokens")
64
- temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
65
- top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
66
- txt = gr.Textbox(placeholder="Ketik pesanmu...", show_label=False)
67
-
68
- txt.submit(
69
- gradio_respond,
70
- inputs=[txt, chatbot, max_tokens, temperature, top_p],
71
- outputs=[txt, chatbot],
72
- )
73
-
74
- gr.Markdown("Fine-tuned TinyLLaMA menggunakan QLoRA.")
75
 
76
  if __name__ == "__main__":
77
- demo.launch(debug=True)
 
21
  tokenizer = AutoTokenizer.from_pretrained("model")
22
  model = AutoModelForCausalLM.from_pretrained("model", torch_dtype=torch.float16)
23
 
24
+ # Gunakan CUDA kalau tersedia
25
  device = "cuda" if torch.cuda.is_available() else "cpu"
26
  model.to(device)
27
+ # Optional: streaming token
28
  streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
29
 
30
  # ==== STEP 3: Define response logic ====
31
  def respond(message, history, max_tokens, temperature, top_p):
32
+ input_ids = tokenizer.encode(message, return_tensors="pt").to(device)
33
  history_text = ""
34
+
35
  if history:
36
  for user, bot in history:
37
  history_text += f"<|user|>{user}<|assistant|>{bot}"
38
+
39
  full_input = history_text + f"<|user|>{message}<|assistant|>"
40
+
41
  inputs = tokenizer(full_input, return_tensors="pt").to(device)
42
  output = model.generate(
43
  **inputs,
 
45
  do_sample=True,
46
  temperature=temperature,
47
  top_p=top_p,
48
+ pad_token_id=tokenizer.eos_token_id
49
  )
50
+
51
  output_text = tokenizer.decode(output[0], skip_special_tokens=True)
52
+ # Ambil jawaban terakhir saja
53
  answer = output_text.split("<|assistant|>")[-1].strip()
54
  return answer
55
 
56
+ # ==== STEP 4: Gradio UI ====
57
# Extra generation controls shown with the chat box. Their order matches the
# max_tokens, temperature, top_p parameters of respond().
_sampling_controls = [
    gr.Slider(64, 1024, value=256, label="Max Tokens"),
    gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
]

# Chat UI that routes every submitted message through respond().
chat = gr.ChatInterface(
    fn=respond,
    additional_inputs=_sampling_controls,
    title="🦙 TinyLLaMA Chatbot",
    description="Fine-tuned TinyLLaMA using QLoRA.",
)


if __name__ == "__main__":
    # Start the Gradio server only when run as a script.
    chat.launch()