FaiziRBLX committed on
Commit
4bc037c
·
verified ·
1 Parent(s): 1aa267f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -46
app.py CHANGED
@@ -3,61 +3,50 @@ import gradio as gr
3
  from transformers import AutoTokenizer
4
  from best import ModelConfig, IndonesianLLM
5
 
6
- # IMPORT ARSITEKTUR DARI best.py
7
- # (Sesuaikan nama class-nya dengan yang ada di best.py, misalnya 'ModelConfig' dan 'MyTransformer')
8
- # =======================================================
9
- # 1. BANGUN KERANGKA MODELNYA DULU (Jangan sampai terhapus)
10
-
11
  # Load tokenizer
12
  tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
13
  tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
14
- # =======================================================
15
- config = ModelConfig()
16
- model = IndonesianLLM(config) # <--- PENTING: Ganti 'NamaClassModelKamu' sesuai dengan nama class yang ada di best.py!
17
-
18
- # =======================================================
19
- # 2. LOAD BOBOT MODEL DAN PERBAIKI NAMANYA
20
- # =======================================================
21
- state_dict = torch.load("model.pt", map_location=torch.device('cpu'), weights_only=False)
22
-
23
- new_state_dict = {}
24
- for key, value in state_dict.items():
25
- if key.startswith('model.'):
26
- new_key = key[6:]
27
- new_state_dict[new_key] = value
28
- else:
29
- new_state_dict[key] = value
30
-
31
- # =======================================================
32
- # 3. MASUKKAN BOBOT KE DALAM KERANGKA MODEL
33
- # =======================================================
34
- model.load_state_dict(new_state_dict)
35
  model.eval()
36
 
37
- # 4. Fungsi Inference (Logika saat model menerima teks)
 
 
 
38
  def predict(teks_input):
39
- # Ini adalah contoh, sesuaikan dengan cara modelmu men-generate teks
40
- inputs = tokenizer(teks_input, return_tensors="pt")
41
-
42
- with torch.no_grad():
43
- # Asumsi modelmu punya fungsi generate atau forward
44
- # Jika modelmu butuh max_length, tambahkan di sini
45
- outputs = model(inputs["input_ids"])
46
-
47
- # Decode output kembali menjadi teks
48
- # (Logika decode ini bergantung pada output dari class modelmu di best.py)
49
- # hasil_teks = tokenizer.decode(outputs[0], skip_special_tokens=True)
50
- hasil_teks = "Ini contoh output respons dari model."
51
-
52
- return hasil_teks
53
-
54
- # 5. Buat API dengan Gradio
55
  iface = gr.Interface(
56
- fn=predict,
57
- inputs=gr.Textbox(lines=2, placeholder="Ketik pesan di sini..."),
58
  outputs="text",
59
  title="Indonesian LLM API"
60
  )
61
 
62
- # Jalankan server
63
  iface.launch()
 
3
  from transformers import AutoTokenizer
4
  from best import ModelConfig, IndonesianLLM
5
 
 
 
 
 
 
6
  # Load tokenizer
7
  tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
8
  tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
9
+
10
+ # Load checkpoint (strukturnya: {"model_state_dict": ..., "config": ..., dst})
11
+ checkpoint = torch.load("model.pt", map_location=torch.device('cpu'), weights_only=False)
12
+
13
+ # Ambil config dari checkpoint (bukan ModelConfig default!)
14
+ config = checkpoint['config']
15
+
16
+ # Bangun kerangka model sesuai config yang tersimpan
17
+ model = IndonesianLLM(config)
18
+
19
+ # Ambil bobot, konversi fp16 → fp32 jika perlu
20
+ state_dict = checkpoint['model_state_dict']
21
+ if checkpoint.get('dtype') == 'fp16':
22
+ state_dict = {k: v.float() if v.dtype == torch.float16 else v
23
+ for k, v in state_dict.items()}
24
+
25
+ model.load_state_dict(state_dict)
 
 
 
 
26
  model.eval()
27
 
28
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
29
+ model.to(device)
30
+
31
+ # Fungsi inference
32
  def predict(teks_input):
33
+ from best import generate_text, _extract_thinking
34
+ prompt = f"{teks_input} <cot>"
35
+ full = generate_text(
36
+ model=model, tokenizer=tokenizer, prompt=prompt,
37
+ max_new_tokens=200, temperature=0.7,
38
+ top_k=50, top_p=0.9, device=device
39
+ )
40
+ raw = full[len(prompt):].strip()
41
+ _, answer = _extract_thinking(raw)
42
+ return answer if answer else "Maaf, saya tidak mengerti."
43
+
44
+ # Gradio UI
 
 
 
 
45
  iface = gr.Interface(
46
+ fn=predict,
47
+ inputs=gr.Textbox(lines=2, placeholder="Ketik pesan di sini..."),
48
  outputs="text",
49
  title="Indonesian LLM API"
50
  )
51
 
 
52
  iface.launch()