Spaces:

FaiziRBLX
/

NousAPI

Sleeping

App Files Files Community

FaiziRBLX committed on Apr 11

Commit

4bc037c

verified ·

1 Parent(s): 1aa267f

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -46

app.py CHANGED Viewed

@@ -3,61 +3,50 @@ import gradio as gr
 from transformers import AutoTokenizer
 from best import ModelConfig, IndonesianLLM
-# IMPORT ARSITEKTUR DARI best.py
-# (Sesuaikan nama class-nya dengan yang ada di best.py, misalnya 'ModelConfig' dan 'MyTransformer')
-# =======================================================
-# 1. BANGUN KERANGKA MODELNYA DULU (Jangan sampai terhapus)
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
 tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
-# =======================================================
-config = ModelConfig()
-model = IndonesianLLM(config) # <--- PENTING: Ganti 'NamaClassModelKamu' sesuai dengan nama class yang ada di best.py!
-# =======================================================
-# 2. LOAD BOBOT MODEL DAN PERBAIKI NAMANYA
-# =======================================================
-state_dict = torch.load("model.pt", map_location=torch.device('cpu'), weights_only=False)
-new_state_dict = {}
-for key, value in state_dict.items():
-    if key.startswith('model.'):
-        new_key = key[6:]
-        new_state_dict[new_key] = value
-    else:
-        new_state_dict[key] = value
-# =======================================================
-# 3. MASUKKAN BOBOT KE DALAM KERANGKA MODEL
-# =======================================================
-model.load_state_dict(new_state_dict)
 model.eval()
-# 4. Fungsi Inference (Logika saat model menerima teks)
 def predict(teks_input):
-    # Ini adalah contoh, sesuaikan dengan cara modelmu men-generate teks
-    inputs = tokenizer(teks_input, return_tensors="pt")
-    with torch.no_grad():
-        # Asumsi modelmu punya fungsi generate atau forward
-        # Jika modelmu butuh max_length, tambahkan di sini
-        outputs = model(inputs["input_ids"])
-        # Decode output kembali menjadi teks
-        # (Logika decode ini bergantung pada output dari class modelmu di best.py)
-        # hasil_teks = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        hasil_teks = "Ini contoh output respons dari model."
-    return hasil_teks
-# 5. Buat API dengan Gradio
 iface = gr.Interface(
-    fn=predict,
-    inputs=gr.Textbox(lines=2, placeholder="Ketik pesan di sini..."),
     outputs="text",
     title="Indonesian LLM API"
 )
-# Jalankan server
 iface.launch()

 from transformers import AutoTokenizer
 from best import ModelConfig, IndonesianLLM
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
+# Register the chain-of-thought delimiters as special tokens; predict() appends
+# "<cot>" to every prompt.
+# NOTE(review): this grows the tokenizer vocabulary — presumably the config saved
+# in the checkpoint already accounts for the two extra tokens; confirm.
 tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
+# Load the checkpoint: a dict that is read below via the keys "config",
+# "model_state_dict" and (optionally) "dtype".
+# NOTE(review): weights_only=False makes torch.load unpickle arbitrary Python
+# objects, so "model.pt" must come from a trusted source.
+checkpoint = torch.load("model.pt", map_location=torch.device('cpu'), weights_only=False)
+# Use the config stored in the checkpoint (not a default ModelConfig!)
+config = checkpoint['config']
+# Build the model skeleton from the saved config
+model = IndonesianLLM(config)
+# Take the weights, converting fp16 -> fp32 when the checkpoint is tagged 'fp16'
+state_dict = checkpoint['model_state_dict']
+if checkpoint.get('dtype') == 'fp16':
+    # Only float16 tensors are upcast; every other entry is kept as stored.
+    state_dict = {k: v.float() if v.dtype == torch.float16 else v
+                  for k, v in state_dict.items()}
+model.load_state_dict(state_dict)
 model.eval()
+# Prefer the GPU when one is available; otherwise stay on the CPU.
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model.to(device)
+# Fungsi inference
 def predict(teks_input):
+    from best import generate_text, _extract_thinking
+    prompt = f"{teks_input} <cot>"
+    full = generate_text(
+        model=model, tokenizer=tokenizer, prompt=prompt,
+        max_new_tokens=200, temperature=0.7,
+        top_k=50, top_p=0.9, device=device
+    )
+    raw = full[len(prompt):].strip()
+    _, answer = _extract_thinking(raw)
+    return answer if answer else "Maaf, saya tidak mengerti."
+# Gradio UI: one two-line text box as input, plain text as output, both wired
+# to predict().
 iface = gr.Interface(
+    fn=predict,
+    inputs=gr.Textbox(lines=2, placeholder="Ketik pesan di sini..."),
     outputs="text",
     title="Indonesian LLM API"
 )
+# Start the Gradio server as soon as this module runs (no launch options passed).
 iface.launch()