vortexa64 committed on
Commit
800491f
·
verified ·
1 Parent(s): 34d2621

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import json
3
+ import gradio as gr
4
+
5
# === LOAD CORPUS & DATASET ===
# corpus.txt holds one training sentence per line.
with open("corpus.txt", "r", encoding="utf-8") as corpus_file:
    corpus = corpus_file.read().splitlines()

# dataset.json maps question -> answer strings (consumed as pairs by the
# fine-tuning loop below).
with open("dataset.json", "r", encoding="utf-8") as dataset_file:
    qa_data = json.load(dataset_file)
11
+
12
# === BUILD VOCAB ===
# Pool every text source so corpus, questions and answers share one vocab.
all_texts = corpus + list(qa_data.keys()) + list(qa_data.values())
# Fix 1: sorted() makes id assignment deterministic — the original
# list(set(...)) order changed with Python's per-process hash randomization,
# so weights/losses were not reproducible across runs despite the fixed seed.
# Fix 2: explicitly include the "<END>" sentinel: training appends it as a
# target, but unless it happened to appear in the raw text it was out of
# vocabulary (zero one-hot), so generation could never emit it and always
# ran to max_len.
vocab = sorted(set(" ".join(all_texts).split()) | {"<END>"})
word2id = {w: i for i, w in enumerate(vocab)}
id2word = {i: w for w, i in word2id.items()}
vocab_size = len(vocab)
18
+
19
def one_hot(word):
    """Return a one-hot vector of length vocab_size for *word*.

    Unknown words yield an all-zero vector rather than raising.
    """
    encoding = np.zeros(vocab_size)
    idx = word2id.get(word)
    if idx is not None:
        encoding[idx] = 1
    return encoding
24
+
25
# === MODEL PARAMETERS ===
lr = 0.05  # SGD learning rate
np.random.seed(42)  # fixed seed so weight initialization is reproducible
hidden_size = 512  # hidden-layer width (tunable)
# Small random init for the two linear layers: one-hot input -> hidden -> vocab logits.
W1 = np.random.randn(vocab_size, hidden_size) * 0.01
W2 = np.random.randn(hidden_size, vocab_size) * 0.01
31
+
32
def softmax(x):
    """Numerically stable softmax over a 1-D score vector."""
    shifted = x - np.max(x)  # subtract max to avoid exp overflow
    exps = np.exp(shifted)
    return exps / exps.sum()
35
+
36
def train_step(sentence):
    """Run one SGD pass over the consecutive word pairs of *sentence*.

    The model is a 2-layer linear network (one-hot -> hidden -> softmax)
    trained to predict the next word. Updates the module-level weight
    matrices W1/W2 in place and returns the sentence's accumulated loss.
    """
    global W1, W2
    words = sentence.split()
    loss = 0.0
    for i in range(len(words) - 1):
        x = one_hot(words[i])
        y_true = one_hot(words[i + 1])
        # forward pass
        h = np.dot(x, W1)
        o = np.dot(h, W2)
        y_pred = softmax(o)
        # Report cross-entropy, which is the objective whose gradient is
        # applied below. (The original reported MSE while optimizing
        # cross-entropy, so the printed loss didn't track training.)
        # Epsilon guards log(0); OOV targets give a zero y_true and loss 0.
        loss += -np.sum(y_true * np.log(y_pred + 1e-12))
        # backward pass: d(CE)/d(o) = y_pred - y_true for softmax + CE
        grad_o = y_pred - y_true
        dW2 = np.outer(h, grad_o)
        dW1 = np.outer(x, np.dot(W2, grad_o))
        W1 -= lr * dW1
        W2 -= lr * dW2
    return loss
53
+
54
# === PRETRAIN ON THE CORPUS ===
# Each line is terminated with the <END> sentinel so the model learns
# where sentences stop.
for epoch in range(200):
    total_loss = sum(train_step(line + " <END>") for line in corpus)
    if epoch % 50 == 0:
        print(f"Pretrain Epoch {epoch}, Loss: {total_loss:.4f}")
61
+
62
# === FINE-TUNE ON THE Q&A PAIRS ===
# Question and answer are joined into one sequence so the model learns to
# continue a question with its answer.
for epoch in range(200):
    total_loss = sum(train_step(q + " " + a) for q, a in qa_data.items())
    if epoch % 50 == 0:
        print(f"Finetune Epoch {epoch}, Loss: {total_loss:.4f}")
69
+
70
# === ANSWER GENERATION ===
def generate_reply(question, max_len=30):
    """Greedily generate a reply seeded by the first word of *question*.

    Feeds the argmax prediction back as the next input until "<END>" is
    produced or *max_len* words have been emitted. Returns the fallback
    message when the input is empty or its first word is unknown.
    """
    words = question.split()
    # Guard against empty/whitespace input: the original indexed words[0]
    # unconditionally and raised IndexError on an empty question.
    if not words or words[0] not in word2id:
        return "Maaf, aku belum ngerti kata itu 🥺"
    x = one_hot(words[0])
    reply = []
    for _ in range(max_len):
        h = np.dot(x, W1)
        o = np.dot(h, W2)
        y_pred = softmax(o)
        pred_id = int(np.argmax(y_pred))  # greedy decoding keeps output deterministic
        pred_word = id2word[pred_id]
        if pred_word == "<END>":
            break
        reply.append(pred_word)
        x = one_hot(pred_word)
    return " ".join(reply)
88
+
89
# === GRADIO INTERFACE ===
def chatbot(input_text):
    """Gradio callback: delegate the user's text to generate_reply."""
    reply = generate_reply(input_text)
    return reply
92
+
93
# Simple single-textbox UI wired to the chatbot callback.
demo = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Chatbot Numpy ala Cici 🤭",
    description="Mini chatbot dengan training 2 tahap: corpus + Q&A",
)

if __name__ == "__main__":
    demo.launch()