Update app.py
app.py CHANGED
@@ -1,201 +1,59 @@
-# -*- coding: utf-8 -*-
-"""app
-
-Automatically generated by Colab.
-
-Original file is located at
-    https://colab.research.google.com/drive/1OBF9xRogFp1BlMVFwZX6R-0jD8yU_tEk
-"""
-
-import torch
-import torch.nn as nn
-from torch.nn import functional as F
-import requests
-
-# hyperparameters
-batch_size = 16
-block_size = 32
-n_embd = 64
-n_head = 4
-n_layer = 4
-dropout = 0.0
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
-learning_rate = 1e-3
-max_iters = 5000 # Number of training iterations
-
-# File path for saving the Book of Mormon text
-file_path = "Book of Mormon.txt"
-
-# Download and save the file
-url = "https://raw.githubusercontent.com/huang-0505/LLM/refs/heads/main/Book%20of%20Mormon.txt"
-response = requests.get(url)
-with open("Book of Mormon.txt", "w", encoding="utf-8") as f:
-    f.write(response.text)
-
-# Read the content of the file into the `text` variable
-with open("Book of Mormon.txt", "r", encoding="utf-8") as f:
-    text = f.read()
-
-# Tokenizer setup
-chars = sorted(list(set(text)))
-stoi = {ch: i for i, ch in enumerate(chars)}
-itos = {i: ch for i, ch in enumerate(chars)}
-encode = lambda s: [stoi[c] for c in s]
-decode = lambda l: ''.join([itos[i] for i in l])
-
-# Encode the dataset
-data = torch.tensor(encode(text), dtype=torch.long)
-
-# Split into training and validation sets
-n = int(0.9 * len(data)) # 90% training, 10% validation
-train_data = data[:n]
-val_data = data[n:]
-
-# Function to get batches of data
-def get_batch(split):
-    data = train_data if split == "train" else val_data
-    ix = torch.randint(len(data) - block_size, (batch_size,))
-    x = torch.stack([data[i:i + block_size] for i in ix])
-    y = torch.stack([data[i + 1:i + block_size + 1] for i in ix])
-    return x.to(device), y.to(device)
-
-# Model definition
-class BigramLanguageModel(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.token_embedding_table = nn.Embedding(len(chars), n_embd)
-        self.position_embedding_table = nn.Embedding(block_size, n_embd)
-        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
-        self.ln_f = nn.LayerNorm(n_embd)
-        self.lm_head = nn.Linear(n_embd, len(chars))
-
-    def forward(self, idx, targets=None):
-        tok_emb = self.token_embedding_table(idx)
-        pos_emb = self.position_embedding_table(torch.arange(idx.shape[1], device=device))
-        x = tok_emb + pos_emb
-        x = self.blocks(x)
-        x = self.ln_f(x)
-        logits = self.lm_head(x)
-        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1)) if targets is not None else None
-        return logits, loss
-
-    def generate(self, idx, max_new_tokens):
-        for _ in range(max_new_tokens):
-            idx_cond = idx[:, -block_size:]
-            logits, _ = self(idx_cond)
-            logits = logits[:, -1, :]
-            probs = F.softmax(logits, dim=-1)
-            idx_next = torch.multinomial(probs, num_samples=1)
-            idx = torch.cat((idx, idx_next), dim=1)
-        return idx
-
-class Block(nn.Module):
-    def __init__(self, n_embd, n_head):
-        super().__init__()
-        head_size = n_embd // n_head
-        self.sa = MultiHeadAttention(n_head, head_size)
-        self.ffwd = FeedForward(n_embd)
-        self.ln1 = nn.LayerNorm(n_embd)
-        self.ln2 = nn.LayerNorm(n_embd)
-
-    def forward(self, x):
-        x = x + self.sa(self.ln1(x))
-        x = x + self.ffwd(self.ln2(x))
-        return x
-
-class MultiHeadAttention(nn.Module):
-    def __init__(self, num_heads, head_size):
-        super().__init__()
-        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
-        self.proj = nn.Linear(n_embd, n_embd)
-        self.dropout = nn.Dropout(dropout)
-
-    def forward(self, x):
-        out = torch.cat([h(x) for h in self.heads], dim=-1)
-        out = self.dropout(self.proj(out))
-        return out
-
-class Head(nn.Module):
-    def __init__(self, head_size):
-        super().__init__()
-        self.key = nn.Linear(n_embd, head_size, bias=False)
-        self.query = nn.Linear(n_embd, head_size, bias=False)
-        self.value = nn.Linear(n_embd, head_size, bias=False)
-        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
-
-    def forward(self, x):
-        k, q, v = self.key(x), self.query(x), self.value(x)
-        wei = q @ k.transpose(-2, -1) * (k.size(-1) ** -0.5)
-        wei = wei.masked_fill(self.tril[:x.size(1), :x.size(1)] == 0, float('-inf'))
-        wei = F.softmax(wei, dim=-1)
-        return wei @ v
-
-class FeedForward(nn.Module):
-    def __init__(self, n_embd):
-        super().__init__()
-        self.net = nn.Sequential(
-            nn.Linear(n_embd, 4 * n_embd),
-            nn.ReLU(),
-            nn.Linear(4 * n_embd, n_embd),
-            nn.Dropout(dropout)
-        )
-
-    def forward(self, x):
-        return self.net(x)
-
-# Initialize model and optimizer
-model = BigramLanguageModel().to(device)
-optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
-
-# Training loop
-for iter in range(max_iters):
-    xb, yb = get_batch("train")
-    logits, loss = model(xb, yb)
-    optimizer.zero_grad()
-    loss.backward()
-    optimizer.step()
-
-    if iter % 100 == 0:
-        print(f"Step {iter}: Loss = {loss.item()}")
-
-# Save the model
-torch.save(model.state_dict(), "model.pth")
-print("Model trained and saved as 'model.pth'")
-
import gradio as gr

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
def chatbot_response(question):
-
-
-
-
-

-#
demo = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Religious Chatbot",
-    description="Ask questions about
)

# Launch the app
-demo.launch(
import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from sentence_transformers import SentenceTransformer, util
+import pandas as pd
+import torch

+# Load the knowledge base
+knowledge_base_df = pd.read_csv('knowledge_base.csv')
+knowledge_base = knowledge_base_df['content'].tolist()
+
+# Initialize models
+embedder = SentenceTransformer('all-MiniLM-L6-v2')
+knowledge_embeddings = embedder.encode(knowledge_base, convert_to_tensor=True)
+
+tokenizer = AutoTokenizer.from_pretrained('gpt2')
+model = AutoModelForCausalLM.from_pretrained('gpt2')
+
+# Retrieval function
+def retrieve_documents(query, top_k=3):
+    query_embedding = embedder.encode(query, convert_to_tensor=True)
+    scores = util.cos_sim(query_embedding, knowledge_embeddings)[0]
+    top_results = torch.topk(scores, k=top_k)
+    return [knowledge_base[i] for i in top_results.indices]
+
+# Generate a response
+def ask_question(question, top_k=3, max_new_tokens=50, device='cpu'):
+    retrieved_docs = retrieve_documents(question, top_k=top_k)
+    context = "<retrieval> " + " ".join(retrieved_docs) + " <query> " + question
+    context_text = f"<bos> Q: {context}\nA: "
+    input_ids = tokenizer.encode(context_text, return_tensors="pt")
+    generated_outputs = model.generate(
+        input_ids=input_ids,
+        max_length=input_ids.shape[1] + max_new_tokens,
+        do_sample=True,
+        temperature=0.7,
+        eos_token_id=tokenizer.eos_token_id
+    )
+    generated_text = tokenizer.decode(generated_outputs[0], skip_special_tokens=True)
+    return generated_text.split("A:")[1].strip() if "A:" in generated_text else generated_text.strip()
+
+# Gradio function
def chatbot_response(question):
+    try:
+        answer = ask_question(question)
+        return f"Q: {question}\nA: {answer}"
+    except Exception as e:
+        return f"Error: {e}"

+# Gradio app
demo = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Religious Chatbot",
+    description="Ask questions about religious texts, and the chatbot will generate answers based on its knowledge."
)

# Launch the app
+demo.launch()
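Note: the updated app.py reads knowledge_base.csv and uses its content column, but that file is not part of this change. The sketch below shows one way such a file could be produced; the file name and column name come from the code above, while the sample rows are placeholder assumptions rather than data from this Space.

import pandas as pd

# Hypothetical helper: write knowledge_base.csv in the shape the updated app expects.
# Only the 'content' column is read by app.py; these passages are placeholders.
passages = [
    "First passage of the source text.",
    "Second passage of the source text.",
    "Third passage of the source text.",
]
pd.DataFrame({"content": passages}).to_csv("knowledge_base.csv", index=False)

With that file uploaded next to app.py, the SentenceTransformer embeds every row at startup, and retrieve_documents returns the top_k most similar rows as context for each question.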