Spaces:

admin08077
/

model

Sleeping

App Files Files Community

admin08077 committed on Dec 20, 2024

Commit

7c5377d

verified ·

1 Parent(s): 56060c4

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -93

app.py CHANGED Viewed

@@ -4,109 +4,90 @@ import torch.nn as nn
 import torch.optim as optim
 import streamlit as st
 import pandas as pd
-from sklearn.preprocessing import LabelEncoder
-# === Alphabet Gate Definition ===
-class AlphabetGate(nn.Module):
-    def __init__(self):
-        super(AlphabetGate, self).__init__()
-        alphabet = list("abcdefghijklmnopqrstuvwxyz")
-        matrix_size = len(alphabet)
-        alphabet_matrix = [[ord(char) - ord('a') for char in alphabet[i:] + alphabet[:i]] for i in range(matrix_size)]
-        self.alphabet_gate = torch.tensor(alphabet_matrix, dtype=torch.float)
-    def forward(self, x):
-        # Randomly select a row from the alphabet gate matrix
-        batch_size = x.size(0)
-        selected_rows = torch.randint(0, 26, (batch_size,)).to(x.device)
-        transformed = torch.stack([self.alphabet_gate[row] for row in selected_rows])
-        return torch.matmul(transformed, x.unsqueeze(-1)).squeeze(-1)
-# === Neural Network with Alphabet Gate ===
-class AlphabetGateNN(nn.Module):
     def __init__(self, input_dim, hidden_dim, output_dim):
-        super(AlphabetGateNN, self).__init__()
-        self.alphabet_gate = AlphabetGate()
         self.fc1 = nn.Linear(input_dim, hidden_dim)
         self.fc2 = nn.Linear(hidden_dim, output_dim)
         self.activation = nn.ReLU()
-        self.softmax = nn.Softmax(dim=1)
     def forward(self, x):
-        x = self.alphabet_gate(x)  # Apply Alphabet Gate
         x = self.activation(self.fc1(x))
-        x = self.softmax(self.fc2(x))
         return x
 # === Helper Functions ===
-def process_text_file(file_path, default_label="neutral"):
     """
-    Process a plain text file and convert it into training-ready tensors.
-    Each line in the text file is treated as one training example.
     """
     with open(file_path, "r") as f:
         lines = f.readlines()
-    # Strip whitespace and assign the default label
-    texts = [line.strip().lower() for line in lines if line.strip()]
-    labels = [default_label] * len(texts)
-    # Encode labels
-    le = LabelEncoder()
-    labels = le.fit_transform(labels)
-    # Transform texts into 26-dimensional alphabet-based feature vectors
-    def text_to_vector(text):
-        vector = [0] * 26
-        for char in text:
-            if 'a' <= char <= 'z':
-                vector[ord(char) - ord('a')] += 1
-        return vector
-    # Apply transformation to all text data
-    X = torch.tensor([text_to_vector(text) for text in texts], dtype=torch.float)
-    y = torch.tensor(labels, dtype=torch.long)
-    return X, y
-def train_on_text_file(file_path, model, optimizer, criterion, default_label="neutral"):
     """
-    Train the model on data from a plain text file.
     """
-    X, y = process_text_file(file_path, default_label=default_label)
-    dataset = torch.utils.data.TensorDataset(X, y)
-    dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
-    model.train()
-    for epoch in range(3):  # Train for 3 epochs per file
-        for batch_X, batch_y in dataloader:
-            optimizer.zero_grad()
-            outputs = model(batch_X)
-            loss = criterion(outputs, batch_y)
-            loss.backward()
-            optimizer.step()
-    return model
 # === Streamlit App ===
-# Initialize model, optimizer, and loss function
-input_dim = 26  # 26 alphabet features
-hidden_dim = 16
-output_dim = 3  # 3 classes: positive, negative, neutral
-model = AlphabetGateNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
-optimizer = optim.Adam(model.parameters(), lr=0.01)
-criterion = nn.CrossEntropyLoss()
-st.title("Train on Text Files with Alphabet Gate")
 st.write("""
 ### How it Works:
-1. Upload a plain text file (.txt) where:
-   - Each line represents a training example (e.g., a sentence or phrase).
-   - If labels are missing, all lines will be assigned a default label.
-2. The model will train incrementally on each file you upload.
-3. You can download the updated model once training is complete.
 """)
-uploaded_file = st.file_uploader("Upload a text file (.txt format)", type="txt")
 if uploaded_file is not None:
     # Save the uploaded file locally
@@ -115,23 +96,16 @@ if uploaded_file is not None:
     with open(file_path, "wb") as f:
         f.write(uploaded_file.getbuffer())
-    # Train the model on the uploaded file
-    st.write("Processing and training on the uploaded text file...")
-    model = train_on_text_file(file_path, model, optimizer, criterion)
     st.success("Training complete!")
-    # Save the updated model
-    model_path = "trained_model.pth"
-    torch.save(model.state_dict(), model_path)
-    st.write("Model updated and saved.")
-    # Provide a download button for the trained model
-    with open(model_path, "rb") as f:
-        st.download_button(
-            label="Download Trained Model",
-            data=f,
-            file_name="trained_model.pth",
-            mime="application/octet-stream"
-        )
-st.write("Upload more files to continue training!")

 import torch.optim as optim
 import streamlit as st
 import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+# === Neural Network for Chatbot ===
+class ChatBotNN(nn.Module):
     def __init__(self, input_dim, hidden_dim, output_dim):
+        super(ChatBotNN, self).__init__()
         self.fc1 = nn.Linear(input_dim, hidden_dim)
         self.fc2 = nn.Linear(hidden_dim, output_dim)
         self.activation = nn.ReLU()
     def forward(self, x):
         x = self.activation(self.fc1(x))
+        x = self.fc2(x)
         return x
 # === Helper Functions ===
+def process_text_file(file_path):
     """
+    Process a plain text file into a list of sentences.
+    Each line in the text file is treated as one sentence.
     """
     with open(file_path, "r") as f:
         lines = f.readlines()
+    return [line.strip() for line in lines if line.strip()]
+def process_csv_file(file_path):
+    """
+    Process a CSV file into a list of sentences.
+    Assumes the CSV has a column named 'text'.
+    """
+    data = pd.read_csv(file_path)
+    if 'text' in data.columns:
+        return data['text'].dropna().tolist()
+    else:
+        raise ValueError("CSV file must have a 'text' column.")
+# === Training Data ===
+corpus = []
+vectorizer = TfidfVectorizer()
+def train_bot(file_path, file_type):
+    """
+    Train the chatbot by adding content from the uploaded file to the corpus.
+    """
+    global corpus
+    if file_type == "txt":
+        corpus += process_text_file(file_path)
+    elif file_type == "csv":
+        corpus += process_csv_file(file_path)
+    else:
+        raise ValueError("Unsupported file type. Use .txt or .csv.")
+    # Fit the vectorizer to the updated corpus
+    vectorizer.fit(corpus)
+def generate_response(user_input):
     """
+    Generate a chatbot response based on the trained corpus using cosine similarity.
     """
+    if not corpus:
+        return "I don't know much yet. Please upload some files to teach me!"
+    # Vectorize user input and the corpus
+    user_vector = vectorizer.transform([user_input])
+    corpus_vectors = vectorizer.transform(corpus)
+    # Compute cosine similarity
+    similarities = cosine_similarity(user_vector, corpus_vectors)
+    most_similar_idx = similarities.argmax()
+    return corpus[most_similar_idx]
 # === Streamlit App ===
+st.title("Chatbot Trainer with File Uploads")
 st.write("""
 ### How it Works:
+1. Upload `.txt` or `.csv` files to teach the chatbot.
+   - **.txt**: Each line represents one training sentence.
+   - **.csv**: Must have a column named `text` for training sentences.
+2. Interact with the chatbot in real-time.
+3. Watch the chatbot improve as you train it with more files!
 """)
+uploaded_file = st.file_uploader("Upload a file (.txt or .csv)", type=["txt", "csv"])
 if uploaded_file is not None:
     # Save the uploaded file locally
     with open(file_path, "wb") as f:
         f.write(uploaded_file.getbuffer())
+    # Train the chatbot on the uploaded file
+    st.write("Training the chatbot with the uploaded file...")
+    file_extension = uploaded_file.name.split(".")[-1]
+    train_bot(file_path, file_extension)
     st.success("Training complete!")
+# Chat Interface
+st.write("### Chat with the Bot!")
+user_input = st.text_input("You:", placeholder="Type something to chat...")
+if user_input:
+    response = generate_response(user_input)
+    st.write(f"**Bot:** {response}")