admin08077 committed on
Commit
7c5377d
·
verified ·
1 Parent(s): 56060c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -93
app.py CHANGED
@@ -4,109 +4,90 @@ import torch.nn as nn
4
  import torch.optim as optim
5
  import streamlit as st
6
  import pandas as pd
7
- from sklearn.preprocessing import LabelEncoder
 
8
 
9
- # === Alphabet Gate Definition ===
10
- class AlphabetGate(nn.Module):
11
- def __init__(self):
12
- super(AlphabetGate, self).__init__()
13
- alphabet = list("abcdefghijklmnopqrstuvwxyz")
14
- matrix_size = len(alphabet)
15
- alphabet_matrix = [[ord(char) - ord('a') for char in alphabet[i:] + alphabet[:i]] for i in range(matrix_size)]
16
- self.alphabet_gate = torch.tensor(alphabet_matrix, dtype=torch.float)
17
-
18
- def forward(self, x):
19
- # Randomly select a row from the alphabet gate matrix
20
- batch_size = x.size(0)
21
- selected_rows = torch.randint(0, 26, (batch_size,)).to(x.device)
22
- transformed = torch.stack([self.alphabet_gate[row] for row in selected_rows])
23
- return torch.matmul(transformed, x.unsqueeze(-1)).squeeze(-1)
24
-
25
- # === Neural Network with Alphabet Gate ===
26
- class AlphabetGateNN(nn.Module):
27
  def __init__(self, input_dim, hidden_dim, output_dim):
28
- super(AlphabetGateNN, self).__init__()
29
- self.alphabet_gate = AlphabetGate()
30
  self.fc1 = nn.Linear(input_dim, hidden_dim)
31
  self.fc2 = nn.Linear(hidden_dim, output_dim)
32
  self.activation = nn.ReLU()
33
- self.softmax = nn.Softmax(dim=1)
34
 
35
  def forward(self, x):
36
- x = self.alphabet_gate(x) # Apply Alphabet Gate
37
  x = self.activation(self.fc1(x))
38
- x = self.softmax(self.fc2(x))
39
  return x
40
 
41
  # === Helper Functions ===
42
- def process_text_file(file_path, default_label="neutral"):
43
  """
44
- Process a plain text file and convert it into training-ready tensors.
45
- Each line in the text file is treated as one training example.
46
  """
47
  with open(file_path, "r") as f:
48
  lines = f.readlines()
49
-
50
- # Strip whitespace and assign the default label
51
- texts = [line.strip().lower() for line in lines if line.strip()]
52
- labels = [default_label] * len(texts)
53
-
54
- # Encode labels
55
- le = LabelEncoder()
56
- labels = le.fit_transform(labels)
57
-
58
- # Transform texts into 26-dimensional alphabet-based feature vectors
59
- def text_to_vector(text):
60
- vector = [0] * 26
61
- for char in text:
62
- if 'a' <= char <= 'z':
63
- vector[ord(char) - ord('a')] += 1
64
- return vector
65
 
66
- # Apply transformation to all text data
67
- X = torch.tensor([text_to_vector(text) for text in texts], dtype=torch.float)
68
- y = torch.tensor(labels, dtype=torch.long)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- return X, y
 
71
 
72
- def train_on_text_file(file_path, model, optimizer, criterion, default_label="neutral"):
73
  """
74
- Train the model on data from a plain text file.
75
  """
76
- X, y = process_text_file(file_path, default_label=default_label)
77
- dataset = torch.utils.data.TensorDataset(X, y)
78
- dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
79
 
80
- model.train()
81
- for epoch in range(3): # Train for 3 epochs per file
82
- for batch_X, batch_y in dataloader:
83
- optimizer.zero_grad()
84
- outputs = model(batch_X)
85
- loss = criterion(outputs, batch_y)
86
- loss.backward()
87
- optimizer.step()
88
- return model
89
 
90
  # === Streamlit App ===
91
- # Initialize model, optimizer, and loss function
92
- input_dim = 26 # 26 alphabet features
93
- hidden_dim = 16
94
- output_dim = 3 # 3 classes: positive, negative, neutral
95
- model = AlphabetGateNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
96
- optimizer = optim.Adam(model.parameters(), lr=0.01)
97
- criterion = nn.CrossEntropyLoss()
98
-
99
- st.title("Train on Text Files with Alphabet Gate")
100
  st.write("""
101
  ### How it Works:
102
- 1. Upload a plain text file (.txt) where:
103
- - Each line represents a training example (e.g., a sentence or phrase).
104
- - If labels are missing, all lines will be assigned a default label.
105
- 2. The model will train incrementally on each file you upload.
106
- 3. You can download the updated model once training is complete.
107
  """)
108
 
109
- uploaded_file = st.file_uploader("Upload a text file (.txt format)", type="txt")
110
 
111
  if uploaded_file is not None:
112
  # Save the uploaded file locally
@@ -115,23 +96,16 @@ if uploaded_file is not None:
115
  with open(file_path, "wb") as f:
116
  f.write(uploaded_file.getbuffer())
117
 
118
- # Train the model on the uploaded file
119
- st.write("Processing and training on the uploaded text file...")
120
- model = train_on_text_file(file_path, model, optimizer, criterion)
 
121
  st.success("Training complete!")
122
 
123
- # Save the updated model
124
- model_path = "trained_model.pth"
125
- torch.save(model.state_dict(), model_path)
126
- st.write("Model updated and saved.")
127
-
128
- # Provide a download button for the trained model
129
- with open(model_path, "rb") as f:
130
- st.download_button(
131
- label="Download Trained Model",
132
- data=f,
133
- file_name="trained_model.pth",
134
- mime="application/octet-stream"
135
- )
136
 
137
- st.write("Upload more files to continue training!")
 
 
 
4
  import torch.optim as optim
5
  import streamlit as st
6
  import pandas as pd
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
 
10
+ # === Neural Network for Chatbot ===
11
class ChatBotNN(nn.Module):
    """Two-layer feed-forward network: Linear -> ReLU -> Linear.

    The output of the second linear layer is returned as-is; no softmax is
    applied inside the model.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run `x` through fc1, the ReLU activation, then fc2."""
        hidden = self.fc1(x)
        hidden = self.activation(hidden)
        return self.fc2(hidden)
22
 
23
  # === Helper Functions ===
24
def process_text_file(file_path):
    """
    Process a plain text file into a list of sentences.

    Each non-blank line becomes one sentence with surrounding whitespace
    removed; blank and whitespace-only lines are skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list[str]: The stripped, non-empty lines in file order.
    """
    # Decode explicitly instead of relying on the platform's default
    # encoding; "utf-8-sig" also drops a leading byte-order mark so it does
    # not end up glued to the first sentence.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
def process_csv_file(file_path):
    """
    Process a CSV file into a list of sentences.

    Reads the column named 'text', drops missing cells, converts every
    remaining cell to a stripped string and skips the ones that end up empty.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        list[str]: The non-empty sentences from the 'text' column, in row
        order.

    Raises:
        ValueError: If the CSV has no 'text' column.
    """
    data = pd.read_csv(file_path)
    if 'text' not in data.columns:
        raise ValueError("CSV file must have a 'text' column.")

    # pandas may parse the column as numbers; coerce each cell to str so the
    # result is always text, and mirror process_text_file by stripping
    # whitespace and discarding blank entries.
    cells = data['text'].dropna().tolist()
    sentences = (str(cell).strip() for cell in cells)
    return [sentence for sentence in sentences if sentence]
43
+
44
# === Training Data ===
# Module-level training state shared by train_bot() and generate_response().
# NOTE(review): Streamlit re-executes the script on every interaction, so
# these globals are presumably rebuilt on each rerun -- confirm whether the
# corpus should live in st.session_state instead.
corpus = []
vectorizer = TfidfVectorizer()


def train_bot(file_path, file_type):
    """
    Train the chatbot by adding content from the uploaded file to the corpus.

    Args:
        file_path: Path of the saved upload to read.
        file_type: File extension, "txt" or "csv"; matched
            case-insensitively and with an optional leading dot.

    Raises:
        ValueError: If file_type is not supported (also propagated from
            process_csv_file when the CSV has no 'text' column).
    """
    # Normalise so "TXT", ".csv", etc. are accepted like "txt" / "csv".
    normalized_type = str(file_type).strip().lower().lstrip(".")
    if normalized_type == "txt":
        new_sentences = process_text_file(file_path)
    elif normalized_type == "csv":
        new_sentences = process_csv_file(file_path)
    else:
        raise ValueError("Unsupported file type. Use .txt or .csv.")

    # Extend in place: the module-level list object is kept, so no `global`
    # rebinding is needed.
    corpus.extend(new_sentences)

    # Fit the vectorizer to the updated corpus. Fitting on an empty document
    # list raises ValueError, so skip it until there is something to learn.
    if corpus:
        vectorizer.fit(corpus)
62
 
63
def generate_response(user_input):
    """
    Generate a chatbot response based on the trained corpus using cosine similarity.

    Args:
        user_input: The user's message.

    Returns:
        str: The corpus sentence most similar to user_input, or a fallback
        message when the corpus is empty or no sentence in it has any
        similarity to the input.
    """
    if not corpus:
        return "I don't know much yet. Please upload some files to teach me!"

    # Vectorize user input and the corpus
    user_vector = vectorizer.transform([user_input])
    corpus_vectors = vectorizer.transform(corpus)

    # Single row of scores: similarity of the input to each corpus sentence.
    similarities = cosine_similarity(user_vector, corpus_vectors)[0]
    best_idx = int(similarities.argmax())

    # When every score is 0, argmax just yields index 0; answering with that
    # unrelated sentence would be misleading, so say so instead.
    if similarities[best_idx] <= 0:
        return "I'm not sure how to respond to that yet. Try teaching me more!"
    return corpus[best_idx]
 
78
 
79
  # === Streamlit App ===
80
+ st.title("Chatbot Trainer with File Uploads")
 
 
 
 
 
 
 
 
81
  st.write("""
82
  ### How it Works:
83
+ 1. Upload `.txt` or `.csv` files to teach the chatbot.
84
+ - **.txt**: Each line represents one training sentence.
85
+ - **.csv**: Must have a column named `text` for training sentences.
86
+ 2. Interact with the chatbot in real-time.
87
+ 3. Watch the chatbot improve as you train it with more files!
88
  """)
89
 
90
+ uploaded_file = st.file_uploader("Upload a file (.txt or .csv)", type=["txt", "csv"])
91
 
92
  if uploaded_file is not None:
93
  # Save the uploaded file locally
 
96
  with open(file_path, "wb") as f:
97
  f.write(uploaded_file.getbuffer())
98
 
99
+ # Train the chatbot on the uploaded file
100
+ st.write("Training the chatbot with the uploaded file...")
101
+ file_extension = uploaded_file.name.split(".")[-1]
102
+ train_bot(file_path, file_extension)
103
  st.success("Training complete!")
104
 
105
+ # Chat Interface
106
+ st.write("### Chat with the Bot!")
107
+ user_input = st.text_input("You:", placeholder="Type something to chat...")
 
 
 
 
 
 
 
 
 
 
108
 
109
+ if user_input:
110
+ response = generate_response(user_input)
111
+ st.write(f"**Bot:** {response}")