admin08077 committed on
Commit
56060c4
·
verified ·
1 Parent(s): 806b790

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -20
app.py CHANGED
@@ -2,8 +2,8 @@ import os
2
  import torch
3
  import torch.nn as nn
4
  import torch.optim as optim
5
- import pandas as pd
6
  import streamlit as st
 
7
  from sklearn.preprocessing import LabelEncoder
8
 
9
  # === Alphabet Gate Definition ===
@@ -39,20 +39,23 @@ class AlphabetGateNN(nn.Module):
39
  return x
40
 
41
  # === Helper Functions ===
42
- def process_file(file_path):
43
  """
44
- Process a CSV file and convert it into training-ready tensors.
45
- Assumes the file has columns 'text' and 'label'.
46
  """
47
- data = pd.read_csv(file_path)
48
- texts = data['text'].str.lower() # Convert to lowercase
49
- labels = data['label']
 
 
 
50
 
51
  # Encode labels
52
  le = LabelEncoder()
53
  labels = le.fit_transform(labels)
54
 
55
- # Transform texts into alphabet-based feature vectors
56
  def text_to_vector(text):
57
  vector = [0] * 26
58
  for char in text:
@@ -60,16 +63,17 @@ def process_file(file_path):
60
  vector[ord(char) - ord('a')] += 1
61
  return vector
62
 
 
63
  X = torch.tensor([text_to_vector(text) for text in texts], dtype=torch.float)
64
  y = torch.tensor(labels, dtype=torch.long)
65
 
66
  return X, y
67
 
68
- def train_on_file(file_path, model, optimizer, criterion):
69
  """
70
- Train the model on data from a given file.
71
  """
72
- X, y = process_file(file_path)
73
  dataset = torch.utils.data.TensorDataset(X, y)
74
  dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
75
 
@@ -85,24 +89,24 @@ def train_on_file(file_path, model, optimizer, criterion):
85
 
86
  # === Streamlit App ===
87
  # Initialize model, optimizer, and loss function
88
- input_dim = 26
89
  hidden_dim = 16
90
- output_dim = 3
91
  model = AlphabetGateNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
92
  optimizer = optim.Adam(model.parameters(), lr=0.01)
93
  criterion = nn.CrossEntropyLoss()
94
 
95
- st.title("Enterprise-Ready Continuous Training App with Alphabet Gate")
96
  st.write("""
97
  ### How it Works:
98
- 1. Upload a CSV file with the following format:
99
- - **text**: Text column containing input strings.
100
- - **label**: Target labels for classification.
101
  2. The model will train incrementally on each file you upload.
102
  3. You can download the updated model once training is complete.
103
  """)
104
 
105
- uploaded_file = st.file_uploader("Upload a training file (CSV format)", type="csv")
106
 
107
  if uploaded_file is not None:
108
  # Save the uploaded file locally
@@ -112,8 +116,8 @@ if uploaded_file is not None:
112
  f.write(uploaded_file.getbuffer())
113
 
114
  # Train the model on the uploaded file
115
- st.write("Processing and training on the uploaded file...")
116
- model = train_on_file(file_path, model, optimizer, criterion)
117
  st.success("Training complete!")
118
 
119
  # Save the updated model
 
2
  import torch
3
  import torch.nn as nn
4
  import torch.optim as optim
 
5
  import streamlit as st
6
+ import pandas as pd
7
  from sklearn.preprocessing import LabelEncoder
8
 
9
  # === Alphabet Gate Definition ===
 
39
  return x
40
 
41
  # === Helper Functions ===
42
+ def process_text_file(file_path, default_label="neutral"):
43
  """
44
+ Process a plain text file and convert it into training-ready tensors.
45
+ Each line in the text file is treated as one training example.
46
  """
47
+ with open(file_path, "r") as f:
48
+ lines = f.readlines()
49
+
50
+ # Strip whitespace and assign the default label
51
+ texts = [line.strip().lower() for line in lines if line.strip()]
52
+ labels = [default_label] * len(texts)
53
 
54
  # Encode labels
55
  le = LabelEncoder()
56
  labels = le.fit_transform(labels)
57
 
58
+ # Transform texts into 26-dimensional alphabet-based feature vectors
59
  def text_to_vector(text):
60
  vector = [0] * 26
61
  for char in text:
 
63
  vector[ord(char) - ord('a')] += 1
64
  return vector
65
 
66
+ # Apply transformation to all text data
67
  X = torch.tensor([text_to_vector(text) for text in texts], dtype=torch.float)
68
  y = torch.tensor(labels, dtype=torch.long)
69
 
70
  return X, y
71
 
72
+ def train_on_text_file(file_path, model, optimizer, criterion, default_label="neutral"):
73
  """
74
+ Train the model on data from a plain text file.
75
  """
76
+ X, y = process_text_file(file_path, default_label=default_label)
77
  dataset = torch.utils.data.TensorDataset(X, y)
78
  dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
79
 
 
89
 
90
  # === Streamlit App ===
91
  # Initialize model, optimizer, and loss function
92
+ input_dim = 26 # 26 alphabet features
93
  hidden_dim = 16
94
+ output_dim = 3 # 3 classes: positive, negative, neutral
95
  model = AlphabetGateNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
96
  optimizer = optim.Adam(model.parameters(), lr=0.01)
97
  criterion = nn.CrossEntropyLoss()
98
 
99
+ st.title("Train on Text Files with Alphabet Gate")
100
  st.write("""
101
  ### How it Works:
102
+ 1. Upload a plain text file (.txt) where:
103
+ - Each line represents a training example (e.g., a sentence or phrase).
104
+ - If labels are missing, all lines will be assigned a default label.
105
  2. The model will train incrementally on each file you upload.
106
  3. You can download the updated model once training is complete.
107
  """)
108
 
109
+ uploaded_file = st.file_uploader("Upload a text file (.txt format)", type="txt")
110
 
111
  if uploaded_file is not None:
112
  # Save the uploaded file locally
 
116
  f.write(uploaded_file.getbuffer())
117
 
118
  # Train the model on the uploaded file
119
+ st.write("Processing and training on the uploaded text file...")
120
+ model = train_on_text_file(file_path, model, optimizer, criterion)
121
  st.success("Training complete!")
122
 
123
  # Save the updated model