Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,8 +2,8 @@ import os
|
|
| 2 |
import torch
|
| 3 |
import torch.nn as nn
|
| 4 |
import torch.optim as optim
|
| 5 |
-
import pandas as pd
|
| 6 |
import streamlit as st
|
|
|
|
| 7 |
from sklearn.preprocessing import LabelEncoder
|
| 8 |
|
| 9 |
# === Alphabet Gate Definition ===
|
|
@@ -39,20 +39,23 @@ class AlphabetGateNN(nn.Module):
|
|
| 39 |
return x
|
| 40 |
|
| 41 |
# === Helper Functions ===
|
| 42 |
-
def
|
| 43 |
"""
|
| 44 |
-
Process a
|
| 45 |
-
|
| 46 |
"""
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# Encode labels
|
| 52 |
le = LabelEncoder()
|
| 53 |
labels = le.fit_transform(labels)
|
| 54 |
|
| 55 |
-
# Transform texts into alphabet-based feature vectors
|
| 56 |
def text_to_vector(text):
|
| 57 |
vector = [0] * 26
|
| 58 |
for char in text:
|
|
@@ -60,16 +63,17 @@ def process_file(file_path):
|
|
| 60 |
vector[ord(char) - ord('a')] += 1
|
| 61 |
return vector
|
| 62 |
|
|
|
|
| 63 |
X = torch.tensor([text_to_vector(text) for text in texts], dtype=torch.float)
|
| 64 |
y = torch.tensor(labels, dtype=torch.long)
|
| 65 |
|
| 66 |
return X, y
|
| 67 |
|
| 68 |
-
def
|
| 69 |
"""
|
| 70 |
-
Train the model on data from a
|
| 71 |
"""
|
| 72 |
-
X, y =
|
| 73 |
dataset = torch.utils.data.TensorDataset(X, y)
|
| 74 |
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
|
| 75 |
|
|
@@ -85,24 +89,24 @@ def train_on_file(file_path, model, optimizer, criterion):
|
|
| 85 |
|
| 86 |
# === Streamlit App ===
|
| 87 |
# Initialize model, optimizer, and loss function
|
| 88 |
-
input_dim = 26
|
| 89 |
hidden_dim = 16
|
| 90 |
-
output_dim = 3
|
| 91 |
model = AlphabetGateNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
|
| 92 |
optimizer = optim.Adam(model.parameters(), lr=0.01)
|
| 93 |
criterion = nn.CrossEntropyLoss()
|
| 94 |
|
| 95 |
-
st.title("
|
| 96 |
st.write("""
|
| 97 |
### How it Works:
|
| 98 |
-
1. Upload a
|
| 99 |
-
-
|
| 100 |
-
-
|
| 101 |
2. The model will train incrementally on each file you upload.
|
| 102 |
3. You can download the updated model once training is complete.
|
| 103 |
""")
|
| 104 |
|
| 105 |
-
uploaded_file = st.file_uploader("Upload a
|
| 106 |
|
| 107 |
if uploaded_file is not None:
|
| 108 |
# Save the uploaded file locally
|
|
@@ -112,8 +116,8 @@ if uploaded_file is not None:
|
|
| 112 |
f.write(uploaded_file.getbuffer())
|
| 113 |
|
| 114 |
# Train the model on the uploaded file
|
| 115 |
-
st.write("Processing and training on the uploaded file...")
|
| 116 |
-
model =
|
| 117 |
st.success("Training complete!")
|
| 118 |
|
| 119 |
# Save the updated model
|
|
|
|
| 2 |
import torch
|
| 3 |
import torch.nn as nn
|
| 4 |
import torch.optim as optim
|
|
|
|
| 5 |
import streamlit as st
|
| 6 |
+
import pandas as pd
|
| 7 |
from sklearn.preprocessing import LabelEncoder
|
| 8 |
|
| 9 |
# === Alphabet Gate Definition ===
|
|
|
|
| 39 |
return x
|
| 40 |
|
| 41 |
# === Helper Functions ===
|
| 42 |
+
def process_text_file(file_path, default_label="neutral"):
|
| 43 |
"""
|
| 44 |
+
Process a plain text file and convert it into training-ready tensors.
|
| 45 |
+
Each line in the text file is treated as one training example.
|
| 46 |
"""
|
| 47 |
+
with open(file_path, "r") as f:
|
| 48 |
+
lines = f.readlines()
|
| 49 |
+
|
| 50 |
+
# Strip whitespace and assign the default label
|
| 51 |
+
texts = [line.strip().lower() for line in lines if line.strip()]
|
| 52 |
+
labels = [default_label] * len(texts)
|
| 53 |
|
| 54 |
# Encode labels
|
| 55 |
le = LabelEncoder()
|
| 56 |
labels = le.fit_transform(labels)
|
| 57 |
|
| 58 |
+
# Transform texts into 26-dimensional alphabet-based feature vectors
|
| 59 |
def text_to_vector(text):
|
| 60 |
vector = [0] * 26
|
| 61 |
for char in text:
|
|
|
|
| 63 |
vector[ord(char) - ord('a')] += 1
|
| 64 |
return vector
|
| 65 |
|
| 66 |
+
# Apply transformation to all text data
|
| 67 |
X = torch.tensor([text_to_vector(text) for text in texts], dtype=torch.float)
|
| 68 |
y = torch.tensor(labels, dtype=torch.long)
|
| 69 |
|
| 70 |
return X, y
|
| 71 |
|
| 72 |
+
def train_on_text_file(file_path, model, optimizer, criterion, default_label="neutral"):
|
| 73 |
"""
|
| 74 |
+
Train the model on data from a plain text file.
|
| 75 |
"""
|
| 76 |
+
X, y = process_text_file(file_path, default_label=default_label)
|
| 77 |
dataset = torch.utils.data.TensorDataset(X, y)
|
| 78 |
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
|
| 79 |
|
|
|
|
| 89 |
|
| 90 |
# === Streamlit App ===
|
| 91 |
# Initialize model, optimizer, and loss function
|
| 92 |
+
input_dim = 26 # 26 alphabet features
|
| 93 |
hidden_dim = 16
|
| 94 |
+
output_dim = 3 # 3 classes: positive, negative, neutral
|
| 95 |
model = AlphabetGateNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
|
| 96 |
optimizer = optim.Adam(model.parameters(), lr=0.01)
|
| 97 |
criterion = nn.CrossEntropyLoss()
|
| 98 |
|
| 99 |
+
st.title("Train on Text Files with Alphabet Gate")
|
| 100 |
st.write("""
|
| 101 |
### How it Works:
|
| 102 |
+
1. Upload a plain text file (.txt) where:
|
| 103 |
+
- Each line represents a training example (e.g., a sentence or phrase).
|
| 104 |
+
- If labels are missing, all lines will be assigned a default label.
|
| 105 |
2. The model will train incrementally on each file you upload.
|
| 106 |
3. You can download the updated model once training is complete.
|
| 107 |
""")
|
| 108 |
|
| 109 |
+
uploaded_file = st.file_uploader("Upload a text file (.txt format)", type="txt")
|
| 110 |
|
| 111 |
if uploaded_file is not None:
|
| 112 |
# Save the uploaded file locally
|
|
|
|
| 116 |
f.write(uploaded_file.getbuffer())
|
| 117 |
|
| 118 |
# Train the model on the uploaded file
|
| 119 |
+
st.write("Processing and training on the uploaded text file...")
|
| 120 |
+
model = train_on_text_file(file_path, model, optimizer, criterion)
|
| 121 |
st.success("Training complete!")
|
| 122 |
|
| 123 |
# Save the updated model
|