Spaces:

admin08077
/

model

Sleeping

App Files Community

admin08077 committed on Dec 20, 2024

Commit

56060c4

verified ·

1 Parent(s): 806b790

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -20

app.py CHANGED Viewed

@@ -2,8 +2,8 @@ import os
 import torch
 import torch.nn as nn
 import torch.optim as optim
-import pandas as pd
 import streamlit as st
 from sklearn.preprocessing import LabelEncoder
 # === Alphabet Gate Definition ===
@@ -39,20 +39,23 @@ class AlphabetGateNN(nn.Module):
         return x
 # === Helper Functions ===
-def process_file(file_path):
     """
-    Process a CSV file and convert it into training-ready tensors.
-    Assumes the file has columns 'text' and 'label'.
     """
-    data = pd.read_csv(file_path)
-    texts = data['text'].str.lower()  # Convert to lowercase
-    labels = data['label']
     # Encode labels
     le = LabelEncoder()
     labels = le.fit_transform(labels)
-    # Transform texts into alphabet-based feature vectors
     def text_to_vector(text):
         vector = [0] * 26
         for char in text:
@@ -60,16 +63,17 @@ def process_file(file_path):
                 vector[ord(char) - ord('a')] += 1
         return vector
     X = torch.tensor([text_to_vector(text) for text in texts], dtype=torch.float)
     y = torch.tensor(labels, dtype=torch.long)
     return X, y
-def train_on_file(file_path, model, optimizer, criterion):
     """
-    Train the model on data from a given file.
     """
-    X, y = process_file(file_path)
     dataset = torch.utils.data.TensorDataset(X, y)
     dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
@@ -85,24 +89,24 @@ def train_on_file(file_path, model, optimizer, criterion):
 # === Streamlit App ===
 # Initialize model, optimizer, and loss function
-input_dim = 26
 hidden_dim = 16
-output_dim = 3
 model = AlphabetGateNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
 optimizer = optim.Adam(model.parameters(), lr=0.01)
 criterion = nn.CrossEntropyLoss()
-st.title("Enterprise-Ready Continuous Training App with Alphabet Gate")
 st.write("""
 ### How it Works:
-1. Upload a CSV file with the following format:
-   - **text**: Text column containing input strings.
-   - **label**: Target labels for classification.
 2. The model will train incrementally on each file you upload.
 3. You can download the updated model once training is complete.
 """)
-uploaded_file = st.file_uploader("Upload a training file (CSV format)", type="csv")
 if uploaded_file is not None:
     # Save the uploaded file locally
@@ -112,8 +116,8 @@ if uploaded_file is not None:
         f.write(uploaded_file.getbuffer())
     # Train the model on the uploaded file
-    st.write("Processing and training on the uploaded file...")
-    model = train_on_file(file_path, model, optimizer, criterion)
     st.success("Training complete!")
     # Save the updated model

 import torch
 import torch.nn as nn
 import torch.optim as optim
 import streamlit as st
+import pandas as pd
 from sklearn.preprocessing import LabelEncoder
 # === Alphabet Gate Definition ===
         return x
 # === Helper Functions ===
+def process_text_file(file_path, default_label="neutral"):
     """
+    Process a plain text file and convert it into training-ready tensors.
+    Each line in the text file is treated as one training example.
     """
+    with open(file_path, "r") as f:
+        lines = f.readlines()
+    # Strip whitespace and assign the default label
+    texts = [line.strip().lower() for line in lines if line.strip()]
+    labels = [default_label] * len(texts)
     # Encode labels
     le = LabelEncoder()
     labels = le.fit_transform(labels)
+    # Transform texts into 26-dimensional alphabet-based feature vectors
     def text_to_vector(text):
         vector = [0] * 26
         for char in text:
                 vector[ord(char) - ord('a')] += 1
         return vector
+    # Apply transformation to all text data
     X = torch.tensor([text_to_vector(text) for text in texts], dtype=torch.float)
     y = torch.tensor(labels, dtype=torch.long)
     return X, y
+def train_on_text_file(file_path, model, optimizer, criterion, default_label="neutral"):
     """
+    Train the model on data from a plain text file.
     """
+    X, y = process_text_file(file_path, default_label=default_label)
     dataset = torch.utils.data.TensorDataset(X, y)
     dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
 # === Streamlit App ===
 # Initialize model, optimizer, and loss function
+input_dim = 26  # 26 alphabet features
 hidden_dim = 16
+output_dim = 3  # 3 classes: positive, negative, neutral
 model = AlphabetGateNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
 optimizer = optim.Adam(model.parameters(), lr=0.01)
 criterion = nn.CrossEntropyLoss()
+st.title("Train on Text Files with Alphabet Gate")
 st.write("""
 ### How it Works:
+1. Upload a plain text file (.txt) where:
+   - Each line represents a training example (e.g., a sentence or phrase).
+   - If labels are missing, all lines will be assigned a default label.
 2. The model will train incrementally on each file you upload.
 3. You can download the updated model once training is complete.
 """)
+uploaded_file = st.file_uploader("Upload a text file (.txt format)", type="txt")
 if uploaded_file is not None:
     # Save the uploaded file locally
         f.write(uploaded_file.getbuffer())
     # Train the model on the uploaded file
+    st.write("Processing and training on the uploaded text file...")
+    model = train_on_text_file(file_path, model, optimizer, criterion)
     st.success("Training complete!")
     # Save the updated model