# LLM-Tuner / app.py
import streamlit as st
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, Dataset, DatasetDict
import os
import glob
import pandas as pd
import time
# Set up Streamlit page
st.set_page_config(page_title="AutoTrain AI", page_icon="πŸš€", layout="wide")
st.title("AutoTrain AI πŸš€")
st.subheader("Train AI models using PyTorch & Hugging Face Transformers")
# Sidebar Configuration
st.sidebar.header("Configuration")
hf_user = st.sidebar.selectbox("Hugging Face User", ["hennings1984"])
task = st.sidebar.selectbox("Select Task", ["Text Classification", "Sentiment Analysis", "Text Generation", "Translation"])
hardware = st.sidebar.selectbox("Hardware", ["CPU", "Single GPU", "Multi-GPU", "TPU"])
model_choice = st.sidebar.selectbox("Choose Model", ["bert-base-uncased", "distilbert-base-uncased", "roberta-base", "Custom Model"])
dataset_source = st.sidebar.selectbox("Dataset Source", ["glue/sst2", "imdb", "ag_news", "Custom"])
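# Note: only the classification path is wired up in train_model below; the
# "Text Generation" and "Translation" options would need different model
# classes (e.g. AutoModelForCausalLM / AutoModelForSeq2SeqLM) and loss setups.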
# Training Parameters
epochs = st.sidebar.slider("Number of Epochs", 1, 10, 3)
batch_size = st.sidebar.selectbox("Batch Size", [8, 16, 32, 64], index=1)
learning_rate = st.sidebar.slider("Learning Rate", 1e-6, 1e-3, 2e-5, format="%.6f")
# Check if a GPU is available. "tpu" is not a native torch device string, and
# TPU training would require the optional torch_xla package, so fall back to CPU.
device = "cuda" if torch.cuda.is_available() and hardware in ["Single GPU", "Multi-GPU"] else "cpu"
if hardware == "TPU":
    st.sidebar.warning("TPU support requires torch_xla; falling back to CPU.")
st.sidebar.write(f"**Using Device:** {device.upper()}")
# Checkpoint Handling: the training loop writes checkpoint_epoch_N.pth files,
# so resume from the most recent one rather than a fixed filename
resume_training = st.sidebar.checkbox("Resume Training from Checkpoint")
_checkpoints = sorted(glob.glob("checkpoint_epoch_*.pth"), key=os.path.getmtime)
checkpoint_path = _checkpoints[-1] if (resume_training and _checkpoints) else None
# File Paths
log_file = "train_log.txt"
metrics_file = "metrics.csv"
# Training Buttons
st.write("### Model Training Control")
start_train = st.button("Start Training πŸš€")
stop_train = st.button("Stop Training β›”")
# Live Logs Display
st.write("### Training Logs (Live Updates)")
log_area = st.empty()
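# The epoch loop below pushes only its newest line into log_area. A small
# optional sketch (hypothetical helper, not called by the loop as written) that
# re-reads the log file so the panel could show the full history instead:
def render_full_log():
    if os.path.exists(log_file):
        with open(log_file) as fh:
            log_area.text(fh.read())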
# Live Training Metrics
st.write("### Training Metrics πŸ“Š")
# Training Function
def train_model():
    st.success(f"Training started for {task} with {model_choice} on {device.upper()}")
    # "Custom Model" has no Hub checkpoint to pull, so bail out early instead of
    # carrying None handles into tokenization and the Trainer
    if model_choice == "Custom Model":
        st.error("Custom models are not wired up yet; please pick a pretrained model.")
        return
    # Load model & tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_choice)
    model = AutoModelForSequenceClassification.from_pretrained(model_choice, num_labels=2)
    # Load dataset
    if dataset_source == "Custom":
        uploaded_file = st.sidebar.file_uploader("Upload your dataset", type=["csv", "json"])
        if uploaded_file is None:
            st.error("Please upload a CSV or JSON dataset before starting training.")
            return
        df = pd.read_csv(uploaded_file) if uploaded_file.name.endswith(".csv") else pd.read_json(uploaded_file)
        # Wrap the frame in a DatasetDict so custom and hub data share one code path
        dataset = DatasetDict({"train": Dataset.from_pandas(df)})
    else:
        # Ids like "glue/sst2" are a dataset name plus a config name
        name, _, config = dataset_source.partition("/")
        dataset = load_dataset(name, config) if config else load_dataset(name)
    # Check the available columns and ask the user which hold the text and labels
    if "train" in dataset:
        train_data = dataset["train"]
        columns = list(train_data.features.keys())
        text_column = st.sidebar.selectbox("Select Text Column", columns)
        label_column = st.sidebar.selectbox("Select Label Column", columns)
    else:
        st.error("Dataset not in the expected format. Ensure it has a 'train' split.")
        return
    # Tokenization function
    def tokenize_function(examples):
        return tokenizer(examples[text_column], truncation=True, padding="max_length")
    # Rename the label column to the name the Trainer expects
    if label_column not in ("label", "labels"):
        dataset = dataset.rename_column(label_column, "label")
    # Apply tokenization
    tokenized_datasets = dataset.map(tokenize_function, batched=True)
    train_dataset = tokenized_datasets["train"]
    # Prefer a held-out split for evaluation; fall back to the train split
    eval_split = "test" if "test" in tokenized_datasets else "validation" if "validation" in tokenized_datasets else "train"
    eval_dataset = tokenized_datasets[eval_split]
    # Checkpoint Handling
    if resume_training and checkpoint_path:
        # map_location lets CPU-only machines load GPU-saved checkpoints
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    # Move model to device (the Trainer also handles placement; this is harmless)
    model.to(torch.device(device))
    # Training arguments: num_train_epochs=1 because the loop below calls
    # trainer.train() once per epoch so it can log and checkpoint in between
    training_args = TrainingArguments(
        output_dir="./results",
        evaluation_strategy="epoch",
        logging_dir="./logs",
        logging_steps=5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=1,
        save_strategy="epoch",
        learning_rate=learning_rate,
    )
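    # results.get("eval_accuracy") in the loop below is only populated if the
    # Trainer receives a compute_metrics callback; it is not reported by default.
    # A minimal sketch (assumes the classification head's raw logits arrive in
    # eval_pred.predictions), wired into the Trainer below:
    def compute_metrics(eval_pred):
        logits, labels = eval_pred.predictions, eval_pred.label_ids
        preds = np.argmax(logits, axis=-1)  # predicted class per example
        return {"accuracy": float((preds == labels).mean())}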
    # Trainer setup
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )
    # Training Loop: one trainer.train() call per epoch so logs, checkpoints,
    # and the UI refresh between epochs
    metrics = []
    with open(log_file, "w") as log_file_handle:
        log_file_handle.write("Starting training...\n")
        log_file_handle.flush()
        for epoch in range(epochs):
            trainer.train()
            results = trainer.evaluate()
            # Save Checkpoint
            torch.save(model.state_dict(), f"checkpoint_epoch_{epoch+1}.pth")
            # Log results
            log_text = (
                f"Epoch {epoch+1}: Loss = {results['eval_loss']:.4f}, "
                f"Accuracy = {results.get('eval_accuracy', 0):.4f}\n"
            )
            log_file_handle.write(log_text)
            log_file_handle.flush()
            # Save metrics
            metrics.append({"epoch": epoch + 1, "loss": results["eval_loss"],
                            "accuracy": results.get("eval_accuracy", 0)})
            pd.DataFrame(metrics).to_csv(metrics_file, index=False)
            # Update logs & metrics in UI
            log_area.text(log_text)
            st.line_chart(pd.DataFrame(metrics).set_index("epoch"))
            time.sleep(2)
# Start Training
if start_train:
    train_model()
# Stop Training (note: this only shows a message; stopping mid-run relies on
# Streamlit rerunning the script when the button is clicked)
if stop_train:
    st.warning("Training stopped manually.")
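# To launch locally (standard Streamlit CLI):
#   streamlit run app.py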