# NOTE(review): the lines that were here ("Spaces:", "Sleeping", file size,
# commit hashes, a run of line numbers) were Hugging Face Spaces page chrome
# captured by the scrape, not source code — removed so the file parses.
# app_type: streamlit
import os
import time

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st

from evaluate_model import generate_review, compute_metrics, evaluate_perplexity
from finetune_lora import train_lora
# Page-level configuration and top-level navigation.
# NOTE(review): emoji in the original were mojibake (UTF-8 bytes mangled by a
# bad decode); reconstructed here from the surviving byte prefixes.
st.set_page_config(page_title="LLM LoRA Fine-Tuning App", layout="wide")
st.title("🧠 LoRA Fine-Tuned LLM for Product Review Generation & Evaluation")
tab1, tab2 = st.tabs(["⚙️ Fine-Tuning", "🪶 Generate & Evaluate Reviews"])
# ----------- TAB 1: FINE-TUNING -----------
# Collects LoRA hyperparameters, runs train_lora(), and visualizes the result.
with tab1:
    st.header("⚙️ Fine-Tune Model with LoRA")

    # Hyperparameter controls for the fine-tuning run.
    model_choice = st.selectbox(
        "Select Base Model",
        ["gpt2", "Qwen/Qwen2.5-1.5B", "meta-llama/Llama-3.2-1B"],
    )
    epochs = st.slider("Epochs", 1, 5, 2)
    lr = st.number_input("Learning Rate", value=1e-4, step=1e-5, format="%.5f")

    metrics = None
    if st.button("🚀 Start Fine-Tuning"):
        st.info(f"Starting LoRA fine-tuning on `{model_choice}` for {epochs} epochs...")
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Cosmetic progress animation only — the real work happens in the
        # train_lora() call below, whose duration is not tied to this bar.
        for epoch in range(epochs):
            status_text.text(f"Training... Epoch {epoch+1}/{epochs}")
            time.sleep(1.5)  # simulate training progress
            progress_bar.progress(int((epoch + 1) / epochs * 100))

        with st.spinner("Finalizing and saving adapter..."):
            metrics = train_lora(model_choice, epochs, lr)

        st.success("✅ Fine-tuning Completed Successfully!")
        st.balloons()

        # Show a summary, plus a loss curve when train_lora() reports one.
        if metrics:
            st.write("### 📊 Training Summary")
            st.json(metrics)
            if "train_loss" in metrics:
                fig, ax = plt.subplots()
                ax.plot(metrics["train_loss"], label="Training Loss", marker='o')
                ax.set_xlabel("Steps")
                ax.set_ylabel("Loss")
                ax.legend()
                ax.grid(True)
                st.pyplot(fig)

        progress_bar.empty()
        status_text.text("Training finished ✔️")
# ----------- TAB 2: GENERATION & EVALUATION -----------
# Generates reviews with the fine-tuned model and, after every 10th review,
# runs an evaluation pass (length / tone-match / perplexity) and charts trends.
with tab2:
    st.header("🪶 Generate & Evaluate Product Reviews")

    model_choice = st.selectbox(
        "Select Fine-Tuned Model",
        ["gpt2", "Qwen/Qwen2.5-1.5B", "meta-llama/Llama-3.2-1B"],
        key="gen_model",
    )
    product = st.text_input("Product Name", "Amazon Kindle E-Reader")
    category = st.text_input("Category", "E-Reader")
    features = st.text_area("Features", "6-inch display, WiFi, lightweight")
    rating = st.slider("Rating", 1, 5, 5)
    tone = st.selectbox("Tone", ["enthusiastic", "critical", "balanced"])

    # Persist generated reviews across Streamlit reruns.
    if "generated_reviews" not in st.session_state:
        st.session_state.generated_reviews = []

    if st.button("✨ Generate Review"):
        with st.spinner("Generating review..."):
            review = generate_review(model_choice, product, category, features, rating, tone)
        st.success("✅ Generated Review:")
        st.write(review)
        st.session_state.generated_reviews.append(review)

    count = len(st.session_state.generated_reviews)
    st.info(f"🧩 Total Generated Reviews: {count}")

    # --------- Evaluation Dashboard (Tracking) ---------
    st.subheader("📊 Evaluation Dashboard")
    # Initialize storage for evaluation history.
    if "evaluation_history" not in st.session_state:
        st.session_state.evaluation_history = []

    # -------- Evaluate every 10 reviews --------
    # BUGFIX: the original tested only `count % 10 == 0`, which is also true
    # at count == 0 and ran a full evaluation on an empty review list at
    # first page load. Require at least one review before evaluating.
    if count > 0 and count % 10 == 0:
        st.warning("🔄 Evaluating model performance after 10 reviews...")
        with st.spinner("Running metrics evaluation..."):
            metrics = compute_metrics(st.session_state.generated_reviews, requested_tone=tone)
            ppl = evaluate_perplexity(model_choice, test_csv="dataset/amazon_product_reviews.csv")

        st.subheader("📈 Evaluation Metrics")
        st.write(f"- **Average Length:** {metrics['avg_length']:.2f} words")
        st.write(f"- **Tone Match Ratio:** {metrics['tone_match_ratio']*100:.1f}%")
        st.write(f"- **Perplexity (↓ better):** {ppl:.2f}")

        # Append this evaluation snapshot so trends accumulate across runs.
        eval_data = {
            "reviews_count": count,
            "avg_length": metrics["avg_length"],
            "tone_match": metrics["tone_match_ratio"] * 100,
            "perplexity": ppl,
        }
        st.session_state.evaluation_history.append(eval_data)

        # pandas is imported at module level (moved from the inline import here).
        df = pd.DataFrame(st.session_state.evaluation_history)
        # Show line trends over successive evaluations.
        if not df.empty:
            st.line_chart(df.set_index("reviews_count")[["avg_length", "tone_match", "perplexity"]])
            st.dataframe(df.style.format({"avg_length": "{:.2f}", "tone_match": "{:.1f}", "perplexity": "{:.2f}"}))

    st.markdown("✅ Model evaluation updates automatically after every 10 generated reviews — showing live performance improvements.")