Update app.py
app.py CHANGED
@@ -1,119 +1,121 @@
# app_type: streamlit

import streamlit as st
from finetune_lora import train_lora
from evaluate_model import generate_review, compute_metrics, evaluate_perplexity
import matplotlib.pyplot as plt
import pandas as pd
import time
import os

st.set_page_config(page_title="LLM LoRA Fine-Tuning App", layout="wide")
st.title("🧠 LoRA Fine-Tuned LLM for Product Review Generation & Evaluation")

tab1, tab2 = st.tabs(["⚙️ Fine-Tuning", "🪶 Generate & Evaluate Reviews"])

# ----------- TAB 1: FINE-TUNING -----------
with tab1:
    st.header("⚙️ Fine-Tune Model with LoRA")

    model_choice = st.selectbox("Select Base Model", ["gpt2", "Qwen/Qwen2.5-1.5B", "meta-llama/Llama-3.2-1B"])
    epochs = st.slider("Epochs", 1, 5, 2)
    lr = st.number_input("Learning Rate", value=1e-4, step=1e-5, format="%.5f")

    metrics = None

    if st.button("🚀 Start Fine-Tuning"):
        st.info(f"Starting LoRA fine-tuning on `{model_choice}` for {epochs} epochs...")

        progress_bar = st.progress(0)
        status_text = st.empty()

        for epoch in range(epochs):
            status_text.text(f"Training... Epoch {epoch+1}/{epochs}")
            time.sleep(1.5)  # simulate per-epoch progress for the UI
            progress_bar.progress(int((epoch + 1) / epochs * 100))

        with st.spinner("Finalizing and saving adapter..."):
            metrics = train_lora(model_choice, epochs, lr)

        st.success("✅ Fine-tuning Completed Successfully!")
        st.balloons()

        if metrics:
            st.write("### 📉 Training Summary")
            st.json(metrics)

            if "train_loss" in metrics:
                fig, ax = plt.subplots()
                ax.plot(metrics["train_loss"], label="Training Loss", marker="o")
                ax.set_xlabel("Steps")
                ax.set_ylabel("Loss")
                ax.legend()
                ax.grid(True)
                st.pyplot(fig)

        progress_bar.empty()
        status_text.text("Training finished ✔️")

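Note: train_lora is imported from finetune_lora, which is not part of this commit. The tab above only relies on it returning a JSON-serializable metrics dict, optionally carrying a "train_loss" series for the loss plot. A minimal sketch of that assumed contract follows; every name and field in it is an assumption, not the module's confirmed API:

def train_lora(model_name: str, epochs: int, lr: float) -> dict:
    """Fine-tune model_name with LoRA and return summary metrics (sketch only).

    The real module presumably wraps a peft.LoraConfig, a transformers.Trainer
    run, and an adapter save step; none of that is shown in this diff.
    """
    # ... training elided ...
    return {
        "model": model_name,                # assumed field
        "epochs": epochs,                   # assumed field
        "learning_rate": lr,                # assumed field
        "train_loss": [2.31, 1.87, 1.52],   # app.py plots this key if present
    }
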
# ----------- TAB 2: GENERATION & EVALUATION -----------
with tab2:
    st.header("🪶 Generate & Evaluate Product Reviews")

    model_choice = st.selectbox("Select Fine-Tuned Model", ["gpt2", "Qwen/Qwen2.5-1.5B", "meta-llama/Llama-3.2-1B"], key="gen_model")

    product = st.text_input("Product Name", "Amazon Kindle E-Reader")
    category = st.text_input("Category", "E-Reader")
    features = st.text_area("Features", "6-inch display, WiFi, lightweight")
    rating = st.slider("Rating", 1, 5, 5)
    tone = st.selectbox("Tone", ["enthusiastic", "critical", "balanced"])

    if "generated_reviews" not in st.session_state:
        st.session_state.generated_reviews = []

    if st.button("✨ Generate Review"):
        with st.spinner("Generating review..."):
            review = generate_review(model_choice, product, category, features, rating, tone)
        st.success("✅ Generated Review:")
        st.write(review)
        st.session_state.generated_reviews.append(review)

    # Computed outside the button block so it is defined on every rerun
    count = len(st.session_state.generated_reviews)
    st.info(f"🧩 Total Generated Reviews: {count}")

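Note: generate_review, also from the evaluate_model module outside this diff, takes the model name plus the five form inputs and returns one review string. A plausible sketch, assuming a standard transformers text-generation pipeline underneath; the prompt template and sampling parameters are guesses:

from transformers import pipeline

def generate_review(model_name, product, category, features, rating, tone):
    """Return a single generated product review (sketch, not the actual module)."""
    prompt = (
        f"Write a {tone} {rating}-star review for {product} ({category}) "
        f"with features: {features}.\nReview:"
    )
    generator = pipeline("text-generation", model=model_name)
    out = generator(prompt, max_new_tokens=120, do_sample=True, temperature=0.8)
    # pipeline output includes the prompt; return only the continuation
    return out[0]["generated_text"][len(prompt):].strip()
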
    # --------- Evaluation Dashboard (Tracking) ---------
    st.subheader("📊 Evaluation Dashboard")

    # Initialize storage for evaluation history
    if "evaluation_history" not in st.session_state:
        st.session_state.evaluation_history = []

    # -------- Evaluate every 10 reviews --------
    # Guard against count == 0 and against re-logging the same batch on reruns
    already_logged = any(e["reviews_count"] == count for e in st.session_state.evaluation_history)
    if count > 0 and count % 10 == 0 and not already_logged:
        st.warning("🔄 Evaluating model performance after 10 reviews...")
        with st.spinner("Running metrics evaluation..."):
            metrics = compute_metrics(st.session_state.generated_reviews, requested_tone=tone)
            ppl = evaluate_perplexity(model_choice, test_csv="dataset/amazon_product_reviews.csv")

        st.subheader("📈 Evaluation Metrics")
        st.write(f"- **Average Length:** {metrics['avg_length']:.2f} words")
        st.write(f"- **Tone Match Ratio:** {metrics['tone_match_ratio']*100:.1f}%")
        st.write(f"- **Perplexity (↓ is better):** {ppl:.2f}")

        # Save current evaluation data
        eval_data = {
            "reviews_count": count,
            "avg_length": metrics["avg_length"],
            "tone_match": metrics["tone_match_ratio"] * 100,
            "perplexity": ppl,
        }
        st.session_state.evaluation_history.append(eval_data)

    df = pd.DataFrame(st.session_state.evaluation_history)

    # Show line trends over evaluations
    if not df.empty:
        st.line_chart(df.set_index("reviews_count")[["avg_length", "tone_match", "perplexity"]])
        st.dataframe(df.style.format({"avg_length": "{:.2f}", "tone_match": "{:.1f}", "perplexity": "{:.2f}"}))

    st.markdown("✅ Model evaluation updates automatically after every 10 generated reviews, showing live performance improvements.")