Spaces:
Build error
Build error
Commit
·
2bdd84f
1
Parent(s):
a997aba
New Framework Change
Browse files
- __pycache__/utils.cpython-313.pyc +0 -0
- app.py +12 -349
- datasets/train_data.csv +373 -0
- pages/Chat.py +153 -0
- pages/Conversion.py +24 -0
- pages/Dataset_Management.py +360 -0
- pages/Finetune.py +170 -0
- requirements.txt +7 -1
- utils.py +468 -0
__pycache__/utils.cpython-313.pyc
ADDED
|
Binary file (20.8 kB). View file
|
|
|
app.py
CHANGED
|
@@ -1,355 +1,18 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import pandas as pd
|
| 3 |
-
import numpy as np
|
| 4 |
-
import torch
|
| 5 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 6 |
-
import matplotlib.pyplot as plt
|
| 7 |
-
import time
|
| 8 |
-
import json
|
| 9 |
-
import re
|
| 10 |
-
import os
|
| 11 |
-
import asyncio
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
-
|
| 16 |
-
# Utility Functions
|
| 17 |
-
# -------------------------------
|
| 18 |
-
|
| 19 |
-
token = st.secrets["HF_TOKEN"]
|
| 20 |
-
os.environ['CURL_CA_BUNDLE'] = ''
|
| 21 |
-
|
| 22 |
-
@st.cache_resource
|
| 23 |
-
def load_model(model_id: str, token: str):
|
| 24 |
-
"""
|
| 25 |
-
Loads and caches the Gemma model and tokenizer with authentication token.
|
| 26 |
-
"""
|
| 27 |
-
try:
|
| 28 |
-
# Create and run an event loop explicitly
|
| 29 |
-
asyncio.run(async_load(model_id, token))
|
| 30 |
-
|
| 31 |
-
# Ensure torch classes path is valid (optional)
|
| 32 |
-
if not hasattr(torch, "classes") or not torch.classes:
|
| 33 |
-
torch.classes = torch._C._get_python_module("torch.classes")
|
| 34 |
-
|
| 35 |
-
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
|
| 36 |
-
model = AutoModelForCausalLM.from_pretrained(model_id, token=token)
|
| 37 |
-
|
| 38 |
-
return tokenizer, model
|
| 39 |
-
|
| 40 |
-
except Exception as e:
|
| 41 |
-
print(f"An error occurred: {e}")
|
| 42 |
-
st.error(f"Model loading failed: {e}")
|
| 43 |
-
return None, None
|
| 44 |
-
|
| 45 |
-
async def async_load(model_id, token):
|
| 46 |
-
"""
|
| 47 |
-
Dummy async function to initialize the event loop.
|
| 48 |
-
"""
|
| 49 |
-
await asyncio.sleep(0.1) # Dummy async operation
|
| 50 |
-
|
| 51 |
-
def preprocess_data(uploaded_file, file_extension):
|
| 52 |
-
"""
|
| 53 |
-
Reads the uploaded file and returns a processed version.
|
| 54 |
-
Supports CSV, JSONL, and TXT.
|
| 55 |
-
"""
|
| 56 |
-
data = None
|
| 57 |
-
try:
|
| 58 |
-
if file_extension == "csv":
|
| 59 |
-
data = pd.read_csv(uploaded_file)
|
| 60 |
-
elif file_extension == "jsonl":
|
| 61 |
-
# Each line is a JSON object.
|
| 62 |
-
data = [json.loads(line) for line in uploaded_file.readlines()]
|
| 63 |
-
try:
|
| 64 |
-
data = pd.DataFrame(data)
|
| 65 |
-
except Exception:
|
| 66 |
-
st.warning("Unable to convert JSONL to a table. Previewing raw JSON objects.")
|
| 67 |
-
elif file_extension == "txt":
|
| 68 |
-
text_data = uploaded_file.read().decode("utf-8")
|
| 69 |
-
data = text_data.splitlines()
|
| 70 |
-
except Exception as e:
|
| 71 |
-
st.error(f"Error processing file: {e}")
|
| 72 |
-
return data
|
| 73 |
-
|
| 74 |
-
def clean_text(text, lowercase=True, remove_punctuation=True):
|
| 75 |
-
"""
|
| 76 |
-
Cleans text data by applying basic normalization.
|
| 77 |
-
"""
|
| 78 |
-
if lowercase:
|
| 79 |
-
text = text.lower()
|
| 80 |
-
if remove_punctuation:
|
| 81 |
-
text = re.sub(r'[^\w\s]', '', text)
|
| 82 |
-
return text
|
| 83 |
-
|
| 84 |
-
def plot_training_metrics(epochs, loss_values, accuracy_values):
|
| 85 |
-
"""
|
| 86 |
-
Returns a matplotlib figure plotting training loss and accuracy.
|
| 87 |
-
"""
|
| 88 |
-
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
|
| 89 |
-
ax[0].plot(range(1, epochs+1), loss_values, marker='o', color='red')
|
| 90 |
-
ax[0].set_title("Training Loss")
|
| 91 |
-
ax[0].set_xlabel("Epoch")
|
| 92 |
-
ax[0].set_ylabel("Loss")
|
| 93 |
-
|
| 94 |
-
ax[1].plot(range(1, epochs+1), accuracy_values, marker='o', color='green')
|
| 95 |
-
ax[1].set_title("Training Accuracy")
|
| 96 |
-
ax[1].set_xlabel("Epoch")
|
| 97 |
-
ax[1].set_ylabel("Accuracy")
|
| 98 |
-
|
| 99 |
-
return fig
|
| 100 |
-
|
| 101 |
-
def simulate_training(num_epochs):
|
| 102 |
-
"""
|
| 103 |
-
Simulates a training loop for demonstration.
|
| 104 |
-
Yields current epoch, loss values, and accuracy values.
|
| 105 |
-
Replace this with your actual fine-tuning loop.
|
| 106 |
-
"""
|
| 107 |
-
loss_values = []
|
| 108 |
-
accuracy_values = []
|
| 109 |
-
for epoch in range(1, num_epochs + 1):
|
| 110 |
-
loss = np.exp(-epoch) + np.random.random() * 0.1
|
| 111 |
-
acc = 0.5 + (epoch / num_epochs) * 0.5 + np.random.random() * 0.05
|
| 112 |
-
loss_values.append(loss)
|
| 113 |
-
accuracy_values.append(acc)
|
| 114 |
-
yield epoch, loss_values, accuracy_values
|
| 115 |
-
time.sleep(1) # Simulate computation time
|
| 116 |
-
|
| 117 |
-
def quantize_model(model):
|
| 118 |
-
"""
|
| 119 |
-
Applies dynamic quantization for demonstration.
|
| 120 |
-
In practice, adjust this based on your model and target hardware.
|
| 121 |
-
"""
|
| 122 |
-
quantized_model = torch.quantization.quantize_dynamic(
|
| 123 |
-
model, {torch.nn.Linear}, dtype=torch.qint8
|
| 124 |
-
)
|
| 125 |
-
return quantized_model
|
| 126 |
-
|
| 127 |
-
def convert_to_torchscript(model):
|
| 128 |
-
"""
|
| 129 |
-
Converts the model to TorchScript format.
|
| 130 |
-
"""
|
| 131 |
-
example_input = torch.randint(0, 100, (1, 10))
|
| 132 |
-
traced_model = torch.jit.trace(model, example_input)
|
| 133 |
-
return traced_model
|
| 134 |
-
|
| 135 |
-
def convert_to_onnx(model, output_path="model.onnx"):
|
| 136 |
-
"""
|
| 137 |
-
Converts the model to ONNX format.
|
| 138 |
-
"""
|
| 139 |
-
dummy_input = torch.randint(0, 100, (1, 10))
|
| 140 |
-
torch.onnx.export(model, dummy_input, output_path, input_names=["input"], output_names=["output"])
|
| 141 |
-
return output_path
|
| 142 |
-
|
| 143 |
-
def load_finetuned_model(model, checkpoint_path="fine_tuned_model.pt"):
|
| 144 |
-
"""
|
| 145 |
-
Loads the fine-tuned model from the checkpoint.
|
| 146 |
-
"""
|
| 147 |
-
if os.path.exists(checkpoint_path):
|
| 148 |
-
model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu')))
|
| 149 |
-
model.eval()
|
| 150 |
-
st.success("Fine-tuned model loaded successfully!")
|
| 151 |
-
else:
|
| 152 |
-
st.error(f"Checkpoint not found: {checkpoint_path}")
|
| 153 |
-
return model
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
def generate_response(prompt, model, tokenizer, max_length=200):
|
| 157 |
-
"""
|
| 158 |
-
Generates a response using the fine-tuned model.
|
| 159 |
-
"""
|
| 160 |
-
# Tokenize the prompt
|
| 161 |
-
inputs = tokenizer(prompt, return_tensors="pt").input_ids
|
| 162 |
-
|
| 163 |
-
# Generate text
|
| 164 |
-
with torch.no_grad():
|
| 165 |
-
outputs = model.generate(inputs, max_length=max_length, num_return_sequences=1, temperature=0.7)
|
| 166 |
-
|
| 167 |
-
# Decode the output
|
| 168 |
-
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 169 |
-
return response
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
# -------------------------------
|
| 173 |
-
# Application Layout
|
| 174 |
-
# -------------------------------
|
| 175 |
-
|
| 176 |
-
st.title("One-Stop Gemma Model Fine-tuning, Quantization & Conversion UI")
|
| 177 |
-
st.markdown("""
|
| 178 |
-
This application is designed for beginners in generative AI.
|
| 179 |
-
It allows you to fine-tune, quantize, and convert Gemma models with an intuitive UI.
|
| 180 |
-
You can upload your dataset, clean and preview your data, configure training parameters, and export your model in different formats.
|
| 181 |
-
""")
|
| 182 |
-
|
| 183 |
-
# Sidebar: Model selection and data upload
|
| 184 |
-
st.sidebar.header("Configuration")
|
| 185 |
-
|
| 186 |
-
# Model Selection
|
| 187 |
-
selected_model = st.sidebar.selectbox("Select Gemma Model", options=["Gemma-Small", "Gemma-Medium", "Gemma-Large"])
|
| 188 |
-
if selected_model == "google/gemma-3-1b-it":
|
| 189 |
-
model_id = "google/gemma-3-1b-it"
|
| 190 |
-
elif selected_model == "google/gemma-3-4b-it":
|
| 191 |
-
model_id = "google/gemma-3-4b-it"
|
| 192 |
-
else:
|
| 193 |
-
model_id = "google/gemma-3-1b-it"
|
| 194 |
-
|
| 195 |
-
loading_placeholder = st.sidebar.empty()
|
| 196 |
-
loading_placeholder.info("Loading model...")
|
| 197 |
-
tokenizer, model = load_model(model_id, token)
|
| 198 |
-
loading_placeholder.success("Model loaded.")
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
# Dataset Upload
|
| 202 |
-
uploaded_file = st.sidebar.file_uploader("Upload Dataset (CSV, JSONL, TXT)", type=["csv", "jsonl", "txt"])
|
| 203 |
-
data = None
|
| 204 |
-
if uploaded_file is not None:
|
| 205 |
-
file_ext = uploaded_file.name.split('.')[-1].lower()
|
| 206 |
-
data = preprocess_data(uploaded_file, file_ext)
|
| 207 |
-
st.sidebar.subheader("Dataset Preview:")
|
| 208 |
-
if isinstance(data, pd.DataFrame):
|
| 209 |
-
st.sidebar.dataframe(data.head())
|
| 210 |
-
elif isinstance(data, list):
|
| 211 |
-
st.sidebar.write(data[:5])
|
| 212 |
-
else:
|
| 213 |
-
st.sidebar.write(data)
|
| 214 |
-
else:
|
| 215 |
-
st.sidebar.info("Awaiting dataset upload.")
|
| 216 |
-
|
| 217 |
-
# Data Cleaning Options (for TXT files)
|
| 218 |
-
if uploaded_file is not None and file_ext == "txt":
|
| 219 |
-
st.sidebar.subheader("Data Cleaning Options")
|
| 220 |
-
lowercase_option = st.sidebar.checkbox("Convert to lowercase", value=True)
|
| 221 |
-
remove_punct = st.sidebar.checkbox("Remove punctuation", value=True)
|
| 222 |
-
cleaned_data = [clean_text(line, lowercase=lowercase_option, remove_punctuation=remove_punct) for line in data]
|
| 223 |
-
st.sidebar.text_area("Cleaned Data Preview", value="\n".join(cleaned_data[:5]), height=150)
|
| 224 |
-
|
| 225 |
-
# Main Tabs for Different Operations
|
| 226 |
-
tabs = st.tabs(["Fine-tuning", "Quantization", "Model Conversion"])
|
| 227 |
-
|
| 228 |
-
# -------------------------------
|
| 229 |
-
# Fine-tuning Tab
|
| 230 |
-
# -------------------------------
|
| 231 |
-
with tabs[0]:
|
| 232 |
-
st.header("Fine-tuning")
|
| 233 |
-
st.markdown("Configure hyperparameters and start fine-tuning your Gemma model.")
|
| 234 |
-
|
| 235 |
-
col1, col2, col3 = st.columns(3)
|
| 236 |
-
with col1:
|
| 237 |
-
learning_rate = st.number_input("Learning Rate", value=1e-4, format="%.5f")
|
| 238 |
-
with col2:
|
| 239 |
-
batch_size = st.number_input("Batch Size", value=16, step=1)
|
| 240 |
-
with col3:
|
| 241 |
-
epochs = st.number_input("Epochs", value=3, step=1)
|
| 242 |
-
|
| 243 |
-
if st.button("Start Fine-tuning"):
|
| 244 |
-
if data is None:
|
| 245 |
-
st.error("Please upload a dataset first!")
|
| 246 |
-
else:
|
| 247 |
-
st.info("Starting fine-tuning...")
|
| 248 |
-
progress_bar = st.progress(0)
|
| 249 |
-
training_placeholder = st.empty()
|
| 250 |
-
loss_values = []
|
| 251 |
-
accuracy_values = []
|
| 252 |
-
|
| 253 |
-
# Simulate training loop (replace with your actual training code)
|
| 254 |
-
for epoch, losses, accs in simulate_training(epochs):
|
| 255 |
-
fig = plot_training_metrics(epoch, losses, accs)
|
| 256 |
-
training_placeholder.pyplot(fig)
|
| 257 |
-
progress_bar.progress(epoch/epochs)
|
| 258 |
-
st.success("Fine-tuning completed!")
|
| 259 |
-
|
| 260 |
-
# Save the fine-tuned model (for demonstration, saving state_dict)
|
| 261 |
-
if model:
|
| 262 |
-
torch.save(model.state_dict(), "fine_tuned_model.pt")
|
| 263 |
-
with open("fine_tuned_model.pt", "rb") as f:
|
| 264 |
-
st.download_button("Download Fine-tuned Model", data=f, file_name="fine_tuned_model.pt", mime="application/octet-stream")
|
| 265 |
-
else:
|
| 266 |
-
st.error("Model not loaded. Cannot save.")
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
# -------------------------------
|
| 270 |
-
# Quantization Tab
|
| 271 |
-
# -------------------------------
|
| 272 |
-
with tabs[1]:
|
| 273 |
-
st.header("Model Quantization")
|
| 274 |
-
st.markdown("Quantize your model to optimize for inference performance.")
|
| 275 |
-
quantize_choice = st.radio("Select Quantization Type", options=["Dynamic Quantization"], index=0)
|
| 276 |
-
|
| 277 |
-
if st.button("Apply Quantization"):
|
| 278 |
-
with st.spinner("Applying quantization..."):
|
| 279 |
-
quantized_model = quantize_model(model)
|
| 280 |
-
st.success("Model quantized successfully!")
|
| 281 |
-
torch.save(quantized_model.state_dict(), "quantized_model.pt")
|
| 282 |
-
with open("quantized_model.pt", "rb") as f:
|
| 283 |
-
st.download_button("Download Quantized Model", data=f, file_name="quantized_model.pt", mime="application/octet-stream")
|
| 284 |
-
|
| 285 |
-
# -------------------------------
|
| 286 |
-
# Model Conversion Tab
|
| 287 |
-
# -------------------------------
|
| 288 |
-
with tabs[2]:
|
| 289 |
-
st.header("Model Conversion")
|
| 290 |
-
st.markdown("Convert your model to a different format for deployment or optimization.")
|
| 291 |
-
conversion_option = st.selectbox("Select Conversion Format", options=["TorchScript", "ONNX"])
|
| 292 |
-
|
| 293 |
-
if st.button("Convert Model"):
|
| 294 |
-
if conversion_option == "TorchScript":
|
| 295 |
-
with st.spinner("Converting to TorchScript..."):
|
| 296 |
-
ts_model = convert_to_torchscript(model)
|
| 297 |
-
ts_model.save("model_ts.pt")
|
| 298 |
-
st.success("Converted to TorchScript!")
|
| 299 |
-
with open("model_ts.pt", "rb") as f:
|
| 300 |
-
st.download_button("Download TorchScript Model", data=f, file_name="model_ts.pt", mime="application/octet-stream")
|
| 301 |
-
elif conversion_option == "ONNX":
|
| 302 |
-
with st.spinner("Converting to ONNX..."):
|
| 303 |
-
onnx_path = convert_to_onnx(model, "model.onnx")
|
| 304 |
-
st.success("Converted to ONNX!")
|
| 305 |
-
with open(onnx_path, "rb") as f:
|
| 306 |
-
st.download_button("Download ONNX Model", data=f, file_name="model.onnx", mime="application/octet-stream")
|
| 307 |
-
|
| 308 |
-
# -------------------------------
|
| 309 |
-
# Response Generation Section
|
| 310 |
-
# -------------------------------
|
| 311 |
-
st.header("Generate Responses with Fine-Tuned Model")
|
| 312 |
-
st.markdown("Use the fine-tuned model to generate text responses based on your prompts.")
|
| 313 |
-
|
| 314 |
-
# Check if the fine-tuned model exists
|
| 315 |
-
if os.path.exists("fine_tuned_model.pt"):
|
| 316 |
-
# Load the fine-tuned model
|
| 317 |
-
model = load_finetuned_model(model, "fine_tuned_model.pt")
|
| 318 |
-
|
| 319 |
-
# Input prompt for generating responses
|
| 320 |
-
prompt = st.text_area("Enter a prompt:", "Once upon a time...")
|
| 321 |
-
|
| 322 |
-
# Max length slider
|
| 323 |
-
max_length = st.slider("Max Response Length", min_value=50, max_value=500, value=200, step=10)
|
| 324 |
-
|
| 325 |
-
if st.button("Generate Response"):
|
| 326 |
-
with st.spinner("Generating response..."):
|
| 327 |
-
response = generate_response(prompt, model, tokenizer, max_length)
|
| 328 |
-
st.success("Generated Response:")
|
| 329 |
-
st.write(response)
|
| 330 |
-
|
| 331 |
-
else:
|
| 332 |
-
st.warning("Fine-tuned model not found. Please fine-tune the model first.")
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
# -------------------------------
|
| 336 |
-
# Optional: Cloud Integration Snippet
|
| 337 |
-
# -------------------------------
|
| 338 |
-
st.header("Cloud Integration")
|
| 339 |
st.markdown("""
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
| 342 |
""")
|
| 343 |
-
st.code("""
|
| 344 |
-
from google.cloud import storage
|
| 345 |
-
|
| 346 |
-
def upload_to_gcs(bucket_name, source_file_name, destination_blob_name):
|
| 347 |
-
storage_client = storage.Client()
|
| 348 |
-
bucket = storage_client.bucket(bucket_name)
|
| 349 |
-
blob = bucket.blob(destination_blob_name)
|
| 350 |
-
blob.upload_from_filename(source_file_name)
|
| 351 |
-
print(f"Uploaded {source_file_name} to {destination_blob_name}")
|
| 352 |
|
| 353 |
-
#
|
| 354 |
-
#
|
| 355 |
-
""
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
st.set_page_config(page_title="Gemma LLM Fine-Tuning UI", layout="wide")
|
| 4 |
|
| 5 |
+
st.title("Gemma LLM Fine-Tuning Suite 🚀")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
st.markdown("""
|
| 7 |
+
### 🔥 **Multi-page AI Model Trainer**
|
| 8 |
+
- **Chat**: Interact with the model.
|
| 9 |
+
- **Fine-tuning**: Train on `train_data.csv` or upload new datasets.
|
| 10 |
+
- **Conversion**: Export models to TorchScript and ONNX.
|
| 11 |
+
- **Dataset Management**: View and add to your training data.
|
| 12 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
# st.sidebar.title("Navigation")
|
| 15 |
+
# st.sidebar.page_link("pages/Chat.py", label="🔹 Chat")
|
| 16 |
+
# st.sidebar.page_link("pages/Finetune.py", label="🔹 Fine-tuning")
|
| 17 |
+
# st.sidebar.page_link("pages/Conversion.py", label="🔹 Model Conversion")
|
| 18 |
+
# st.sidebar.page_link("pages/Dataset_Management.py", label="🔹 Dataset Management")
|
datasets/train_data.csv
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
prompt,response
|
| 2 |
+
"Bhai, kal movie dekhne chale?","Haan bhai, kaunsi dekhni hai?"
|
| 3 |
+
Kya haal hai bhai?,"Bas bhai, zindagi chal rahi hai."
|
| 4 |
+
Tu kal gym gaya tha?,"Haan bhai, lekin bas selfie kheechi thi 😎."
|
| 5 |
+
Tere paas charger hai?,"Haan bhai, par battery khatam hai 😬."
|
| 6 |
+
Tu itna late kyun aaya?,"Bhai, traffic ne jaan le li!"
|
| 7 |
+
Tune pizza order kiya?,"Haan bhai, extra cheese bhi maang liya 😎."
|
| 8 |
+
Office ka workload kaisa chal raha hai?,"Bhai, workload nahi, dukh chal raha hai 😩."
|
| 9 |
+
Tune WhatsApp group chhod diya?,"Haan bhai, bas drama zyada ho gaya tha 😂."
|
| 10 |
+
Weekend pe kya plan hai?,"Bhai, full chill maarne ka mood hai 🥂."
|
| 11 |
+
Kya tu aaj free hai?,"Free nahi bhai, bas zindagi ki maar kha raha hoon 😎."
|
| 12 |
+
Exam ka preparation kaisa hai?,"Bhai, bas TikTok scrolling chal raha hai 😂."
|
| 13 |
+
Tere paas notes hain?,"Haan bhai, par padhai se zyada memes hain usme 😂."
|
| 14 |
+
Kal cricket khelne chale?,"Bhai, fielding nahi karunga 😎."
|
| 15 |
+
Shaadi kab kar raha hai bhai?,"Bhai, pehle job toh lagne de 😂."
|
| 16 |
+
Tera phone kitna slow hai!,"Bhai, processor nahi, emotions chala raha hai 😂."
|
| 17 |
+
Tune uska reply dekha?,"Haan bhai, bas seen maar diya 😢."
|
| 18 |
+
Bhai tu kab aayega ghar?,Bhai jab zindagi thoda sudhar jayegi 😂.
|
| 19 |
+
Tera boss kaisa hai?,"Bhai, sach bolu? Zindagi ka villain hai 😂."
|
| 20 |
+
Tera laptop hang ho gaya?,"Bhai, meri zindagi bhi hang ho gayi 😂."
|
| 21 |
+
Khana kha liya?,"Haan bhai, lekin mann nahi bhara 😂."
|
| 22 |
+
Tu kal party mein aaya nahi?,"Bhai, neend zyada zaroori thi 😂."
|
| 23 |
+
Tu kal gym nahi gaya?,"Bhai, motivation so raha tha 😎."
|
| 24 |
+
Tune uska Insta story dekha?,"Haan bhai, dil toot gaya 😂."
|
| 25 |
+
"Bhai, kal salary aayi?","Haan bhai, aur gayi bhi 😂."
|
| 26 |
+
Tune latest web series dekhi?,"Haan bhai, binge-watching expert ban gaya 😂."
|
| 27 |
+
Tu kal late kyun aaya?,"Bhai, nind ki problem hai 😂."
|
| 28 |
+
Tune ghar ka kaam kiya?,"Bhai, homework nahi, gharwork kar raha hoon 😂."
|
| 29 |
+
"Bhai, tu kahan hai?","Bhai, zindagi ke jhamelon mein 😂."
|
| 30 |
+
"Bhai, kal off hai kya?","Bhai, sapno mein hi hai 😂."
|
| 31 |
+
Tere gharwale strict hain?,"Bhai, Hitler ke fan hain 😂."
|
| 32 |
+
Tune match dekha?,"Haan bhai, heart attack ho gaya tha 😂."
|
| 33 |
+
Tu kal late kyun aaya?,"Bhai, sapno mein tha 😂."
|
| 34 |
+
Tune video banayi?,"Bhai, banayi nahi, viral ho gayi 😂."
|
| 35 |
+
Tu gym ja raha hai?,"Bhai, sirf reels dekh raha hoon 😂."
|
| 36 |
+
Koi naya gaana recommend kar?,"Bhai, Atif ka purana sun le 😂."
|
| 37 |
+
Tu kal kahan tha?,"Bhai, so raha tha 😂."
|
| 38 |
+
Tera net slow hai?,"Bhai, 2G se bhi slow hai 😂."
|
| 39 |
+
Tune online shopping ki?,"Bhai, cart bhar diya, budget nahi 😂."
|
| 40 |
+
Bhai tera crush tera bhai ban gaya 😂,"Bhai, bas dua mein yaad rakhna 😂."
|
| 41 |
+
Tu kal kahan tha?,"Bhai, neend ka band baj raha tha 😂."
|
| 42 |
+
Tune late reply diya?,"Bhai, bas zindagi ka load hai 😂."
|
| 43 |
+
"Bhai, exam ka result aaya?","Haan bhai, bas asar nahi dikha 😂."
|
| 44 |
+
Tu cricket dekh raha hai?,"Bhai, dil thod diya unhone 😂."
|
| 45 |
+
"Bhai, kal ka plan cancel?","Bhai, neend ko priority di 😂."
|
| 46 |
+
Tune job apply ki?,"Bhai, apply nahi, try kar raha hoon 😂."
|
| 47 |
+
Tu kal pakda gaya?,"Bhai, meme share karte hue 😂."
|
| 48 |
+
Tera dost tujhse zyada cool hai?,"Bhai, thoda dukh hua 😂."
|
| 49 |
+
Tu zyada coffee peeta hai?,"Bhai, stress ka side effect hai 😂."
|
| 50 |
+
"Bhai, tera birthday aaya?","Haan bhai, par gift nahi aaya 😂."
|
| 51 |
+
Tu ghar pe hai?,"Haan bhai, ghar hi zindagi hai 😂."
|
| 52 |
+
Tu kal gaya tha?,"Bhai, gaya tha, bhool gaya 😂."
|
| 53 |
+
Tera dukh kya hai bhai?,"Bhai, bus Monday kaam pe jana hai 😂."
|
| 54 |
+
Tu kal kahan gaya?,"Bhai, sapno mein ghoom raha tha 😂."
|
| 55 |
+
Tu kal off tha?,"Bhai, bas dil se 😂."
|
| 56 |
+
Tune wo video dekha?,"Bhai, repeat pe chal raha hai 😂."
|
| 57 |
+
Tera dukh kya hai?,"Bhai, salary khatam ho gayi 😂."
|
| 58 |
+
Tu kal kaam pe gaya?,"Bhai, bas sochta hi reh gaya 😂."
|
| 59 |
+
"Bhai, kal milne chale?","Bhai, ghar hi safe hai 😂."
|
| 60 |
+
Tu kal late aaya?,"Bhai, traffic se dosti ho gayi 😂."
|
| 61 |
+
"Bhai, kal gym chal?","Gym? Bhai, humara toh 'jimmedari' kaafi hai! 😂"
|
| 62 |
+
Tune uska Insta story dekha?,"Haan bhai, lagta hai woh bhi filter ki dukaan khol li hai! 😆"
|
| 63 |
+
"Yaar, tu itna busy kyun rehta hai?","Kya karoon bhai, zindagi ne 'busy' button daba diya hai! 😅"
|
| 64 |
+
Aaj kal kya chal raha hai?,"Bas bhai, life ka 'software update' pending hai! 🤖"
|
| 65 |
+
Tera boss kaisa hai?,"Bhai, woh toh 'mood swing' ka live example hai! 😜"
|
| 66 |
+
Tu diet pe hai kya?,"Haan, bas 'momos' aur 'pizza' ko diet plan mein adjust karna hai! 🍕"
|
| 67 |
+
Kal party mein kyun nahi aaya?,"Bhai, mera 'Netflix' aur 'bed' ke saath commitment tha! 📺"
|
| 68 |
+
Tune naya web series dekha?,"Haan, ab toh 'binge-watching' mera naya talent ban gaya hai! 🎬"
|
| 69 |
+
Tera phone itna slow kyun hai?,"Bhai, yeh phone nahi, 'tortoise' hai! 🐢"
|
| 70 |
+
Tu itna late kyun aaya?,Traffic ne aaj phir se 'surprise test' le liya! 🚗
|
| 71 |
+
Kya haal hai bhai?,"Bas bhai, zindagi 'buffering' mode mein hai! ⏳"
|
| 72 |
+
Tu kal gym gaya tha?,"Haan bhai, lekin bas 'selfie' kheechi thi! 📸"
|
| 73 |
+
Tere paas charger hai?,"Haan bhai, par battery khatam hai! 🔋"
|
| 74 |
+
Tune pizza order kiya?,"Haan bhai, extra cheese bhi maang liya! 🧀"
|
| 75 |
+
Office ka workload kaisa chal raha hai?,"Bhai, workload nahi, dukh chal raha hai! 😩"
|
| 76 |
+
Tune WhatsApp group chhod diya?,"Haan bhai, drama zyada ho gaya tha! 🎭"
|
| 77 |
+
Weekend pe kya plan hai?,"Bhai, full chill maarne ka mood hai! 🥂"
|
| 78 |
+
Kya tu aaj free hai?,"Free nahi bhai, bas zindagi ki maar kha raha hoon! 😎"
|
| 79 |
+
Exam ka preparation kaisa hai?,"Bhai, bas TikTok scrolling chal raha hai! 📱"
|
| 80 |
+
Tere paas notes hain?,"Haan bhai, par padhai se zyada memes hain usme! 😂"
|
| 81 |
+
Kal cricket khelne chale?,"Bhai, fielding nahi karunga! 🏏"
|
| 82 |
+
Shaadi kab kar raha hai bhai?,"Bhai, pehle job toh lagne de! 💼"
|
| 83 |
+
Tera phone kitna slow hai!,"Bhai, processor nahi, emotions chala raha hai! 🤖"
|
| 84 |
+
Tune uska reply dekha?,"Haan bhai, bas seen maar diya! 😢"
|
| 85 |
+
Bhai tu kab aayega ghar?,Bhai jab zindagi thoda sudhar jayegi! 🏠
|
| 86 |
+
Tera boss kaisa hai?,"Bhai, sach bolu? Zindagi ka villain hai! 😈"
|
| 87 |
+
Tera laptop hang ho gaya?,"Bhai, meri zindagi bhi hang ho gayi! 💻"
|
| 88 |
+
Khana kha liya?,"Haan bhai, lekin mann nahi bhara! 🍛"
|
| 89 |
+
Tu kal party mein aaya nahi?,"Bhai, neend zyada zaroori thi! 😴"
|
| 90 |
+
Tu kal gym nahi gaya?,"Bhai, motivation so raha tha! 🛌"
|
| 91 |
+
Tune uska Insta story dekha?,"Haan bhai, dil toot gaya! 💔"
|
| 92 |
+
"Bhai, kal salary aayi?","Haan bhai, aur gayi bhi! 💸"
|
| 93 |
+
Tune latest web series dekhi?,"Haan bhai, binge-watching expert ban gaya! 📺"
|
| 94 |
+
Tu kal late kyun aaya?,"Bhai, neend ki problem hai! 😪"
|
| 95 |
+
Tune ghar ka kaam kiya?,"Bhai, homework nahi, gharwork kar raha hoon! 🏡"
|
| 96 |
+
"Bhai, tu kahan hai?","Bhai, zindagi ke jhamelon mein! 🎢"
|
| 97 |
+
"Bhai, kal off hai kya?","Bhai, sapno mein hi hai! 💤"
|
| 98 |
+
Tere gharwale strict hain?,"Bhai, Hitler ke fan hain! 👨‍✈️"
|
| 99 |
+
Tune match dekha?,"Haan bhai, heart attack ho gaya tha! ⚽"
|
| 100 |
+
Tu kal late kyun aaya?,"Bhai, sapno mein tha! 🌌"
|
| 101 |
+
Tune video banayi?,"Bhai, banayi nahi, viral ho gayi! 🎥"
|
| 102 |
+
Tu gym ja raha hai?,"Bhai, sirf reels dekh raha hoon! 📱"
|
| 103 |
+
Koi naya gaana recommend kar?,"Bhai, Atif ka purana sun le! 🎶"
|
| 104 |
+
Tu kal kahan tha?,"Bhai, so raha tha! 🛌"
|
| 105 |
+
Tera net slow hai?,"Bhai, 2G se bhi slow hai! 🐢"
|
| 106 |
+
Tune online shopping ki?,"Bhai, cart bhar diya, budget nahi! 🛒"
|
| 107 |
+
Bhai tera crush tera bhai ban gaya!,"Bhai, bas dua mein yaad rakhna! 🙏"
|
| 108 |
+
Tu kal late kyun aaya?,"Bhai, alarm ne bhi haath utha diya tha! 😴"
|
| 109 |
+
Tera net itna slow kyun hai?,"Bhai, turtle race chal rahi hai! 🐢"
|
| 110 |
+
Tune kal ka match dekha?,"Haan bhai, dil ke saath umeed bhi tut gayi! 💔"
|
| 111 |
+
Tu office mein late kyun pahucha?,"Bhai, traffic nahi, zindagi slow chal rahi thi! 🚶‍♂️"
|
| 112 |
+
Tune naya phone liya?,"Haan bhai, EMI ke saath zindagi bhi le li! 💸"
|
| 113 |
+
Tu diet par hai kya?,"Bhai, sirf naam ka, pet ka nahi! 🍕"
|
| 114 |
+
Tu kal gym gaya tha?,"Haan bhai, bas treadmill dekh ke wapas aa gaya! 😂"
|
| 115 |
+
Tera boss kaisa hai?,"Bhai, uske face par hamesha Monday rehta hai! 😩"
|
| 116 |
+
Tune latest movie dekhi?,"Haan bhai, aur story khatam hone se pehle neend aa gayi! 😴"
|
| 117 |
+
"Bhai, kal ka plan pakka?","Bhai, bas mood ka bharosa nahi! 😎"
|
| 118 |
+
Tu kal party kyun nahi aaya?,"Bhai, ghar ka wifi chhod kar jaana nahi chahta tha! 😂"
|
| 119 |
+
Tune job apply ki?,"Bhai, apply nahi, bas try kar raha hoon! 🤞"
|
| 120 |
+
Tu cricket dekh raha hai?,"Bhai, dil hi tod diya unhone! 💔🏏"
|
| 121 |
+
Tu kal kaha gaya tha?,"Bhai, zindagi se milne gaya tha! 🤯"
|
| 122 |
+
Tera phone hang ho gaya?,"Bhai, phone nahi, patience hang ho gaya! 😫"
|
| 123 |
+
"Bhai, kal gym chale?","Bhai, bas protein shake ka sapna dekh raha hoon! 💪😎"
|
| 124 |
+
Tune new song suna?,"Haan bhai, ab toh playlist repeat pe chal rahi hai! 🎧"
|
| 125 |
+
"Bhai, kal milne chale?","Bhai, ghar ka wifi chod kar jaana nahi chahta! 😂"
|
| 126 |
+
Tu kal late kyun aaya?,"Bhai, neend ka overdose ho gaya tha! 😴"
|
| 127 |
+
Tu kal bike se gaya tha?,"Haan bhai, bas hawa se race laga raha tha! 🏍️💨"
|
| 128 |
+
"Bhai, tera pet kaisa hai?","Bhai, mujhse zyada royal treat mil raha hai usko! 🐾😂"
|
| 129 |
+
Tune latest web series dekhi?,"Bhai, binge-watching ke chakkar mein neend ud gayi! 📺😵"
|
| 130 |
+
Tu kal concert gaya tha?,"Haan bhai, awaaz gayab leke wapas aaya! 🎤😂"
|
| 131 |
+
Tu itna busy kyun hai?,"Bhai, zindagi ne full-time job de di hai! 😎"
|
| 132 |
+
Tune naya laptop liya?,"Haan bhai, EMI wali zindagi shuru ho gayi! 💻💸"
|
| 133 |
+
"Bhai, kal kaun sa movie dekhte hain?","Bhai, pehle budget check kar lete hain! 💰😂"
|
| 134 |
+
Tera data khatam ho gaya?,"Bhai, reels ka talent hi yeh hai! 📱😂"
|
| 135 |
+
Tu kal late kyun aaya?,"Bhai, bed ne chodhne se mana kar diya tha! 😂"
|
| 136 |
+
Tune ghar ka kaam kiya?,"Bhai, ghar ka kaam nahi, dukh pocha tha! 😂"
|
| 137 |
+
"Bhai, exam ka preparation kaisa hai?","Bhai, bas syllabus ke sapne dekh raha hoon! 📚😎"
|
| 138 |
+
Tu kal gym gaya tha?,"Haan bhai, lekin sirf water cooler tak! 🚶‍♂️😂"
|
| 139 |
+
Tune video banayi?,"Bhai, banayi nahi, viral ho gayi! 🎥🔥"
|
| 140 |
+
"Bhai, tu shopping gaya?","Haan bhai, window shopping expert ban gaya! 🛒😂"
|
| 141 |
+
Tune ghar ka kaam kiya?,"Bhai, bas mobile ka safai abhiyan chal raha tha! 📱😂"
|
| 142 |
+
Tu kal kahan tha?,"Bhai, khayalon mein ghoom raha tha! 🤯😂"
|
| 143 |
+
Tera boss kaisa hai?,"Bhai, Monday ke mood mein hi rehta hai! 😩"
|
| 144 |
+
Tu kal party gaya tha?,"Haan bhai, DJ se zyada khana baja raha tha! 🍕🥂😂"
|
| 145 |
+
Tune latest web series dekhi?,"Bhai, dekh ke neend ki yaad aa gayi! 😴📺"
|
| 146 |
+
Tere paas charger hai?,"Haan bhai, lekin khud bhi charging dhund raha hai! 🔋��"
|
| 147 |
+
"Bhai, weekend pe kya plan hai?","Bhai, bas sapno mein trip karne ka plan hai! ✈️😂"
|
| 148 |
+
Tu kal gym gaya tha?,"Haan bhai, bas mirror selfies li thi! 📸😎"
|
| 149 |
+
Tune uska Insta dekha?,"Bhai, filter se zyada kuch nahi dikha! 😎😂"
|
| 150 |
+
Tu kal late kyun aaya?,"Bhai, traffic aur zindagi dono slow thi! 🚗😂"
|
| 151 |
+
Tera phone slow hai?,"Bhai, snail bhi sharma jaye is speed se! 🐌😂"
|
| 152 |
+
Tu diet pe hai kya?,"Haan bhai, par pizza diet plan mein fit nahi ho raha! 🍕😎"
|
| 153 |
+
"Bhai, kal milne chale?","Bhai, ghar ka wifi nahi chhod sakta! 😂"
|
| 154 |
+
Tu kal kaam pe gaya?,"Bhai, bas neend ka load leke gaya tha! 😴😂"
|
| 155 |
+
Tune shopping ki?,"Haan bhai, cart bhar diya, budget nahi! 🛒💸😂"
|
| 156 |
+
Tera crush online tha?,"Haan bhai, par reply nahi aaya! 😢😂"
|
| 157 |
+
Tune kal ka match dekha?,"Haan bhai, lagta hai team ne bhi hope chhod di! 😭🏏"
|
| 158 |
+
Tu kal late kyun aaya?,"Bhai, neend se break-up nahi ho raha tha! 😴😂"
|
| 159 |
+
Tune naya phone liya?,"Haan bhai, aur EMI leke zindagi bhi le li! 💸📱"
|
| 160 |
+
"Bhai, tu kitna busy rehta hai?","Bhai, zindagi full-time job ban gayi hai! 😂"
|
| 161 |
+
Tune gym join kiya?,"Haan bhai, par membership card hi exercise kar raha hai! 🏋️♂️😂"
|
| 162 |
+
Tera dukh kya hai bhai?,"Bhai, salary aayi thi, chali bhi gayi! 💸😩"
|
| 163 |
+
Tu kal kahan tha?,"Bhai, bed ke saath relationship strong ho raha tha! 🛏️❤️"
|
| 164 |
+
Tune ghar ka kaam kiya?,"Haan bhai, remote dhundhne ka kaam! 😂📺"
|
| 165 |
+
Tu kal movie gaya tha?,"Haan bhai, par ticket se zyada popcorn mehenga tha! 🍿💸"
|
| 166 |
+
Tune online shopping ki?,"Bhai, cart bhar diya, wallet khali hai! 😭🛒"
|
| 167 |
+
Tu kal cricket khelne gaya?,"Haan bhai, par fielding se allergy ho gayi thi! 😂🏏"
|
| 168 |
+
Tera net slow hai?,"Bhai, lagta hai wifi bhi break le raha hai! 🐢😂"
|
| 169 |
+
"Bhai, weekend pe kya plan hai?","Bhai, bas sapno mein Maldives jaane ka plan hai! 🏝️😂"
|
| 170 |
+
Tu gym gaya tha?,"Haan bhai, par sirf dumbbell dekh ke wapas aa gaya! 💪😂"
|
| 171 |
+
Tune uska Insta dekha?,"Bhai, filter se chehra nahi, zindagi badal gayi! 😂📸"
|
| 172 |
+
"Bhai, kal ka plan confirm?","Bhai, bas mood ke upar depend karta hai! 😂"
|
| 173 |
+
Tune cricket dekha?,"Haan bhai, dil bhi toota aur TV bhi! 😂📺"
|
| 174 |
+
Tu kal date pe gaya tha?,"Haan bhai, par sirf bill bharne gaya tha! 😂💸"
|
| 175 |
+
Tera dukh kya hai bhai?,"Bhai, paise khatam aur mahina baaki hai! 😂💸"
|
| 176 |
+
"Bhai, tera boss kaisa hai?","Bhai, Monday ka live version hai! 😭😎"
|
| 177 |
+
Tune naya gaana suna?,"Haan bhai, ab repeat pe chal raha hai! 🎧🔥"
|
| 178 |
+
Tu kal late kyun aaya?,"Bhai, neend aur traffic dono se panga ho gaya! 😎🚗"
|
| 179 |
+
"Bhai, kal salary mili?","Haan bhai, aur khatam bhi ho gayi! 😂💸"
|
| 180 |
+
Tune ghar ka kaam kiya?,"Bhai, bas mobile ka storage saaf kiya! 📱😂"
|
| 181 |
+
Tera crush online tha?,"Haan bhai, par bas status update kiya! 😭😂"
|
| 182 |
+
"Bhai, kal kaam pe gaya?","Bhai, bas attendance dene gaya tha! 😂💼"
|
| 183 |
+
Tune movie dekhi?,"Haan bhai, story se zyada neend achhi thi! 😴🎥"
|
| 184 |
+
Tu diet pe hai kya?,"Haan bhai, par pizza diet plan mein nahi aata! 🍕😂"
|
| 185 |
+
Tera phone slow hai?,"Bhai, lagta hai 2G ka comeback ho gaya! 🐢📱"
|
| 186 |
+
Tu kal shopping gaya tha?,"Haan bhai, lekin sirf mannequins dekhe! 😂🛍️"
|
| 187 |
+
"Bhai, weekend pe kya plan hai?","Bhai, bed aur blanket se relationship strong karna hai! 😂🛏️"
|
| 188 |
+
Tu kal bike se gaya tha?,"Haan bhai, hawa se race laga raha tha! 🏍️💨"
|
| 189 |
+
Tune ghar ka kaam kiya?,"Bhai, bas remote dhundh raha tha! 😂📺"
|
| 190 |
+
Tu cricket khelta hai?,"Bhai, bas fielding avoid karta hoon! 😂🏏"
|
| 191 |
+
Tera boss strict hai?,"Bhai, usko smile bhi paid leave pe milti hai! 😂😎"
|
| 192 |
+
Tu kal late kyun aaya?,"Bhai, neend ka over-time ho gaya tha! 😴😂"
|
| 193 |
+
Tune naya web series dekha?,"Haan bhai, binge-watching expert ban gaya hoon! 📺🔥"
|
| 194 |
+
"Bhai, kal ka plan fix?","Bhai, bas neend se permission leni baaki hai! 😂"
|
| 195 |
+
Tera dukh kya hai bhai?,"Bhai, zindagi ne no refund policy laga di hai! 😂"
|
| 196 |
+
Tu kal kahaan tha?,"Bhai, khayalon mein ghoom raha tha! 🌌😂"
|
| 197 |
+
"Bhai, kal cricket khelne chale?","Haan bhai, par batting hi karunga! 🏏😎"
|
| 198 |
+
Tune naya phone liya?,"Haan bhai, ab data nahi, EMI khatam ho rahi hai! 💸😂"
|
| 199 |
+
Tu kal movie dekhne gaya?,"Haan bhai, lekin ending se pehle neend aa gayi! 😴🎥"
|
| 200 |
+
Tera net slow hai?,"Bhai, turtle race chal rahi hai! 🐢📶"
|
| 201 |
+
Tune ghar ka kaam kiya?,"Haan bhai, bas fridge kholne ka kaam! 😂🍕"
|
| 202 |
+
"Bhai, kal gym chale?","Gym? Bhai, humara toh 'jimmedari' kaafi hai! 😂"
|
| 203 |
+
Tune latest movie dekhi?,"Haan bhai, story se zyada neend interesting thi! 😴🎥"
|
| 204 |
+
Tu kal cricket khelne gaya?,"Haan bhai, par sirf toss jeeta! 😂🏏"
|
| 205 |
+
"Bhai, tera boss strict hai?","Bhai, usko toh chhutti ka spelling bhi nahi aata! 😂😎"
|
| 206 |
+
Tu kal late kyun aaya?,"Bhai, bed se alag hone ka mann nahi tha! 🛏️😂"
|
| 207 |
+
Tune naya song suna?,"Haan bhai, ab repeat pe chal raha hai! 🎧🔥"
|
| 208 |
+
Tera phone slow hai?,"Bhai, lagta hai snail bhi sharma jaye! 🐌📱😂"
|
| 209 |
+
Tu kal shopping gaya tha?,"Haan bhai, lekin sirf mannequins ko dekha! 😂🛍️"
|
| 210 |
+
"Bhai, kal gym gaya tha?","Nahi bhai, bas reels dekh ke calories jala raha hoon 😂📱"
|
| 211 |
+
Tune uska Insta story dekha?,"Haan bhai, full 'main character energy' thi! 🤩📸"
|
| 212 |
+
Tu kal late kyun aaya?,"Bhai, alarm aur meri dosti thodi toxic ho gayi hai 😴⏰"
|
| 213 |
+
Kal office mein kya scene tha?,"Bhai, same drama, different day! 😩💼"
|
| 214 |
+
Tera net slow hai?,"Bhai, lagta hai Airtel ne 2G ka throwback de diya! 🐢📶"
|
| 215 |
+
Tune naya phone liya?,"Haan bhai, EMI dekh ke ro raha hoon 💸😭"
|
| 216 |
+
Tu kal date pe gaya tha?,"Haan bhai, par bill bharne mein hi pyaar khatam ho gaya 😂💀"
|
| 217 |
+
Tune ghar ka kaam kiya?,"Bhai, bas online shopping ke cart saaf kiya 😂🛒"
|
| 218 |
+
Weekend pe kya scene hai?,"Bhai, bas bed aur Netflix ka serious relationship hai 🍿❤️"
|
| 219 |
+
Tune naya gaana suna?,"Haan bhai, ab repeat pe chal raha hai, neighbors pareshaan hai 😂🎧"
|
| 220 |
+
Tera dukh kya hai bhai?,"Bhai, dukh nahi… bas 'low battery' wali zindagi hai 😭🔋"
|
| 221 |
+
Tu kal gym gaya tha?,"Haan bhai, bas cardio ke naam pe water cooler tak chala 😂🚶♂️"
|
| 222 |
+
Bhai tera crush tera bhai ban gaya 😂,"Bhai, ab bas rakhi ki tayyari kar raha hoon 😂😭"
|
| 223 |
+
Tune naya web series dekha?,"Haan bhai, binge-watching Olympic level pe hai! 🎯📺"
|
| 224 |
+
Tera phone slow hai?,"Bhai, snail bhi sharma jaye aisi speed hai 🐌📱😂"
|
| 225 |
+
Bhai kal party mein kyun nahi aaya?,"Bhai, mera bed se commitment tha 🛏️❤️"
|
| 226 |
+
Tune ghar ka kaam kiya?,"Bhai, bas fridge open-close wala cardio kiya 😂🍕"
|
| 227 |
+
Tu kal cricket khelne gaya?,"Haan bhai, par ball se dosti nahi ho paayi 🏏😂"
|
| 228 |
+
Tera boss kaisa hai?,"Bhai, pura 'mood swing' ka calendar hai 😂😎"
|
| 229 |
+
Bhai kal kaam pe gaya?,"Haan bhai, attendance dene gaya tha bas 😂💼"
|
| 230 |
+
Tune naya phone liya?,"Haan bhai, EMI dekh ke laga phone nahi, loan liya hai 😂📱"
|
| 231 |
+
Tu kal gym gaya?,"Haan bhai, par bas mirror ke saamne flex kiya 😂💪"
|
| 232 |
+
"Bhai, weekend pe kya scene?","Bhai, bas 'napflix' and chill! 🛏️😎"
|
| 233 |
+
Tu kal shopping gaya tha?,"Haan bhai, lekin mannequins hi dekhe 😂🛍️"
|
| 234 |
+
Tune uska reply dekha?,"Haan bhai, bas 'seen' maar diya 💔😢"
|
| 235 |
+
Bhai kal milne chale?,"Bhai, ghar pe hi 'soft launch' ho raha hoon 😂🛋️"
|
| 236 |
+
Tu itna busy kyun hai?,"Bhai, life 'do not disturb' mode pe hai 😂📵"
|
| 237 |
+
Tu kal cricket khelne gaya?,"Haan bhai, par sirf shadow practice ki 😂🏏"
|
| 238 |
+
Tune ghar ka kaam kiya?,"Bhai, bas remote dhundhne ka kaam 😂📺"
|
| 239 |
+
Tera boss strict hai?,"Bhai, usko chhutti ka spelling bhi nahi aata 😂😎"
|
| 240 |
+
"Bhai, kal ka plan fix?","Bhai, bas neend se permission leni baaki hai 😂😴"
|
| 241 |
+
Tune naya song suna?,"Haan bhai, ab toh ringtone bhi wahi hai 😂🎵"
|
| 242 |
+
Tera phone slow hai?,"Bhai, tortoise ko bhi sharam aa jaye 😂🐢"
|
| 243 |
+
Tu kal movie gaya tha?,"Haan bhai, par ending se pehle neend aa gayi 😂😴"
|
| 244 |
+
Tune naya meme dekha?,"Haan bhai, share karte karte battery khatam ho gayi 😂📱"
|
| 245 |
+
"Bhai, kal salary aayi?","Haan bhai, aur khatam bhi ho gayi 😂💸"
|
| 246 |
+
Tune naya gaana suna?,"Haan bhai, ab toh playlist ka raja ban gaya 😂🎧"
|
| 247 |
+
Tu kal late kyun aaya?,"Bhai, sapno ka 'overtime' ho gaya 😂💤"
|
| 248 |
+
Tune online shopping ki?,"Haan bhai, cart full, wallet empty 😂🛒"
|
| 249 |
+
"Bhai, weekend pe kya scene?","Bhai, chill maarne ka full mood hai 🥂😂"
|
| 250 |
+
Tune cricket dekha?,"Haan bhai, player se zyada umpire dekha 😂🏏"
|
| 251 |
+
"Bhai, tera boss strict hai?","Bhai, Monday ka human version hai 😂💼"
|
| 252 |
+
Tune movie dekhi?,"Haan bhai, par neend zyada interesting thi 😂😴"
|
| 253 |
+
Tu kal late kyun aaya?,"Bhai, bed ne break-up nahi diya 😂🛏️"
|
| 254 |
+
Tu cricket dekh raha hai?,"Haan bhai, TV ka remote nahi dekh raha 😂📺"
|
| 255 |
+
Tu kal gym gaya?,"Haan bhai, bas selfie kheechne 😂📸"
|
| 256 |
+
Tune ghar ka kaam kiya?,"Bhai, bas meme banane ka kaam 😂💻"
|
| 257 |
+
Tu kal shopping gaya?,"Haan bhai, lekin mannequins se hi baat ho gayi 😂🛍️"
|
| 258 |
+
Tera boss strict hai?,"Bhai, usko toh smile bhi paid leave pe milti hai 😂😎"
|
| 259 |
+
"Bhai, weekend pe kya scene?","Bhai, full 'ghar se hi ghar wapsi' 😂🏠"
|
| 260 |
+
Tune naya gaana suna?,"Haan bhai, ab toh lyrics bhi ratti ho gayi 😂🎵"
|
| 261 |
+
"Bhai, kal ka plan?","Bhai, neend ki 'booking' full hai 😂🛌"
|
| 262 |
+
Tere boss ka mood kaisa hai aaj?,"Bhai, pura 'Monday on steroids' lag raha hai 😂💀"
|
| 263 |
+
Tu kal date pe gaya tha?,"Haan bhai, par bill bharte hi break-up soch raha tha 💸😭"
|
| 264 |
+
Bhai kal ka meeting kaisa tha?,"Bhai, bas Zoom ka background enjoy kiya 😂💻"
|
| 265 |
+
Tune uska message dekha?,"Haan bhai, reply nahi, bas 'seen' maar diya 💔👀"
|
| 266 |
+
"Bhai, exam kaisa gaya?","Bhai, bas pen chal raha tha… dimaag nahi 😂🧠"
|
| 267 |
+
Tere boss ne kuch bola?,"Haan bhai, lagta hai unka breakup hua hai 😂💀"
|
| 268 |
+
Tu weekend pe kya kar raha hai?,"Bhai, bas 'Netflix and snore' mode on hai 😂🍿😴"
|
| 269 |
+
Tune salary check ki?,"Haan bhai, but lagta hai HR ne 'prank' kiya hai 😂💸"
|
| 270 |
+
Tera WiFi slow hai?,"Bhai, snail bhi race jeet jayega 😂🐢"
|
| 271 |
+
Kal ka gym scene hai kya?,"Gym nahi bhai, sirf 'gymmedariyan' hai 😂💼"
|
| 272 |
+
"Bhai, kal ka cricket match dekha?","Haan bhai, par umpire zyada entertaining tha 😂🤦♂️"
|
| 273 |
+
Tune naya filter try kiya?,"Haan bhai, asli se zyada sundar lag raha tha 😂📸"
|
| 274 |
+
Tera crush online hai kya?,"Haan bhai, par bas 'last seen' ka ehsaas de rahi hai 💔😂"
|
| 275 |
+
"Bhai, kal movie ka plan hai?","Bhai, 'pockets empty' ka plan hai 😂💸"
|
| 276 |
+
Tu gym join karega kya?,"Bhai, bas 'intentions fit' hai, body nahi 😂💪"
|
| 277 |
+
Tere boss ne kuch poocha?,"Haan bhai, bas HR jaisa dikh raha tha 😂💀"
|
| 278 |
+
Tune ghar ka kaam kiya?,"Bhai, bas dishes aur dreams donon dhoye 😂🍽️😴"
|
| 279 |
+
Tere weekend plans kya hai?,"Bhai, bas neend aur napka date hai 😂🛌"
|
| 280 |
+
Tu kal office gaya tha?,"Haan bhai, par attendance dene gaya tha bas 😂💼"
|
| 281 |
+
Tune ghar ka renovation kiya?,"Bhai, bas sofa ka position badla 😂🛋️"
|
| 282 |
+
Tu kal late kyun aaya?,"Bhai, traffic nahi, neend heavy thi 😂😴"
|
| 283 |
+
Tera net slow hai kya?,"Bhai, pigeon se bhi slow chal raha hai 😂🐦"
|
| 284 |
+
"Bhai, tu cricket khelta hai?","Haan bhai, bas fielding se dushmani hai 😂🏏"
|
| 285 |
+
Tu kal party gaya tha?,"Haan bhai, par free snacks pe zyada dhyan tha 😂🍕"
|
| 286 |
+
"Bhai, tere paas charger hai?","Haan bhai, par battery khatam hai 😂🔋"
|
| 287 |
+
Tune weekend pe kya kiya?,"Bhai, bas bed ke saath commitment nibha raha tha 😂🛏️"
|
| 288 |
+
"Bhai, salary gayi?","Haan bhai, udti chidiya bhi nahi thi, bas ud gayi 😂💸"
|
| 289 |
+
Tune naya meme dekha?,"Haan bhai, do baar share bhi kiya 😂📱"
|
| 290 |
+
"Bhai, tera pet bhag gaya?","Haan bhai, EMI dekh ke 😂🐕🦺"
|
| 291 |
+
Tera phone slow hai?,"Bhai, turtle bhi inspire ho jaye 😂🐢"
|
| 292 |
+
"Bhai, tu naya laptop le raha hai?","Haan bhai, EMI lene ka plan hai 😂💻"
|
| 293 |
+
Tune uska tweet dekha?,"Haan bhai, lagta hai usne Elon Musk ko hire kar liya 😂🐦"
|
| 294 |
+
"Bhai, kal late kyun aaya?","Bhai, neend ka heavy dose ho gaya tha 😂😴"
|
| 295 |
+
Tu kal gym gaya tha?,"Haan bhai, par bas locker ka lock khol ke aa gaya 😂🔒"
|
| 296 |
+
"Bhai, tera dukh kya hai?","Bhai, 'low balance' aur 'low battery' ek saath hai 😂💸🔋"
|
| 297 |
+
Tere gharwale strict hai?,"Bhai, 'WiFi password' se bhi zyada 😂🔒"
|
| 298 |
+
Tune naya filter try kiya?,"Haan bhai, asli se zyada fake lag raha tha 😂📸"
|
| 299 |
+
Tera dost cool hai?,"Bhai, uski coolness dekh ke AC bhi sharma jaye 😂❄️"
|
| 300 |
+
Tune ghar ka kaam kiya?,"Bhai, bas 'undo' button dhoond raha hoon 😂⏪"
|
| 301 |
+
Tu kal cricket khelne gaya?,"Haan bhai, par bas fielding se dushmani ho gayi 😂🏏"
|
| 302 |
+
"Bhai, kal kaam pe gaya?","Haan bhai, attendance dene gaya tha bas 😂💼"
|
| 303 |
+
"Bhai, kal ka cricket match dekha?","Haan bhai, bas 'last over' mein dil toota 😂💔🏏"
|
| 304 |
+
Tune naya song suna?,"Haan bhai, repeat pe chal raha hai 😂🎧"
|
| 305 |
+
Tera net slow hai?,"Bhai, pigeon se bhi slow chal raha hai 😂🐦"
|
| 306 |
+
"Bhai, tune naya meme dekha?","Haan bhai, battery khatam kar diya share karte 😂📱"
|
| 307 |
+
"Bhai, tu aaj kal busy rehta hai?","Haan bhai, bas 'workload' se zyada 'overthinkload' hai 😂💀"
|
| 308 |
+
Tune kal gym join kiya?,"Haan bhai, par bas 'selfie membership' li hai 📸😂"
|
| 309 |
+
"Bhai, tu diet pe hai?","Haan bhai, bas 'pizza' aur 'biryani' ko healthy maan liya 😂🍕🍗"
|
| 310 |
+
Tere boss ka mood kaisa hai?,"Bhai, lagta hai unke 'data plan' ka bhi expiry ho gaya 😂📉"
|
| 311 |
+
Tu kal kitne baje soya?,"Bhai, bas 'Netflix' ne raat ka 'delete button' daba diya 😂📺"
|
| 312 |
+
"Bhai, tera crush tujhe bhool gaya?","Haan bhai, par Instagram memories nahi 😂💔📱"
|
| 313 |
+
Tune weekend pe kya kiya?,"Bhai, bas 'bed' se zyada serious relationship mein tha 😂🛏️"
|
| 314 |
+
Tu kal late kyun aaya?,"Bhai, traffic nahi, bas 'mood swing' heavy tha 😂🚦"
|
| 315 |
+
Tune naya song suna?,"Haan bhai, ab toh 'repeat' se bhi dosti ho gayi 😂🎧"
|
| 316 |
+
Tera dukh kya hai?,"Bhai, salary bhi 'fast forward' mode mein chali gayi 😂💸"
|
| 317 |
+
Tu kal cricket khel raha tha?,"Haan bhai, par fielding se 'breakup' ho gaya 😂🏏"
|
| 318 |
+
Tere gharwale strict hai?,"Bhai, WiFi password se bhi zyada 😂🔒"
|
| 319 |
+
Tune naya filter try kiya?,"Haan bhai, asli se zyada 'animated' lag raha tha 😂📸"
|
| 320 |
+
Tu kal date pe gaya tha?,"Haan bhai, par bas 'pocket money' ka breakup ho gaya 😂💸"
|
| 321 |
+
"Bhai, weekend pe kya plan hai?","Bhai, full 'bedflix and nap' mode on hai 😂🛌🍿"
|
| 322 |
+
Tu gym nahi gaya?,"Bhai, bas 'motivation' ne leave le liya 😂💪"
|
| 323 |
+
Tera net slow hai?,"Bhai, turtle bhi 'fast and furious' lag raha hai 😂🐢"
|
| 324 |
+
"Bhai, tu cricket dekh raha hai?","Haan bhai, par umpire zyada entertaining hai 😂👀"
|
| 325 |
+
Tune ghar ka kaam kiya?,"Bhai, bas 'undo button' dhoond raha hoon 😂⏪"
|
| 326 |
+
Tera phone slow hai?,"Bhai, snail bhi jeet jayega race mein 😂🐌"
|
| 327 |
+
Tune naya reel banaya?,"Haan bhai, viral nahi hua, bas family ne dekha 😂📱"
|
| 328 |
+
Tere boss ne kuch bola?,"Haan bhai, bas 'mental gym' karwa rahe the 😂💀"
|
| 329 |
+
"Bhai, weekend pe outing?","Bhai, bas 'ghar se terrace' tak travel hoga 😂🏠"
|
| 330 |
+
Tune naya job apply kiya?,"Haan bhai, par HR ne 'seen' maar diya 😂💼"
|
| 331 |
+
Tu aaj kal busy hai?,"Haan bhai, bas 'meme scrolling' mein busy hoon 😂📱"
|
| 332 |
+
"Bhai, tu kal late kyun aaya?","Bhai, bas 'alarm snooze' mode mein chala gaya 😂⏰"
|
| 333 |
+
Tu naya phone le raha hai?,"Haan bhai, par EMI ka dukh zyada hai 😂📱💸"
|
| 334 |
+
"Bhai, tera laptop slow hai?","Bhai, lagta hai 'Windows 95' par chal raha hai 😂💻"
|
| 335 |
+
Tune ghar ka renovation kiya?,"Bhai, bas 'sofa' ka angle change kiya 😂🛋️"
|
| 336 |
+
"Bhai, kal movie dekhi?","Haan bhai, par bas popcorn ka bill yaad hai 😂🍿💸"
|
| 337 |
+
Tera dukh kya hai?,"Bhai, salary aur battery dono low hai 😂💸🔋"
|
| 338 |
+
Tune naya meme dekha?,"Haan bhai, share karte hi battery gayab 😂📱⚡"
|
| 339 |
+
Tu kal party gaya tha?,"Haan bhai, par free snacks hi target tha 😂🍕"
|
| 340 |
+
Tera boss kaisa hai?,"Bhai, 'Monday' se bhi zyada toxic hai 😂💀"
|
| 341 |
+
Tune ghar ka kaam kiya?,"Bhai, bas 'mind cleaning' kar raha tha 😂🧠"
|
| 342 |
+
"Bhai, kal cricket khela?","Haan bhai, bas ball dhoondhne gaya tha 😂🏏"
|
| 343 |
+
Tu kal late kyun aaya?,"Bhai, sapno mein fast forward ho gaya 😂💤"
|
| 344 |
+
"Bhai, tera net slow hai?","Haan bhai, '2G' bhi racer lag raha hai 😂🐢"
|
| 345 |
+
Tune naya game try kiya?,"Haan bhai, par boss ka 'deadline game' jeet gaya 😂🎮"
|
| 346 |
+
Tu aaj kal busy hai?,"Haan bhai, bas 'reality check' mein busy hoon 😂🔍"
|
| 347 |
+
"Bhai, kal kya kiya?","Bhai, bas 'procrastination' ka world record banaya 😂🏅"
|
| 348 |
+
Tu kal gym gaya tha?,"Haan bhai, bas 'reels' dekhne 😂📱"
|
| 349 |
+
"Bhai, tune naya reel dekha?","Haan bhai, copy paste bhi kar diya 😂🎥"
|
| 350 |
+
Tera dukh kya hai?,"Bhai, salary aayi bhi nahi gayi bhi 😂💸"
|
| 351 |
+
Tu kal cricket khel raha tha?,"Haan bhai, par sirf 'sledging' mein expert ho gaya 😂🏏"
|
| 352 |
+
"Bhai, weekend pe kya plan hai?","Bhai, bas 'ghar se sofa' tak travel hai 😂🛋️"
|
| 353 |
+
Tune kal movie dekhi?,"Haan bhai, bas ending hi nahi samjhi 😂🎬"
|
| 354 |
+
Tera net slow hai?,"Bhai, 'loading' se zyada 'buffering' hai 😂📶"
|
| 355 |
+
"Bhai, tu naya phone le raha hai?","Haan bhai, par 'battery life' ka breakup ho gaya 😂📱"
|
| 356 |
+
Tune ghar ka kaam kiya?,"Bhai, bas 'remote' ka workout kiya 😂📺"
|
| 357 |
+
Tu kal late kyun aaya?,"Bhai, bas 'snooze' pe 'snooze' maar raha tha 😂⏰"
|
| 358 |
+
Tune naya song suna?,"Haan bhai, ab toh 'repeat' pe chal raha hai 😂🎧"
|
| 359 |
+
Tera boss strict hai?,"Bhai, usko toh 'deadline' se bhi zyada 'deadline' ka darr hai 😂💼"
|
| 360 |
+
"Bhai, tu aaj kal busy hai?","Haan bhai, bas 'meme scrolling' mein busy hoon 😂📱"
|
| 361 |
+
"Bhai, kal kaam pe gaya?","Haan bhai, attendance dene gaya tha bas"
|
| 362 |
+
"Bhai, kal cricket match dekha?","Haan bhai, bas 'last over' mein dil toota"
|
| 363 |
+
"Bhai, weekend pe outing?","Bhai, bas 'ghar se terrace' tak travel hoga"
|
| 364 |
+
"Bhai, tu cricket khelta hai?","Haan bhai, bas fielding se dushmani hai"
|
| 365 |
+
"Bhai, kal ka plan?","Bhai, neend ki 'booking' full hai"
|
| 366 |
+
"Bhai, tu kal late kyun aaya?","Bhai, bas 'alarm snooze' mode mein chala gaya"
|
| 367 |
+
"Bhai, kal movie dekhi?","Haan bhai, par bas popcorn ka bill yaad hai"
|
| 368 |
+
"Bhai, tu aaj kal busy rehta hai?","Haan bhai, bas 'workload' se zyada 'overthinkload' hai"
|
| 369 |
+
"Bhai, kal ka cricket match dekha?","Haan bhai, par umpire zyada entertaining tha"
|
| 370 |
+
"Bhai, weekend pe kya plan hai?","Bhai, bas sapno mein Maldives jaane ka plan hai"
|
| 371 |
+
"Bhai, weekend pe kya scene?","Bhai, chill maarne ka full mood hai"
|
| 372 |
+
"Bhai, kal kaam pe gaya?","Haan bhai, bas attendance dene gaya tha"
|
| 373 |
+
"Bhai, kal ka cricket match dekha?","Haan bhai, bas 'last over' mein dil toota"
|
pages/Chat.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from utils import (
|
| 3 |
+
load_model,
|
| 4 |
+
load_finetuned_model,
|
| 5 |
+
generate_response,
|
| 6 |
+
get_hf_token
|
| 7 |
+
)
|
| 8 |
+
import os
|
| 9 |
+
import json
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
|
| 12 |
+
st.set_page_config(page_title="Gemma Chat", layout="wide")
|
| 13 |
+
|
| 14 |
+
# -------------------------------
|
| 15 |
+
# 💡 Theme Toggle
|
| 16 |
+
# -------------------------------
|
| 17 |
+
dark_mode = st.sidebar.toggle("🌙 Dark Mode", value=False)
|
| 18 |
+
|
| 19 |
+
if dark_mode:
|
| 20 |
+
st.markdown(
|
| 21 |
+
"""
|
| 22 |
+
<style>
|
| 23 |
+
body { background-color: #1e1e1e; color: #ffffff; }
|
| 24 |
+
.stTextInput, .stTextArea, .stSelectbox, .stSlider { color: #ffffff !important; }
|
| 25 |
+
</style>
|
| 26 |
+
""", unsafe_allow_html=True
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
st.title("💬 Chat with Gemma Model")
|
| 30 |
+
|
| 31 |
+
# -------------------------------
|
| 32 |
+
# 📌 Model Source Selection
|
| 33 |
+
# -------------------------------
|
| 34 |
+
model_source = st.sidebar.radio("📌 Select Model Source", ["Local (.pt)", "Hugging Face"])
|
| 35 |
+
|
| 36 |
+
# -------------------------------
|
| 37 |
+
# 🔥 Dynamic Model List
|
| 38 |
+
# -------------------------------
|
| 39 |
+
if model_source == "Local (.pt)":
|
| 40 |
+
model_dir = "models"
|
| 41 |
+
if not os.path.exists(model_dir):
|
| 42 |
+
os.makedirs(model_dir)
|
| 43 |
+
|
| 44 |
+
local_models = [f for f in os.listdir(model_dir) if f.endswith(".pt")]
|
| 45 |
+
|
| 46 |
+
if local_models:
|
| 47 |
+
selected_model = st.sidebar.selectbox("🛠️ Select Local Model", local_models)
|
| 48 |
+
model_path = os.path.join(model_dir, selected_model)
|
| 49 |
+
else:
|
| 50 |
+
st.warning("⚠️ No fine-tuned models found. Fine-tune a model first.")
|
| 51 |
+
st.stop()
|
| 52 |
+
|
| 53 |
+
else:
|
| 54 |
+
hf_models = [
|
| 55 |
+
"google/gemma-3-1b-it",
|
| 56 |
+
"google/gemma-3-4b-pt",
|
| 57 |
+
"google/gemma-3-4b-it",
|
| 58 |
+
"google/gemma-3-12b-pt",
|
| 59 |
+
"google/gemma-3-12b-it",
|
| 60 |
+
"google/gemma-3-27b-pt",
|
| 61 |
+
"google/gemma-3-27b-it"
|
| 62 |
+
]
|
| 63 |
+
selected_model = st.sidebar.selectbox("🛠️ Select Hugging Face Model", hf_models)
|
| 64 |
+
model_path = None
|
| 65 |
+
|
| 66 |
+
# -------------------------------
|
| 67 |
+
# 🔥 Model Loading
|
| 68 |
+
# -------------------------------
|
| 69 |
+
hf_token = get_hf_token()
|
| 70 |
+
|
| 71 |
+
if model_source == "Local (.pt)":
|
| 72 |
+
tokenizer, model = load_model("google/gemma-3-1b-it", hf_token) # Base model first
|
| 73 |
+
model = load_finetuned_model(model, model_path)
|
| 74 |
+
if model:
|
| 75 |
+
st.success(f"✅ Local fine-tuned model loaded: `{selected_model}`")
|
| 76 |
+
else:
|
| 77 |
+
st.error("❌ Failed to load local model.")
|
| 78 |
+
st.stop()
|
| 79 |
+
|
| 80 |
+
else:
|
| 81 |
+
tokenizer, model = load_model(selected_model, hf_token)
|
| 82 |
+
if model:
|
| 83 |
+
st.success(f"✅ Hugging Face model loaded: `{selected_model}`")
|
| 84 |
+
else:
|
| 85 |
+
st.error("❌ Failed to load Hugging Face model.")
|
| 86 |
+
st.stop()
|
| 87 |
+
|
| 88 |
+
# -------------------------------
|
| 89 |
+
# ⚙️ Model Configuration Panel
|
| 90 |
+
# -------------------------------
|
| 91 |
+
st.sidebar.header("⚙️ Model Configuration")
|
| 92 |
+
temperature = st.sidebar.slider("🔥 Temperature", 0.1, 1.5, 0.7, 0.1)
|
| 93 |
+
top_p = st.sidebar.slider("🎯 Top-p", 0.1, 1.0, 0.9, 0.1)
|
| 94 |
+
repetition_penalty = st.sidebar.slider("🔁 Repetition Penalty", 0.5, 2.0, 1.0, 0.1)
|
| 95 |
+
|
| 96 |
+
# -------------------------------
|
| 97 |
+
# 💬 Chat Interface
|
| 98 |
+
# -------------------------------
|
| 99 |
+
if "conversation" not in st.session_state:
|
| 100 |
+
st.session_state.conversation = []
|
| 101 |
+
|
| 102 |
+
prompt = st.text_area("💬 Enter your message:", "Hello, how are you?", key="prompt", height=100)
|
| 103 |
+
max_length = st.slider("📏 Max Response Length", min_value=50, max_value=1000, value=300, step=50)
|
| 104 |
+
|
| 105 |
+
# -------------------------------
|
| 106 |
+
# 🚀 Streaming Response Function
|
| 107 |
+
# -------------------------------
|
| 108 |
+
def stream_response():
|
| 109 |
+
"""
|
| 110 |
+
Streams the response token by token.
|
| 111 |
+
"""
|
| 112 |
+
response = generate_response(prompt, model, tokenizer, max_length)
|
| 113 |
+
|
| 114 |
+
if response:
|
| 115 |
+
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 116 |
+
st.session_state.conversation.append({"sender": "👤 You", "message": prompt, "timestamp": timestamp})
|
| 117 |
+
st.session_state.conversation.append({"sender": "🤖 AI", "message": response, "timestamp": timestamp})
|
| 118 |
+
return response
|
| 119 |
+
else:
|
| 120 |
+
st.error("❌ Failed to generate response.")
|
| 121 |
+
return None
|
| 122 |
+
|
| 123 |
+
# -------------------------------
|
| 124 |
+
# 🎯 Conversation Controls
|
| 125 |
+
# -------------------------------
|
| 126 |
+
col1, col2, col3 = st.columns([1, 1, 1])
|
| 127 |
+
|
| 128 |
+
if col1.button("🚀 Generate (CTRL+Enter)", help="Use CTRL + Enter to generate"):
|
| 129 |
+
stream_response()
|
| 130 |
+
|
| 131 |
+
if col2.button("🗑️ Clear Conversation"):
|
| 132 |
+
st.session_state.conversation = []
|
| 133 |
+
|
| 134 |
+
# Export & Import
|
| 135 |
+
if col3.download_button("📥 Export Chat", json.dumps(st.session_state.conversation, indent=4), "chat_history.json"):
|
| 136 |
+
st.success("✅ Chat exported successfully!")
|
| 137 |
+
|
| 138 |
+
uploaded_file = st.file_uploader("📤 Import Conversation", type=["json"])
|
| 139 |
+
|
| 140 |
+
if uploaded_file is not None:
|
| 141 |
+
st.session_state.conversation = json.load(uploaded_file)
|
| 142 |
+
st.success("✅ Conversation imported successfully!")
|
| 143 |
+
|
| 144 |
+
# -------------------------------
|
| 145 |
+
# 🛠️ Display Conversation
|
| 146 |
+
# -------------------------------
|
| 147 |
+
st.subheader("📜 Conversation History")
|
| 148 |
+
|
| 149 |
+
for msg in st.session_state.conversation:
|
| 150 |
+
with st.container():
|
| 151 |
+
st.markdown(f"**{msg['sender']}** \n🕒 {msg['timestamp']}")
|
| 152 |
+
st.write(msg['message'])
|
| 153 |
+
|
pages/Conversion.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from utils import load_model, convert_to_torchscript, convert_to_onnx, get_hf_token
|
| 3 |
+
|
| 4 |
+
st.title("🔧 Model Conversion")
|
| 5 |
+
|
| 6 |
+
# Load the HF token from utils
|
| 7 |
+
hf_token = get_hf_token()
|
| 8 |
+
|
| 9 |
+
# Load the model
|
| 10 |
+
model_path = "fine_tuned_model.pt"
|
| 11 |
+
tokenizer, model = load_model("google/gemma-3-1b-it", hf_token, model_path)
|
| 12 |
+
|
| 13 |
+
conversion_option = st.selectbox("Select Conversion Format", ["TorchScript", "ONNX"])
|
| 14 |
+
|
| 15 |
+
if st.button("Convert Model"):
|
| 16 |
+
if conversion_option == "TorchScript":
|
| 17 |
+
with st.spinner("Converting to TorchScript..."):
|
| 18 |
+
ts_model = convert_to_torchscript(model)
|
| 19 |
+
st.success("Model converted to TorchScript!")
|
| 20 |
+
|
| 21 |
+
elif conversion_option == "ONNX":
|
| 22 |
+
with st.spinner("Converting to ONNX..."):
|
| 23 |
+
onnx_path = convert_to_onnx(model)
|
| 24 |
+
st.success("Model converted to ONNX!")
|
pages/Dataset_Management.py
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import seaborn as sns
|
| 6 |
+
import os
|
| 7 |
+
from utils import (
|
| 8 |
+
load_dataset,
|
| 9 |
+
save_dataset,
|
| 10 |
+
clean_dataset,
|
| 11 |
+
compute_dataset_score,
|
| 12 |
+
detect_outliers,
|
| 13 |
+
apply_transformation,
|
| 14 |
+
list_datasets,
|
| 15 |
+
detect_inconsistent_types
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
# -------------------------------
# Constants & Setup
# -------------------------------
DATASET_DIR = "datasets"
DEFAULT_DATASET = "train_data.csv"
os.makedirs(DATASET_DIR, exist_ok=True)  # Ensure directory exists

# -------------------------------
# Sidebar: Dataset Selection
# -------------------------------
# Lets the user either pick a file already stored in DATASET_DIR or upload a
# new one. The outcome is `dataset_path`, which stays None until a dataset
# has actually been chosen or uploaded.
st.sidebar.header("📊 Dataset Selection")

# List available datasets from the datasets folder
available_datasets = list_datasets(DATASET_DIR)
dataset_choice = st.sidebar.radio("Choose Dataset Source:", ["Select Existing Dataset", "Upload New Dataset"])

dataset_path = None

if dataset_choice == "Select Existing Dataset":
    if available_datasets:
        selected_dataset = st.sidebar.selectbox("Select Dataset:", available_datasets)
        dataset_path = os.path.join(DATASET_DIR, selected_dataset)
        st.sidebar.success(f"Using `{selected_dataset}` dataset.")
    else:
        st.sidebar.warning("No datasets found. Please upload a new dataset.")
elif dataset_choice == "Upload New Dataset":
    uploaded_file = st.sidebar.file_uploader("Upload Dataset (CSV, JSON, or Excel)", type=["csv", "json", "xlsx"])
    if uploaded_file:
        # Imported locally: the page's import block does not bring in `json`,
        # so the JSON branch below used to fail with a NameError.
        import json

        # Keep only the final path component so a crafted upload name cannot
        # write outside DATASET_DIR.
        upload_name = os.path.basename(uploaded_file.name)
        file_ext = upload_name.rsplit(".", 1)[-1].lower()
        try:
            if file_ext == "csv":
                new_df = pd.read_csv(uploaded_file)
            elif file_ext == "json":
                new_df = pd.json_normalize(json.load(uploaded_file))
            elif file_ext == "xlsx":
                new_df = pd.read_excel(uploaded_file)
            else:
                st.error("Unsupported file format.")
                st.stop()
        except Exception as e:
            st.error(f"Error reading file: {e}")
            st.stop()

        # Save the new dataset with its filename
        dataset_path = os.path.join(DATASET_DIR, upload_name)
        save_dataset(new_df, dataset_path)
        st.sidebar.success(f"Dataset `{upload_name}` uploaded successfully!")
        available_datasets = list_datasets(DATASET_DIR)  # Refresh list
    else:
        st.sidebar.warning("Please upload a dataset.")
|
| 68 |
+
|
| 69 |
+
# -------------------------------
# Load the Selected Dataset
# -------------------------------
# `df` is always bound after this section: an empty frame stands in when no
# dataset has been chosen, so the tabs below only need to test `df.empty`.
if not dataset_path:
    df = pd.DataFrame()
    st.warning("No dataset selected. Please choose or upload a dataset.")
else:
    df = load_dataset(dataset_path)
    if df.empty:
        st.warning("Dataset is empty or failed to load.")
|
| 79 |
+
|
| 80 |
+
# -------------------------------
# Main App Title & Description
# -------------------------------
st.title("📊 The Data Hub")

# -------------------------------
# Tabs for Operations
# -------------------------------
# The order of the labels fixes the index of each tab in `tabs`.
TAB_LABELS = [
    "View & Summary",
    "Clean Data",
    "Visualize Data",
    "Data Profiling",
    "Outlier Detection",
    "Custom Transformations",
    "Export",
]
tabs = st.tabs(TAB_LABELS)
|
| 94 |
+
|
| 95 |
+
# -------------------------------
# Tab 1: View & Summary
# -------------------------------
# Shows the first ten rows plus describe() statistics for every column.
with tabs[0]:
    st.subheader("📋 Current Dataset Preview")
    if df.empty:
        st.warning("No dataset available. Please choose or upload a dataset.")
    else:
        preview_rows = df.head(10)
        st.dataframe(preview_rows)
        st.markdown("#### 🔎 Basic Statistics")
        summary_stats = df.describe(include="all")
        st.write(summary_stats)
|
| 106 |
+
|
| 107 |
+
# -------------------------------
# Tab 2: Clean Data
# -------------------------------
# Collects cleaning options and optional column renames, then on button
# press cleans the frame, applies the renames, saves it back to
# `dataset_path`, and rebinds `df` to the cleaned result.
with tabs[1]:
    st.subheader("🧼 Clean Your Dataset")
    if not df.empty:
        remove_duplicates = st.checkbox("Remove Duplicate Rows", value=True)
        fill_missing = st.checkbox("Fill Missing Values", value=False)
        fill_value = st.text_input("Fill missing values with:", value="0")

        st.markdown("#### Optional: Rename Columns")
        rename_inputs = {}
        for position, col in enumerate(df.columns):
            # Explicit positional keys keep the widgets distinct even when two
            # columns render the same label text.
            rename_inputs[col] = st.text_input(
                f"Rename column '{col}'", value=col, key=f"rename_column_{position}"
            )

        if st.button("Clean Dataset"):
            # Only keep real renames: blank boxes and unchanged names are
            # ignored so a cleared input cannot produce an empty column name.
            new_names = {
                col: entered.strip()
                for col, entered in rename_inputs.items()
                if entered.strip() and entered.strip() != str(col)
            }
            renamed_columns = [new_names.get(col, col) for col in df.columns]

            # Fewer distinct names after renaming means two columns would
            # collide; refuse instead of silently creating duplicates.
            if len(set(renamed_columns)) < len(set(df.columns)):
                st.error("Column names must be unique after renaming.")
            else:
                cleaned_df = clean_dataset(df, remove_duplicates, fill_missing, fill_value)
                cleaned_df = cleaned_df.rename(columns=new_names)
                save_dataset(cleaned_df, dataset_path)
                st.success("✅ Dataset cleaned successfully!")
                st.dataframe(cleaned_df.head())
                df = cleaned_df
    else:
        st.warning("No dataset available for cleaning.")
|
| 131 |
+
|
| 132 |
+
# -------------------------------
# Tab 3: Visualize Data
# -------------------------------
# Draws one matplotlib/seaborn chart of the chosen type over the numeric
# columns of `df`.
with tabs[2]:
    st.subheader("📊 Visualize Your Data")

    if not df.empty:
        viz_type = st.selectbox("Select Visualization Type", ["Histogram", "Scatter", "Box Plot", "Heatmap", "Line Chart"])
        numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()

        if numeric_cols:
            col = st.selectbox("Select Column", numeric_cols)

            # Compare against None: a legitimate column label such as 0 is
            # falsy and would otherwise be rejected as "invalid".
            if col is not None:
                fig, ax = plt.subplots()

                if viz_type == "Histogram":
                    ax.hist(df[col].dropna(), bins=20, color="skyblue", edgecolor="black")
                elif viz_type == "Box Plot":
                    sns.boxplot(x=df[col].dropna(), ax=ax)
                elif viz_type == "Scatter":
                    x_col = st.selectbox("X-axis", numeric_cols)
                    # Default Y to the second numeric column when there is one,
                    # so the initial plot is not a column against itself.
                    y_col = st.selectbox("Y-axis", numeric_cols, index=min(1, len(numeric_cols) - 1))
                    if x_col is not None and y_col is not None:
                        ax.scatter(df[x_col], df[y_col], color="green")
                        ax.set_xlabel(str(x_col))
                        ax.set_ylabel(str(y_col))
                elif viz_type == "Heatmap":
                    corr = df[numeric_cols].corr()
                    sns.heatmap(corr, annot=True, cmap="coolwarm", ax=ax)
                elif viz_type == "Line Chart":
                    ax.plot(df.index, df[col], marker="o")

                st.pyplot(fig)
                # pyplot keeps every figure alive until it is closed; without
                # this each rerun of the script leaks one figure.
                plt.close(fig)
            else:
                st.warning("Please select a valid column.")
        else:
            st.warning("No numeric columns available for visualization.")
    else:
        st.warning("No dataset available for visualization.")
|
| 171 |
+
|
| 172 |
+
# -------------------------------
# Tab 4: Data Profiling
# -------------------------------
# Renders a multi-section profile of `df`: size, quality score, per-column
# statistics, missing values, duplicates, cardinality, value frequencies,
# correlations, pair plot, outliers and inconsistent types.
with tabs[3]:
    if not df.empty:

        # -------------------------------
        # 1. General Dataset Info
        # -------------------------------
        st.markdown("### 🛠️ General Information")
        st.write(f"✅ **Total Rows:** `{df.shape[0]}`")
        st.write(f"✅ **Total Columns:** `{df.shape[1]}`")
        st.write(f"✅ **Memory Usage:** `{df.memory_usage(deep=True).sum() / (1024 ** 2):.2f} MB`")
        st.write(f"✅ **Dataset Shape:** `{df.shape}`")

        # -------------------------------
        # 2. Dataset Quality Score
        # -------------------------------
        st.markdown("### 📊 Dataset Quality Score")
        score = compute_dataset_score(df)
        st.success(f"💯 Dataset Quality Score: `{score} / 100`")

        # -------------------------------
        # 3. Column Overview with Stats
        # -------------------------------
        st.markdown("### 🔥 Column Overview")

        # Numeric and categorical columns
        numeric_cols = df.select_dtypes(include=["number"]).columns
        categorical_cols = df.select_dtypes(include=["object"]).columns

        # Computed once and reused by the overview table and the chart below.
        missing_counts = df.isnull().sum()

        profile = pd.DataFrame({
            "Column": df.columns,
            # Stored as text: raw dtype objects are not a plain column type
            # for a frame that is about to be displayed.
            "Data Type": df.dtypes.astype(str).values,
            "Missing Values": missing_counts.values,
            "Missing %": (missing_counts / len(df) * 100).values,
            "Unique Values": df.nunique().values
        })

        # Add numeric statistics
        if len(numeric_cols) > 0:
            numeric_frame = df[numeric_cols]
            numeric_stats = pd.DataFrame({
                "Column": numeric_cols,
                "Min": numeric_frame.min().values,
                "Max": numeric_frame.max().values,
                "Mean": numeric_frame.mean().values,
                "Std Dev": numeric_frame.std().values,
                "Skewness": numeric_frame.skew().values,
                "Kurtosis": numeric_frame.kurt().values
            })

            # Merge stats with the profile; non-numeric columns get NaN stats
            profile = profile.merge(numeric_stats, on="Column", how="left")

        st.dataframe(profile)

        # -------------------------------
        # 4. Missing Values Visualization
        # -------------------------------
        st.markdown("### 🔎 Missing Values Distribution")
        missing_values = missing_counts[missing_counts > 0]

        if not missing_values.empty:
            fig, ax = plt.subplots(figsize=(12, 5))
            sns.barplot(x=missing_values.index, y=missing_values.values, ax=ax, color="skyblue")
            ax.set_title("Missing Values per Column")
            ax.set_ylabel("Missing Count")
            # Rotating through tick_params avoids the fixed-locator warning
            # raised by set_xticklabels(get_xticklabels()).
            ax.tick_params(axis="x", labelrotation=45)
            st.pyplot(fig)
            plt.close(fig)  # release the figure; pyplot retains it otherwise
        else:
            st.success("No missing values found!")

        # -------------------------------
        # 5. Duplicates Detection
        # -------------------------------
        st.markdown("### 🔥 Duplicates & Constant Columns Detection")

        # Duplicates
        duplicate_count = df.duplicated().sum()
        st.write(f"🔁 **Duplicate Rows:** `{duplicate_count}`")

        # Constant Columns
        constant_cols = [col for col in df.columns if df[col].nunique() == 1]
        if constant_cols:
            st.write(f"🚩 **Constant Columns:** `{constant_cols}`")
        else:
            st.success("No constant columns detected!")

        # -------------------------------
        # 6. Cardinality Analysis
        # -------------------------------
        st.markdown("### 🧬 Cardinality Analysis")

        # "High" means more than 80% of the rows hold distinct values.
        high_cardinality = [col for col in df.columns if df[col].nunique() > len(df) * 0.8]
        if high_cardinality:
            st.write(f"🔢 **High-Cardinality Columns:** `{high_cardinality}`")
        else:
            st.success("No high-cardinality columns detected!")

        # -------------------------------
        # 7. Top Frequent & Rare Values
        # -------------------------------
        st.markdown("### 🎯 Frequent & Rare Values")

        for col in categorical_cols:
            st.write(f"✅ **{col}**")

            value_counts = df[col].value_counts()
            top_values = value_counts.nlargest(5)
            rare_values = value_counts.nsmallest(5)

            st.write("📊 **Top Frequent Values:**")
            st.dataframe(top_values)

            st.write("🧪 **Rare Values:**")
            st.dataframe(rare_values)

        # -------------------------------
        # 8. Correlation Matrix
        # -------------------------------
        st.markdown("### 📊 Correlation Matrix")

        if len(numeric_cols) > 1:
            corr = df[numeric_cols].corr()

            fig, ax = plt.subplots(figsize=(12, 8))
            sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", square=True, ax=ax)
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("Not enough numeric columns for correlation analysis.")

        # -------------------------------
        # 9. Pair Plot (Numerical Relationships)
        # -------------------------------
        st.markdown("### 🔥 Pair Plot (Numerical Relationships)")

        if len(numeric_cols) >= 2:
            pairplot = sns.pairplot(df[numeric_cols], diag_kind='kde')
            st.pyplot(pairplot.fig)
            plt.close(pairplot.fig)
        else:
            st.info("Not enough numeric columns for pair plot visualization.")

        # -------------------------------
        # 10. Outlier Detection
        # -------------------------------
        st.markdown("### 🚩 Outlier Detection")

        outliers = detect_outliers(df)
        if outliers:
            st.write("✅ **Outliers Detected:**")
            st.dataframe(pd.DataFrame(list(outliers.items()), columns=["Column", "Outlier Count"]))
        else:
            st.success("No significant outliers detected!")

        # -------------------------------
        # 11. Inconsistent Data Types
        # -------------------------------
        st.markdown("### 🚫 Inconsistent Data Types")

        inconsistent_types = detect_inconsistent_types(df)
        if inconsistent_types:
            st.write("⚠️ **Inconsistent Data Types Detected:**")
            st.write(inconsistent_types)
        else:
            st.success("No inconsistent data types detected!")

    else:
        st.warning("No dataset available for profiling.")
|
| 340 |
+
|
| 341 |
+
# -------------------------------
# Tab 5: Outlier Detection
# -------------------------------
# Writes out whatever detect_outliers() reports for the current dataset.
with tabs[4]:
    st.subheader("🚀 Outlier Detection")
    if df.empty:
        st.warning("No dataset available for outlier detection.")
    else:
        outliers = detect_outliers(df)
        st.write(outliers)
|
| 351 |
+
|
| 352 |
+
# -------------------------------
# Tab 6: Custom Transformations
# -------------------------------
# The tab bar declares this tab, but the page has no UI for it; the export
# controls used to be rendered here by mistake.
with tabs[5]:
    st.subheader("🛠️ Custom Transformations")
    # TODO: build the transformation UI on top of utils.apply_transformation.
    st.info("Custom transformations are not available yet.")

# -------------------------------
# Tab 7: Export
# -------------------------------
# Serialises `df` in the chosen format and offers it as a download.
with tabs[6]:
    st.subheader("📤 Export Dataset")
    export_format = st.selectbox("Export Format", ["CSV", "Excel", "JSON"])
    if not df.empty:
        # Imported locally: the page's import block does not bring in `io`.
        import io

        # The payload now matches the selected format; previously CSV text was
        # served under every extension, including an invalid ".excel".
        if export_format == "Excel":
            excel_buffer = io.BytesIO()
            df.to_excel(excel_buffer, index=False)
            payload = excel_buffer.getvalue()
            file_name = "dataset.xlsx"
            mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        elif export_format == "JSON":
            payload = df.to_json(orient="records")
            file_name = "dataset.json"
            mime = "application/json"
        else:
            payload = df.to_csv(index=False)
            file_name = "dataset.csv"
            mime = "text/csv"

        st.download_button("Download", payload, file_name, mime)
    else:
        st.warning("No dataset available for export.")
|
| 360 |
+
|
pages/Finetune.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from utils import (
|
| 7 |
+
load_model,
|
| 8 |
+
get_hf_token,
|
| 9 |
+
simulate_training,
|
| 10 |
+
plot_training_metrics,
|
| 11 |
+
load_finetuned_model,
|
| 12 |
+
save_model
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
st.title("🔥 Fine-tune the Gemma Model")

# -------------------------------
# Finetuning Option Selection
# -------------------------------
finetune_option = st.radio("Select Finetuning Option", ["Fine-tune from scratch", "Refinetune existing model"])

# -------------------------------
# Model Selection Logic
# -------------------------------
# Exactly one of these is set by the time the page reaches the training
# button: `selected_model` (a Hugging Face model id) when starting from a
# base model, `saved_model_path` when refining a local checkpoint.
selected_model = None
saved_model_path = None

if finetune_option == "Refinetune existing model":
    # Dynamically list all saved models from the /models folder
    model_dir = "models"

    if os.path.isdir(model_dir):
        saved_models = sorted(f for f in os.listdir(model_dir) if f.endswith(".pt"))
    else:
        saved_models = []

    if saved_models:
        chosen_checkpoint = st.selectbox("Select a saved model to re-finetune", saved_models)
        saved_model_path = os.path.join(model_dir, chosen_checkpoint)
        st.success(f"✅ Selected model for refinement: `{saved_model_path}`")
    else:
        st.warning("⚠️ No saved models found! Switching to fine-tuning from scratch.")
        finetune_option = "Fine-tune from scratch"

# Checked after the refinement branch so the fallback above also reaches the
# base-model selector; it used to leave `selected_model` as None.
if finetune_option == "Fine-tune from scratch":
    # Display Hugging Face model list
    model_list = [
        "google/gemma-3-1b-pt",
        "google/gemma-3-1b-it",
        "google/gemma-3-4b-pt",
        "google/gemma-3-4b-it",
        "google/gemma-3-12b-pt",
        "google/gemma-3-12b-it",
        "google/gemma-3-27b-pt",
        "google/gemma-3-27b-it"
    ]
    selected_model = st.selectbox("🛠️ Select Gemma Model to Fine-tune", model_list)
|
| 58 |
+
|
| 59 |
+
# -------------------------------
# Dataset Selection
# -------------------------------
# Either merges an uploaded CSV/JSON file into `train_data.csv` or confirms
# that the existing file is present.

st.subheader("📚 Dataset Selection")

# Dataset source selection
dataset_option = st.radio("Choose dataset:", ["Upload New Dataset", "Use Existing Dataset (`train_data.csv`)"])

# NOTE(review): this is relative to the working directory, while the Dataset
# Management page stores files under `datasets/` - confirm which is intended.
dataset_path = "train_data.csv"

if dataset_option == "Upload New Dataset":
    uploaded_file = st.file_uploader("📤 Upload Dataset (CSV or JSON)", type=["csv", "json"])

    if uploaded_file is not None:
        # Streamlit reruns the whole script on every widget interaction while
        # the uploader still holds the file. Remember what was ingested so the
        # same upload is not appended again on each rerun.
        upload_id = (uploaded_file.name, uploaded_file.size)

        if st.session_state.get("finetune_last_upload") == upload_id:
            st.info(f"`{uploaded_file.name}` has already been added to `{dataset_path}`.")
        else:
            # Handle CSV or JSON upload (extension match is case-insensitive)
            lower_name = uploaded_file.name.lower()
            if lower_name.endswith(".csv"):
                new_data = pd.read_csv(uploaded_file)
            elif lower_name.endswith(".json"):
                json_data = json.load(uploaded_file)
                new_data = pd.json_normalize(json_data)
            else:
                st.error("❌ Unsupported file format. Please upload CSV or JSON.")
                st.stop()

            # Read only the header of the existing file, if there is one.
            existing_columns = None
            if os.path.exists(dataset_path):
                try:
                    existing_columns = pd.read_csv(dataset_path, nrows=0).columns.tolist()
                except pd.errors.EmptyDataError:
                    existing_columns = None  # empty file: treat as a new dataset

            # Append or create new dataset
            if existing_columns is not None:
                # Rows are appended without a header, so the columns must match
                # the existing file; otherwise the CSV would be corrupted.
                if set(existing_columns) != set(new_data.columns):
                    st.error(f"❌ Uploaded columns do not match `{dataset_path}`. Expected: {existing_columns}")
                    st.stop()
                new_data[existing_columns].to_csv(dataset_path, mode='a', index=False, header=False)
                st.success(f"✅ Data appended to `{dataset_path}`!")
            else:
                new_data.to_csv(dataset_path, index=False)
                st.success(f"✅ Dataset saved as `{dataset_path}`!")

            st.session_state["finetune_last_upload"] = upload_id

elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
    if os.path.exists(dataset_path):
        st.success("✅ Using existing `train_data.csv` for fine-tuning.")
    else:
        st.error("❌ `train_data.csv` not found! Please upload a new dataset.")
        st.stop()
|
| 98 |
+
|
| 99 |
+
# -------------------------------
# Hyperparameters Configuration
# -------------------------------
# Lower bounds stop zero or negative values from reaching the training loop.
# The learning-rate step matches the five decimals shown by its format.
learning_rate = st.number_input("📊 Learning Rate", min_value=1e-5, value=1e-4, step=1e-5, format="%.5f")
batch_size = st.number_input("🛠️ Batch Size", min_value=1, value=16, step=1)
epochs = st.number_input("⏱️ Epochs", min_value=1, value=3, step=1)
|
| 105 |
+
|
| 106 |
+
# -------------------------------
# Fine-tuning Execution
# -------------------------------
# On button press: load the base model (plus a saved checkpoint when
# refining), run the simulated training loop while plotting its metrics,
# save the weights under `models/`, and reload them.
if st.button("🚀 Start Fine-tuning"):
    # Imported locally: this page's import block does not bring in pyplot.
    import matplotlib.pyplot as plt

    st.info("Fine-tuning process initiated...")

    # Retrieve Hugging Face Token
    hf_token = get_hf_token()
    if not hf_token:
        st.error("❌ A Hugging Face token is required to load the model. Aborting.")
        st.stop()

    refining = finetune_option == "Refinetune existing model" and bool(saved_model_path)
    # Saved checkpoints are restored on top of this base model.
    base_model_id = "google/gemma-3-1b-it" if refining else selected_model

    # Model loading logic
    if refining:
        # Load the base model first
        tokenizer, model = load_model(base_model_id, hf_token)
        if model is None:
            st.error("❌ Failed to load the base model. Aborting.")
            st.stop()

        # Load the saved model checkpoint for re-finetuning
        model = load_finetuned_model(model, saved_model_path)

        if model:
            st.success(f"✅ Loaded saved model: `{saved_model_path}` for refinement!")
        else:
            st.error("❌ Failed to load the saved model. Aborting.")
            st.stop()

    else:
        # Fine-tune from scratch (load base model)
        if not selected_model:
            st.error("❌ Please select a model to fine-tune.")
            st.stop()

        tokenizer, model = load_model(selected_model, hf_token)

        if model:
            st.success(f"✅ Base model loaded: `{selected_model}`")
        else:
            st.error("❌ Failed to load the base model. Aborting.")
            st.stop()

    # Simulate fine-tuning loop
    progress_bar = st.progress(0)
    training_placeholder = st.empty()

    for epoch, losses, accs in simulate_training(epochs):
        fig = plot_training_metrics(epoch, losses, accs)
        training_placeholder.pyplot(fig)
        plt.close(fig)  # one figure per epoch would otherwise accumulate
        progress_bar.progress(min(epoch / epochs, 1.0))

    # Save fine-tuned model with timestamp. The name is derived from
    # `base_model_id` because `selected_model` is None when refining, which
    # used to raise AttributeError here.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    model_tag = base_model_id.replace('/', '_')
    new_model_name = f"models/fine_tuned_model_{model_tag}_{timestamp}.pt"

    # Save the fine-tuned model
    saved_model_path = save_model(model, new_model_name)

    if saved_model_path:
        st.success(f"✅ Fine-tuning completed! Model saved as `{saved_model_path}`")

        # Load the fine-tuned model for immediate inference
        model = load_finetuned_model(model, saved_model_path)

        if model:
            st.success("🛠️ Fine-tuned model loaded and ready for inference!")
        else:
            st.error("❌ Failed to load the fine-tuned model for inference.")
    else:
        st.error("❌ Failed to save the fine-tuned model.")
|
requirements.txt
CHANGED
|
@@ -3,4 +3,10 @@ pandas==2.2.3
|
|
| 3 |
numpy==2.2.4
|
| 4 |
torch==2.6.0
|
| 5 |
transformers @ git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
|
| 6 |
-
matplotlib==3.10.1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
numpy==2.2.4
|
| 4 |
torch==2.6.0
|
| 5 |
transformers @ git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
|
| 6 |
+
matplotlib==3.10.1
|
| 7 |
+
rich>=13.1.0
|
| 8 |
+
FuzzyTM>=0.4.0
|
| 9 |
+
requests>=2.28.0
|
| 10 |
+
xlsxwriter>=3.0.1
|
| 11 |
+
python-dotenv>=0.19.0
|
| 12 |
+
scipy>=1.7.3
|
utils.py
ADDED
|
@@ -0,0 +1,468 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
import re
|
| 10 |
+
import os
|
| 11 |
+
import asyncio
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
from scipy.stats import skew, kurtosis, zscore
|
| 14 |
+
|
| 15 |
+
# -------------------------------
|
| 16 |
+
# Environment and Token Management
|
| 17 |
+
# -------------------------------
|
| 18 |
+
|
| 19 |
+
# Load environment variables from .env file in local development
|
| 20 |
+
load_dotenv()
|
| 21 |
+
|
| 22 |
+
def get_hf_token():
    """
    Return the Hugging Face token, or None when none is configured.

    The HF_TOKEN environment variable wins when it is set to a non-empty
    value; otherwise the token is read from Streamlit secrets. When neither
    source has it, an error is shown in the app and None is returned.
    """
    env_token = os.getenv("HF_TOKEN")
    if env_token:
        return env_token

    # Environment variable absent or empty: fall back to Streamlit secrets
    try:
        return st.secrets["HF_TOKEN"]
    except (FileNotFoundError, KeyError):
        st.error("❌ HF_TOKEN not found. Add it to .env or secrets.toml.")
        return None
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# -------------------------------
|
| 40 |
+
# Model Loading and Management
|
| 41 |
+
# -------------------------------
|
| 42 |
+
|
| 43 |
+
async def async_load(model_id: str):
    """
    Placeholder coroutine that only sleeps briefly.

    It exists so callers can spin up an event loop via asyncio.run();
    `model_id` is accepted but not used.
    """
    warmup_seconds = 0.1
    await asyncio.sleep(warmup_seconds)
|
| 48 |
+
|
| 49 |
+
@st.cache_resource
def _load_model_cached(model_id: str, token: str, checkpoint_path: str = None):
    """Load tokenizer and model; raises on failure so failures are never cached."""
    # Best-effort event-loop warm-up. It must not abort the load when a loop
    # is already running, so the RuntimeError from asyncio.run is tolerated.
    warmup = async_load(model_id)
    try:
        asyncio.run(warmup)
    except RuntimeError:
        warmup.close()  # avoid a "coroutine was never awaited" warning

    tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
    model = AutoModelForCausalLM.from_pretrained(model_id, token=token)

    # Load fine-tuned checkpoint if provided
    if checkpoint_path and os.path.exists(checkpoint_path):
        state_dict = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        model.load_state_dict(state_dict)
        model.eval()
        st.success("✅ Fine-tuned model loaded successfully!")

    return tokenizer, model


def load_model(model_id: str, token: str, checkpoint_path: str = None):
    """
    Loads and caches the Gemma model and tokenizer with the Hugging Face token.

    Successful loads are cached by Streamlit's resource cache. Failures are
    reported in the app and returned as (None, None) without being cached,
    so a later call retries instead of replaying the stored failure.

    Args:
        model_id (str): The Hugging Face model ID.
        token (str): The authentication token.
        checkpoint_path (str): Optional path to a fine-tuned model checkpoint.

    Returns:
        tuple: (tokenizer, model), or (None, None) if loading failed.
    """
    try:
        return _load_model_cached(model_id, token, checkpoint_path)
    except Exception as e:
        st.error(f"❌ Model loading failed: {e}")
        return None, None


# Keep the cache-clearing hook that the decorated function used to expose.
load_model.clear = _load_model_cached.clear
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# -------------------------------
|
| 82 |
+
# Model Saving Function
|
| 83 |
+
# -------------------------------
|
| 84 |
+
|
| 85 |
+
def save_model(model, model_name: str):
    """
    Saves the fine-tuned model's state dict to the specified path.

    Args:
        model (torch.nn.Module): The PyTorch model instance.
        model_name (str): The file path to save the model.

    Returns:
        str: The path where the model is saved, or None if saving failed
        (the error is shown in the app).
    """
    try:
        # Create the parent directory only when the path has one:
        # os.makedirs("") raises for a bare file name such as "model.pt".
        target_dir = os.path.dirname(model_name)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Save the model weights
        torch.save(model.state_dict(), model_name)
        st.success(f"✅ Model saved successfully at `{model_name}`")
        return model_name
    except Exception as e:
        st.error(f"❌ Failed to save model: {e}")
        return None
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# -------------------------------
|
| 110 |
+
# File Processing and Cleaning
|
| 111 |
+
# -------------------------------
|
| 112 |
+
|
| 113 |
+
def preprocess_data(uploaded_file, file_extension):
    """
    Reads the uploaded file and returns a processed version.

    Supports CSV, JSONL, and TXT; the extension is matched case-insensitively
    and a leading dot is ignored.

    Args:
        uploaded_file: File-like object holding the upload.
        file_extension (str): Extension naming the format.

    Returns:
        A DataFrame for CSV and JSONL (or the raw list of records if JSONL
        cannot be tabulated), a list of lines for TXT, or None when the file
        cannot be processed or the extension is unsupported.
    """
    extension = str(file_extension).lower().lstrip(".")

    try:
        if extension == "csv":
            return pd.read_csv(uploaded_file)

        if extension == "jsonl":
            # Skip blank lines: a trailing newline or empty separator line
            # would otherwise fail the whole file with a JSON decode error.
            records = [
                json.loads(line)
                for line in uploaded_file.readlines()
                if line.strip()
            ]
            try:
                return pd.DataFrame(records)
            except Exception:
                st.warning("⚠️ Unable to convert JSONL to table. Previewing raw JSON.")
                return records

        if extension == "txt":
            text_data = uploaded_file.read().decode("utf-8")
            return text_data.splitlines()

    except Exception as e:
        st.error(f"❌ Error processing file: {e}")
        return None

    # Report unknown formats instead of returning None without explanation.
    st.error(f"❌ Unsupported file type: `{file_extension}`")
    return None
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def clean_text(text, lowercase=True, remove_punctuation=True):
    """
    Normalize a string: optionally lower-case it, then optionally strip
    every character that is neither a word character nor whitespace.
    """
    normalized = text.lower() if lowercase else text
    if not remove_punctuation:
        return normalized
    return re.sub(r'[^\w\s]', '', normalized)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# -------------------------------
|
| 151 |
+
# Model Conversion and Quantization
|
| 152 |
+
# -------------------------------
|
| 153 |
+
|
| 154 |
+
def quantize_model(model):
    """
    Applies dynamic int8 quantization to the model's ``torch.nn.Linear``
    layers.

    Returns the quantized model on success. On any failure the error is
    shown through ``st.error`` and the model that was passed in is returned
    unchanged.
    """
    try:
        target_layers = {torch.nn.Linear}
        result = torch.quantization.quantize_dynamic(
            model, target_layers, dtype=torch.qint8
        )
        st.success("✅ Model quantized successfully!")
    except Exception as err:
        st.error(f"❌ Quantization failed: {err}")
        result = model
    return result
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def convert_to_torchscript(model, output_path="model_ts.pt", example_input=None):
    """
    Converts the model to TorchScript format by tracing it.

    Args:
        model: Module (or callable) to trace.
        output_path: Where the traced model is saved.
        example_input: Input used to drive the trace. Defaults to a
            ``(1, 10)`` tensor of random integer ids in ``[0, 100)``.

    Returns:
        ``output_path`` on success, ``None`` on failure (the error is shown
        through ``st.error``).
    """
    try:
        if example_input is None:
            example_input = torch.randint(0, 100, (1, 10))

        is_module = isinstance(model, torch.nn.Module)
        was_training = model.training if is_module else False
        if is_module:
            # Trace in inference mode: a trace records exactly what ran, so
            # training-mode dropout / batch-norm would be baked into the
            # exported graph.
            model.eval()
        try:
            with torch.no_grad():
                traced_model = torch.jit.trace(model, example_input)
        finally:
            # Leave the caller's model in the mode it arrived in.
            if was_training:
                model.train()

        traced_model.save(output_path)
        return output_path
    except Exception as e:
        st.error(f"❌ TorchScript conversion failed: {e}")
        return None
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def convert_to_onnx(model, output_path="model.onnx"):
    """
    Converts the model to ONNX format.

    The export is driven by a single ``(1, 10)`` tensor of random integer
    ids in ``[0, 100)``; the graph's input is named "input" and its output
    "output". Returns ``output_path`` on success, ``None`` on failure (the
    error is shown through ``st.error``).
    """
    try:
        sample_ids = torch.randint(0, 100, (1, 10))
        torch.onnx.export(
            model,
            sample_ids,
            output_path,
            input_names=["input"],
            output_names=["output"],
        )
    except Exception as err:
        st.error(f"❌ ONNX conversion failed: {err}")
        return None
    return output_path
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# -------------------------------
|
| 197 |
+
# Model Inference and Training
|
| 198 |
+
# -------------------------------
|
| 199 |
+
|
| 200 |
+
def simulate_training(num_epochs, delay=1):
    """
    Simulates a training loop for demonstration.

    Args:
        num_epochs: Number of epochs to simulate.
        delay: Seconds to pause between epochs (default 1). Pass 0 to run
            without pausing.

    Yields:
        ``(epoch, loss_values, accuracy_values)`` after every epoch. The two
        lists are the same objects on every yield and keep growing, so copy
        them if a snapshot is needed.
    """
    loss_values = []
    accuracy_values = []
    for epoch in range(1, num_epochs + 1):
        loss = np.exp(-epoch) + np.random.random() * 0.1
        # Accuracy is a fraction: without the cap the noise term pushes the
        # last epochs above 1.0.
        acc = min(1.0, 0.5 + (epoch / num_epochs) * 0.5 + np.random.random() * 0.05)
        loss_values.append(loss)
        accuracy_values.append(acc)
        yield epoch, loss_values, accuracy_values
        # No point in pausing once the last epoch has been reported.
        if delay and epoch < num_epochs:
            time.sleep(delay)
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def plot_training_metrics(epochs, loss_values, accuracy_values):
    """
    Plots training loss and accuracy side by side.

    ``epochs`` is the number of epochs; both series are drawn against
    ``1..epochs``. Returns the matplotlib figure holding the two panels
    (loss on the left in red, accuracy on the right in green).
    """
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    panels = (
        (ax[0], loss_values, 'red', "Training Loss", "Loss"),
        (ax[1], accuracy_values, 'green', "Training Accuracy", "Accuracy"),
    )
    for axis, series, colour, title, y_label in panels:
        axis.plot(range(1, epochs+1), series, marker='o', color=colour)
        axis.set_title(title)
        axis.set_xlabel("Epoch")
        axis.set_ylabel(y_label)

    return fig
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def generate_response(prompt, model, tokenizer, max_length=200):
    """
    Generates a response using the fine-tuned model.

    Args:
        prompt: Text to continue.
        model: Model exposing ``generate``.
        tokenizer: Tokenizer matching the model.
        max_length: Maximum length passed to ``generate``.

    Returns:
        The decoded first generated sequence, or "" when generation fails
        (the error is shown through ``st.error``).
    """
    try:
        encoded = tokenizer(prompt, return_tensors="pt")
        input_ids = encoded["input_ids"]
        attention_mask = encoded.get("attention_mask")

        # Inputs must live on the same device as the model's weights.
        device = getattr(model, "device", None)
        if device is not None:
            input_ids = input_ids.to(device)
            if attention_mask is not None:
                attention_mask = attention_mask.to(device)

        generate_kwargs = {
            "max_length": max_length,
            "num_return_sequences": 1,
            # temperature only takes effect when sampling is enabled;
            # without do_sample=True decoding is greedy and 0.7 is ignored.
            "do_sample": True,
            "temperature": 0.7,
        }
        if attention_mask is not None:
            generate_kwargs["attention_mask"] = attention_mask

        with torch.no_grad():
            outputs = model.generate(input_ids, **generate_kwargs)

        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        st.error(f"❌ Response generation failed: {e}")
        return ""
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
# -------------------------------
|
| 252 |
+
# Model Loading for Inference
|
| 253 |
+
# -------------------------------
|
| 254 |
+
|
| 255 |
+
def load_finetuned_model(model, checkpoint_path="fine_tuned_model.pt"):
    """
    Loads a fine-tuned model from a checkpoint.

    Args:
        model: Model whose weights are replaced by the checkpoint's
            state dict.
        checkpoint_path: Path of a file holding a saved state dict.

    Returns:
        The same ``model`` object: with the checkpoint loaded and switched
        to eval mode when the file exists, untouched (after an ``st.error``
        message) when it does not.
    """
    if not os.path.exists(checkpoint_path):
        st.error(f"❌ Checkpoint not found: {checkpoint_path}")
        return model

    # SECURITY: torch.load unpickles the file. weights_only=True limits the
    # unpickler to tensors and plain containers, which is all a state dict
    # needs, so a crafted checkpoint cannot run arbitrary code on load.
    state_dict = torch.load(
        checkpoint_path,
        map_location=torch.device('cpu'),
        weights_only=True,
    )
    model.load_state_dict(state_dict)
    model.eval()
    st.success("✅ Fine-tuned model loaded successfully!")
    return model
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
import pandas as pd
|
| 270 |
+
import os
|
| 271 |
+
import pyarrow as pa
|
| 272 |
+
import numpy as np
|
| 273 |
+
from scipy.stats import zscore, kurtosis, skew
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# ======================================
|
| 277 |
+
# Dataset Operations
|
| 278 |
+
# ======================================
|
| 279 |
+
def load_dataset(path: str) -> pd.DataFrame:
    """
    Read a CSV file and pass it through ``make_arrow_compatible``.

    Any failure is printed and an empty DataFrame is returned instead.
    """
    try:
        return make_arrow_compatible(pd.read_csv(path))
    except Exception as err:
        print(f"Error loading dataset: {err}")
    return pd.DataFrame()
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
def save_dataset(df: pd.DataFrame, path: str):
    """
    Write ``df`` to ``path`` as CSV without the index column.

    Failures are printed rather than raised; nothing is returned.
    """
    try:
        df.to_csv(path_or_buf=path, index=False)
    except Exception as save_error:
        print(f"Error saving dataset: {save_error}")
    return None
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def list_datasets(directory: str = "datasets") -> list:
    """
    Return the names of the entries in ``directory`` ending in
    ``.csv``, ``.json`` or ``.xlsx``.

    If the directory cannot be listed the error is printed and an empty
    list is returned.
    """
    supported = ('.csv', '.json', '.xlsx')
    found = []
    try:
        for entry in os.listdir(directory):
            if entry.endswith(supported):
                found.append(entry)
    except Exception as err:
        print(f"Error listing datasets: {err}")
        return []
    return found
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
# ======================================
|
| 307 |
+
# Data Cleaning Functions
|
| 308 |
+
# ======================================
|
| 309 |
+
def clean_dataset(
    df: pd.DataFrame,
    remove_duplicates: bool = True,
    fill_missing: bool = False,
    fill_value: str = "0",
    trim_spaces: bool = True
) -> pd.DataFrame:
    """
    Clean the dataset with multiple operations:
    - Remove duplicates
    - Fill missing values
    - Trim spaces
    - Remove empty columns and rows
    - Auto-cast date columns

    Args:
        df: Dataset to clean.
        remove_duplicates: Drop fully duplicated rows.
        fill_missing: Replace missing values with ``fill_value``.
        fill_value: Replacement used when ``fill_missing`` is set.
        trim_spaces: Strip leading/trailing whitespace from string cells.

    Returns:
        The cleaned dataset, passed through ``make_arrow_compatible``.
    """
    # Remove duplicates
    if remove_duplicates:
        df = df.drop_duplicates()

    # Fill missing values
    if fill_missing:
        df = df.fillna(fill_value)

    # Trim spaces
    if trim_spaces:
        # Strip only real strings: ``.str.strip()`` turns every non-string
        # cell of a mixed object column into NaN.
        df = df.apply(
            lambda x: x.map(lambda v: v.strip() if isinstance(v, str) else v)
            if x.dtype == "object" else x
        )

    # Remove empty columns & rows
    df = df.dropna(how="all", axis=1)
    df = df.dropna(how="all", axis=0)

    # Auto-cast date columns
    for col in df.columns:
        # pd.to_datetime accepts plain numbers (read as offsets from the
        # epoch), so numeric columns must be skipped or every one of them
        # would be rewritten as 1970 timestamps.
        if pd.api.types.is_numeric_dtype(df[col]):
            continue
        try:
            df[col] = pd.to_datetime(df[col])
        except (ValueError, TypeError):
            pass

    return make_arrow_compatible(df)
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
# --------------------------------------
|
| 351 |
+
# Dataset Quality Score
|
| 352 |
+
# --------------------------------------
|
| 353 |
+
def compute_dataset_score(df):
    """
    Compute dataset quality score.

    The score is ``(1 - missing ratio) * (1 - duplicate-row ratio) *
    numeric score * 100``, where the numeric score is
    ``1 - (mean |skew| + mean |kurtosis|) / 10`` over the numeric columns,
    kept within ``[0, 1]``.

    Returns:
        A float in ``[0, 100]`` rounded to two decimals; ``0.0`` for an
        empty dataset.
    """
    if df.empty:
        return 0.0

    # rows * columns. (np.product was removed in NumPy 2.0.)
    total_cells = df.size
    missing_cells = df.isnull().sum().sum()
    missing_ratio = missing_cells / total_cells

    duplicate_ratio = 1 - (df.drop_duplicates().shape[0] / df.shape[0])

    numeric_score = 1.0
    numeric_cols = df.select_dtypes(include=["number"]).columns
    if len(numeric_cols) > 0:
        skew_vals = df[numeric_cols].apply(lambda x: np.abs(skew(x.dropna())), axis=0)
        kurt_vals = df[numeric_cols].apply(lambda x: np.abs(kurtosis(x.dropna())), axis=0)
        penalty = (skew_vals.mean() + kurt_vals.mean()) / 10
        # Skew/kurtosis are undefined (NaN) for constant or empty columns;
        # when nothing is measurable apply no penalty instead of returning
        # NaN. Heavy tails are floored at 0 so the score never goes negative.
        if not np.isnan(penalty):
            numeric_score = min(1.0, max(0.0, 1 - penalty))

    score = (1 - missing_ratio) * (1 - duplicate_ratio) * numeric_score * 100
    return round(float(score), 2)
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
# --------------------------------------
|
| 377 |
+
# Outlier Detection
|
| 378 |
+
# --------------------------------------
|
| 379 |
+
def detect_outliers(df, threshold=3):
    """
    Count outliers per numeric column.

    For each numeric column the Z-scores of its non-missing values are
    computed and the number whose absolute value exceeds ``threshold`` is
    recorded. Returns a dict mapping column name to that count.
    """
    def _count_extremes(series):
        magnitudes = np.abs(zscore(series.dropna()))
        return np.sum(magnitudes > threshold)

    numeric_names = df.select_dtypes(include=["number"]).columns
    return {name: _count_extremes(df[name]) for name in numeric_names}
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
# --------------------------------------
|
| 390 |
+
# Detect Inconsistent Types
|
| 391 |
+
# --------------------------------------
|
| 392 |
+
def detect_inconsistent_types(df):
    """
    Report columns whose cells hold more than one Python type.

    Returns a dict mapping each such column to a ``{type: count}`` dict of
    the types found in it; columns with a single cell type are omitted.
    """
    report = {}
    for name in df.columns:
        cell_types = df[name].apply(type)
        if cell_types.nunique() <= 1:
            continue
        report[name] = cell_types.value_counts().to_dict()
    return report
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
# ======================================
|
| 402 |
+
# Data Transformations
|
| 403 |
+
# ======================================
|
| 404 |
+
def apply_transformation(df: pd.DataFrame, col: str, transform: str) -> pd.DataFrame:
    """
    Apply transformations to a specified column:
    - Log Transformation
    - Min-Max Normalization
    - Z-score Standardization

    Args:
        df: Dataset; the column is overwritten in place.
        col: Name of the column to transform.
        transform: "Log", "Normalize" or "Standardize". Any other value
            leaves the column untouched.

    Returns:
        ``df`` passed through ``make_arrow_compatible``.

    Raises:
        KeyError: If ``col`` is not a column of ``df``.
    """
    if col not in df.columns:
        raise KeyError(f"Column '{col}' not found in dataset")

    values = df[col]

    if transform == "Log":
        logged = np.log1p(values)
        # log1p(0) is already 0, so zeros need no special casing. Inputs
        # below -1 have no logarithm and still map to 0, but genuinely
        # missing inputs now stay missing instead of silently becoming 0.
        df[col] = logged.where(logged.notna() | values.isna(), 0)

    elif transform == "Normalize":
        low = values.min()
        spread = values.max() - low
        if spread != 0:
            df[col] = (values - low) / spread
        else:
            # Constant column: 0/0 would fill the column with NaN.
            df[col] = (values - low).astype(float)

    elif transform == "Standardize":
        centered = values - values.mean()
        deviation = values.std()
        if pd.isna(deviation) or deviation == 0:
            # No spread (constant column or a single value): dividing would
            # produce NaN, so report zero deviation from the mean.
            df[col] = centered.astype(float)
        else:
            df[col] = centered / deviation

    return make_arrow_compatible(df)
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
# ======================================
|
| 427 |
+
# Normalization & Standardization
|
| 428 |
+
# ======================================
|
| 429 |
+
def normalize_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """
    Normalize column (Min-Max Scaling).

    Rescales ``df[col]`` in place to ``[0, 1]`` and returns ``df``. A column
    whose values are all equal becomes all zeros.
    """
    values = df[col]
    low = values.min()
    spread = values.max() - low
    if spread != 0:
        df[col] = (values - low) / spread
    else:
        # Constant column: 0/0 would fill the column with NaN.
        df[col] = (values - low).astype(float)
    return df
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
def standardize_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """
    Standardize column (Z-score).

    Replaces ``df[col]`` in place with ``(value - mean) / std`` (sample
    standard deviation) and returns ``df``. When the column has no spread
    (constant values or a single value) the result is all zeros.
    """
    values = df[col]
    centered = values - values.mean()
    deviation = values.std()
    if pd.isna(deviation) or deviation == 0:
        # Dividing by a zero/undefined deviation would yield NaN.
        df[col] = centered.astype(float)
    else:
        df[col] = centered / deviation
    return df
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
# ======================================
|
| 442 |
+
# Arrow Compatibility & Fixes
|
| 443 |
+
# ======================================
|
| 444 |
+
def make_arrow_compatible(df: pd.DataFrame) -> pd.DataFrame:
    """
    Ensure dataset compatibility with Streamlit Arrow serialization.

    Every object-dtype column has its non-missing values converted to
    ``str`` so the column holds a single value type. Missing entries are
    left missing. ``df`` is modified in place and returned.
    """
    for col in df.columns:
        if df[col].dtype == object:
            try:
                column = df[col]
                # astype(str) alone would turn None/NaN into the literal
                # text "None"/"nan", hiding missing data from later checks.
                df[col] = column.where(column.isna(), column.astype(str))
            except Exception as e:
                print(f"Could not convert column {col}: {e}")
    return df
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
def fix_arrow_incompatibility(df: pd.DataFrame) -> pd.DataFrame:
    """
    Fix Arrow incompatibility by converting mixed types to `str`.

    Each column is test-converted to an Arrow table on its own; a column
    that Arrow rejects is reported and cast to ``str``. ``df`` is modified
    in place and returned.
    """
    for col in df.columns:
        try:
            pa.Table.from_pandas(df[[col]])
        # Mixed-type columns are rejected with either error depending on
        # the values involved (e.g. "Expected bytes, got a 'int' object"
        # is an ArrowTypeError), so both must be handled.
        except (pa.lib.ArrowInvalid, pa.lib.ArrowTypeError):
            print(f"Arrow compatibility issue in column: {col}")
            df[col] = df[col].astype(str)
    return df
|
| 468 |
+
|