Lubabah0 commited on
Commit
adfb728
·
verified ·
1 Parent(s): 183a970

Upload 10 files

Browse files
app.py ADDED
@@ -0,0 +1,778 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ULTIMATE LoRA Fine-Tuning Demo - Covers ALL Project Requirements
3
+ Group 6: Model Adaptation, Efficient Fine-Tuning & Deployment of LLMs
4
+ """
5
+
6
+ import streamlit as st
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
9
+ from peft import PeftModel
10
+ import time
11
+ import psutil
12
+ import os
13
+
14
+ # Page configuration
15
+ st.set_page_config(
16
+ page_title="LoRA Fine-Tuning Complete Demo",
17
+ page_icon="🤖",
18
+ layout="wide",
19
+ initial_sidebar_state="expanded"
20
+ )
21
+
22
+ # Custom CSS
23
+ st.markdown("""
24
+ <style>
25
+ .main-header {
26
+ font-size: 2.5rem;
27
+ font-weight: bold;
28
+ text-align: center;
29
+ background: linear-gradient(120deg, #1f77b4, #00cc88);
30
+ -webkit-background-clip: text;
31
+ -webkit-text-fill-color: transparent;
32
+ margin-bottom: 0.5rem;
33
+ }
34
+ .sub-header {
35
+ text-align: center;
36
+ color: #666;
37
+ margin-bottom: 2rem;
38
+ font-size: 1.1rem;
39
+ }
40
+ .metric-card {
41
+ background: #f0f2f6;
42
+ padding: 1rem;
43
+ border-radius: 10px;
44
+ border-left: 4px solid #1f77b4;
45
+ }
46
+ .model-box {
47
+ padding: 1.5rem;
48
+ border-radius: 10px;
49
+ margin: 1rem 0;
50
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
51
+ }
52
+ .base-model {
53
+ background-color: #fff5f5;
54
+ border-left: 4px solid #ff4b4b;
55
+ }
56
+ .finetuned-model {
57
+ background-color: #f0fff4;
58
+ border-left: 4px solid #00cc88;
59
+ }
60
+ .theory-box {
61
+ background: #e8f4f8;
62
+ padding: 1.5rem;
63
+ border-radius: 10px;
64
+ margin: 1rem 0;
65
+ border-left: 4px solid #1f77b4;
66
+ }
67
+ </style>
68
+ """, unsafe_allow_html=True)
69
+
70
+ # Title
71
+ st.markdown('<div class="main-header">🚀 Complete LoRA Fine-Tuning Demo</div>', unsafe_allow_html=True)
72
+ st.markdown('<div class="sub-header">Parameter-Efficient Fine-Tuning & Deployment Showcase</div>',
73
+ unsafe_allow_html=True)
74
+
75
# Sidebar Navigation.
# Defines `page` for the whole script. The generation settings below
# (device_option, use_quantization, temperature, max_length, top_p) are
# created ONLY when the "Live Demo" page is selected — later code that reads
# them is gated on the same `page` value, so they are in scope when used.
with st.sidebar:
    st.header("📚 Navigation")
    page = st.radio(
        "Select Section:",
        ["🎯 Live Demo", "📊 Theory & Concepts", "⚙️ Technical Details", "🚀 Deployment Info"],
        label_visibility="collapsed"
    )

    st.divider()

    if page == "🎯 Live Demo":
        st.header("⚙️ Model Settings")

        # Inference placement choice; consumed by load_models().
        device_option = st.selectbox(
            "Inference Device",
            ["Auto (GPU if available)", "Force CPU", "Force GPU"],
            help="Compare CPU vs GPU inference speed"
        )

        # 8-bit loading via bitsandbytes; only honoured on CUDA (see load_models).
        use_quantization = st.checkbox(
            "Use 8-bit Quantization",
            value=False,
            help="Reduces memory usage, slightly slower"
        )

        # Sampling hyperparameters forwarded to generate_response().
        temperature = st.slider("Temperature", 0.1, 1.0, 0.3, 0.1)
        max_length = st.slider("Max Length", 50, 400, 200, 10)
        top_p = st.slider("Top P", 0.1, 1.0, 0.95, 0.05)

        st.divider()

        # Static headline numbers for the project (display only).
        st.header("📊 Quick Stats")
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Base Model", "82M params")
            st.metric("Adapter Size", "~3 MB")
        with col2:
            st.metric("Trainable", "0.4%")
            st.metric("Training Time", "~30 min")
115
+
116
+
117
# Cache model loading
@st.cache_resource
def load_models(use_quantization=False, device_option="Auto"):
    """Load the tokenizer, the frozen base model and the LoRA fine-tuned model.

    Cached by Streamlit, so the (slow) download/load runs once per
    (use_quantization, device_option) combination.

    Args:
        use_quantization: load both models in 8-bit via bitsandbytes.
            Only honoured when a CUDA device is actually available.
        device_option: "Force CPU", "Force GPU", or anything else
            ("Auto...") which uses CUDA when available.

    Returns:
        Tuple of (tokenizer, base_model, finetuned_model, device) where
        `device` is the string "cuda" or "cpu".
    """
    base_model_name = "distilgpt2"
    adapter_path = "./models/lora_adapters"

    # Resolve the target device. "Force GPU" still falls back to CPU when no
    # CUDA device exists, so the app never crashes on CPU-only hosts — this
    # collapses the original duplicated "Force GPU"/"Auto" branches.
    if device_option == "Force CPU":
        device = "cpu"
    else:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # Quantization only works on CUDA with bitsandbytes.
    quantized = use_quantization and device == "cuda"

    with st.spinner("🔄 Loading models..."):
        # GPT-2 family ships without a pad token; reuse EOS for padding.
        tokenizer = AutoTokenizer.from_pretrained(base_model_name)
        tokenizer.pad_token = tokenizer.eos_token

        if quantized:
            quantization_config = BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_threshold=6.0
            )
            base_model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                quantization_config=quantization_config,
                device_map="auto"
            )
            finetuned_model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                quantization_config=quantization_config,
                device_map="auto"
            )
        else:
            # Standard fp32 loading.
            base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
            finetuned_model = AutoModelForCausalLM.from_pretrained(base_model_name)

        # Attach the trained LoRA adapters on top of the second copy.
        finetuned_model = PeftModel.from_pretrained(finetuned_model, adapter_path)

        if not quantized:
            # BUG FIX: 8-bit models are already placed by device_map="auto"
            # and raise ValueError on `.to(device)` — only move fp32 models.
            base_model.to(device)
            finetuned_model.to(device)

        # Inference only: disable dropout etc.
        base_model.eval()
        finetuned_model.eval()

    return tokenizer, base_model, finetuned_model, device
165
+
166
+
167
def get_model_size_mb(model):
    """Return the in-memory footprint of *model* in MiB.

    Counts both trainable/frozen parameters and registered buffers
    (e.g. batch-norm running stats), each as nelement * element_size bytes.
    """
    total_bytes = 0
    for tensor in model.parameters():
        total_bytes += tensor.nelement() * tensor.element_size()
    for tensor in model.buffers():
        total_bytes += tensor.nelement() * tensor.element_size()
    return total_bytes / (1024 ** 2)
172
+
173
+
174
def generate_response(model, tokenizer, prompt, device, temperature, max_length, top_p):
    """Run one sampled generation for *prompt* and return the decoded text.

    The prompt is wrapped in the same "### Instruction / ### Code" template
    used during fine-tuning. The returned string is the raw decode of the
    full sequence, so it includes the template prefix.
    """
    formatted_input = f"### Instruction:\n{prompt}\n\n### Code:\n"
    encoded = tokenizer(formatted_input, return_tensors="pt", padding=True)
    # Move every input tensor onto the model's device.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    sampling_args = dict(
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Inference only — no gradients needed.
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_args)

    return tokenizer.decode(generated[0], skip_special_tokens=True)
194
+
195
+
196
# =============================================================================
# PAGE 1: LIVE DEMO
# =============================================================================
# Side-by-side comparison of the frozen base model and the LoRA fine-tuned
# model on the same instruction, with timing metrics.
if page == "🎯 Live Demo":
    # Load models (cached by st.cache_resource, so repeat visits are fast).
    try:
        # NOTE(review): the `'name' in dir()` guards are redundant here —
        # both widgets are created in the sidebar under the same
        # `page == "🎯 Live Demo"` condition, so the names always exist on
        # this path. Left as-is (behavior unchanged).
        tokenizer, base_model, finetuned_model, device = load_models(
            use_quantization=use_quantization if 'use_quantization' in dir() else False,
            device_option=device_option if 'device_option' in dir() else "Auto"
        )

        # Show device info
        device_emoji = "🚀" if device == "cuda" else "🐢"
        if device == "cuda":
            st.success(f"{device_emoji} Running on GPU: {torch.cuda.get_device_name(0)}")
        else:
            st.info(f"{device_emoji} Running on CPU (slower but works!)")

        # Show quantization status
        if use_quantization and device == "cuda":
            st.info("⚡ 8-bit quantization enabled - Lower memory usage!")

    except Exception as e:
        # Any load failure (missing adapter dir, OOM, ...) halts the page.
        st.error(f"❌ Error loading models: {str(e)}")
        st.stop()

    # Sample prompts the user can pick instead of typing their own.
    st.header("💬 Try the Demo")

    sample_prompts = [
        "Write a Python function to calculate factorial",
        "Create a function to check if a string is palindrome",
        "Write code to merge two sorted lists",
        "Implement a function to find the largest element in a list",
        "Create a Python function to check if a number is prime",
        "Write code to reverse a linked list",
        "Implement binary search algorithm in Python"
    ]

    col1, col2 = st.columns([3, 1])
    with col1:
        use_sample = st.selectbox("Select prompt or write custom:", ["Custom"] + sample_prompts)
    with col2:
        # Empty writes are vertical spacers to align the columns.
        st.write("")
        st.write("")

    # Resolve the instruction: free-form text area or the chosen sample.
    if use_sample == "Custom":
        user_instruction = st.text_area(
            "Enter your instruction:",
            height=100,
            placeholder="e.g., Write a Python function to sort a dictionary by values"
        )
    else:
        user_instruction = use_sample
        st.info(f"💡 Prompt: {user_instruction}")

    # Generate button: run BOTH models on the same prompt and time each.
    if st.button("🚀 Generate Responses", type="primary", use_container_width=True):
        if user_instruction.strip():

            col_base, col_finetuned = st.columns(2)

            with col_base:
                st.markdown('<div class="model-box base-model">', unsafe_allow_html=True)
                st.subheader("🔴 Base Model (Untrained)")

                with st.spinner("Generating..."):
                    # Wall-clock timing around the whole generate call.
                    start_time = time.time()
                    base_response = generate_response(
                        base_model, tokenizer, user_instruction, device,
                        temperature, max_length, top_p
                    )
                    base_time = time.time() - start_time

                st.code(base_response, language="python")
                st.caption(f"⏱️ Generation time: {base_time:.3f}s")
                st.markdown('</div>', unsafe_allow_html=True)

            with col_finetuned:
                st.markdown('<div class="model-box finetuned-model">', unsafe_allow_html=True)
                st.subheader("🟢 Fine-tuned Model (+ LoRA)")

                with st.spinner("Generating..."):
                    start_time = time.time()
                    finetuned_response = generate_response(
                        finetuned_model, tokenizer, user_instruction, device,
                        temperature, max_length, top_p
                    )
                    finetuned_time = time.time() - start_time

                st.code(finetuned_response, language="python")
                st.caption(f"⏱️ Generation time: {finetuned_time:.3f}s")
                st.markdown('</div>', unsafe_allow_html=True)

            # Performance Analysis: word counts and relative speed.
            st.divider()
            st.subheader("📊 Performance Analysis")

            col1, col2, col3, col4 = st.columns(4)

            with col1:
                st.metric("Base Response", f"{len(base_response.split())} words")
            with col2:
                st.metric("Fine-tuned Response", f"{len(finetuned_response.split())} words")
            with col3:
                # Positive % means the fine-tuned model was faster than base.
                speed_diff = ((base_time - finetuned_time) / base_time) * 100
                st.metric("Speed Difference", f"{speed_diff:+.1f}%")
            with col4:
                st.metric("Device", device.upper())

            st.success("✅ Notice: Base model produces gibberish, fine-tuned generates actual Python code!")

        else:
            st.warning("⚠️ Please enter an instruction!")
310
+
311
+ # =============================================================================
312
+ # PAGE 2: THEORY & CONCEPTS
313
+ # =============================================================================
314
+ elif page == "📊 Theory & Concepts":
315
+ st.header("📚 Theory & Key Concepts")
316
+
317
+ tab1, tab2, tab3, tab4 = st.tabs([
318
+ "🎓 Pre-training vs Fine-tuning",
319
+ "🔧 LoRA & PEFT",
320
+ "⚡ Training vs Inference",
321
+ "📏 Trade-offs"
322
+ ])
323
+
324
+ with tab1:
325
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
326
+ st.subheader("Pre-training vs Fine-tuning")
327
+
328
+ col1, col2 = st.columns(2)
329
+
330
+ with col1:
331
+ st.markdown("### 🏗️ Pre-training")
332
+ st.markdown("""
333
+ - **Task**: Learn general language understanding
334
+ - **Data**: Massive unlabeled text (billions of tokens)
335
+ - **Cost**: Extremely expensive ($$$$$)
336
+ - **Time**: Weeks to months
337
+ - **Example**: GPT, BERT, LLaMA training
338
+ - **Goal**: General purpose model
339
+ """)
340
+
341
+ with col2:
342
+ st.markdown("### 🎯 Fine-tuning")
343
+ st.markdown("""
344
+ - **Task**: Adapt to specific domain/task
345
+ - **Data**: Smaller labeled dataset (thousands)
346
+ - **Cost**: Much cheaper ($$)
347
+ - **Time**: Hours to days
348
+ - **Example**: Code generation, Q&A, summarization
349
+ - **Goal**: Specialized model
350
+ """)
351
+
352
+ st.divider()
353
+
354
+ st.markdown("### 📊 Our Project: Transfer Learning")
355
+ st.info("""
356
+ **We started with**: Pre-trained `distilgpt2` (general language model)
357
+ **We fine-tuned on**: Python code instructions (5000 samples)
358
+ **Result**: Model now generates Python code instead of general text!
359
+
360
+ This is **Transfer Learning** - leveraging pre-trained knowledge for new tasks.
361
+ """)
362
+ st.markdown('</div>', unsafe_allow_html=True)
363
+
364
+ with tab2:
365
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
366
+ st.subheader("LoRA: Low-Rank Adaptation")
367
+
368
+ col1, col2 = st.columns([1, 1])
369
+
370
+ with col1:
371
+ st.markdown("### 🔴 Full Fine-tuning (Expensive)")
372
+ st.markdown("""
373
+ ```
374
+ Total Parameters: 82M
375
+ Trainable: 82M (100%)
376
+ Memory: High
377
+ Time: Long
378
+ GPU: Required (expensive)
379
+ Checkpoint: 320 MB
380
+ ```
381
+ **Problems**:
382
+ - ❌ Expensive GPUs needed
383
+ - ❌ Long training time
384
+ - ❌ Large model checkpoints
385
+ - ❌ Risk of catastrophic forgetting
386
+ """)
387
+
388
+ with col2:
389
+ st.markdown("### 🟢 LoRA Fine-tuning (Efficient)")
390
+ st.markdown("""
391
+ ```
392
+ Total Parameters: 82M
393
+ Trainable: 295K (0.36%)
394
+ Memory: Low
395
+ Time: Fast
396
+ GPU: Optional (Colab free tier OK)
397
+ Checkpoint: 3 MB
398
+ ```
399
+ **Advantages**:
400
+ - ✅ Train on free GPUs
401
+ - ✅ Fast training (~30 min)
402
+ - ✅ Tiny adapter files
403
+ - ✅ Preserve base model knowledge
404
+ """)
405
+
406
+ st.divider()
407
+
408
+ st.markdown("### 🧮 How LoRA Works")
409
+ st.markdown("""
410
+ Instead of updating all weights `W`, LoRA adds small adapter matrices:
411
+
412
+ ```
413
+ W_new = W_frozen + ΔW
414
+ where ΔW = B × A (low-rank decomposition)
415
+ ```
416
+
417
+ **Our Configuration**:
418
+ - `r = 16` (rank - controls adapter capacity)
419
+ - `alpha = 32` (scaling factor)
420
+ - Target modules: Attention layers only
421
+ - Result: 99.6% fewer trainable parameters!
422
+ """)
423
+ st.markdown('</div>', unsafe_allow_html=True)
424
+
425
+ with tab3:
426
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
427
+ st.subheader("Training vs Inference")
428
+
429
+ col1, col2 = st.columns(2)
430
+
431
+ with col1:
432
+ st.markdown("### 🏋️ Training Phase")
433
+ st.markdown("""
434
+ **What happens**:
435
+ - Forward pass through model
436
+ - Calculate loss (prediction error)
437
+ - Backward propagation (gradients)
438
+ - Update weights (only LoRA adapters)
439
+
440
+ **Requirements**:
441
+ - GPU highly recommended
442
+ - More memory needed
443
+ - Longer time
444
+ - Batch processing
445
+
446
+ **Our Training**:
447
+ - Dataset: 5000 Python code examples
448
+ - Time: ~30 minutes (Colab T4 GPU)
449
+ - Memory: ~8 GB VRAM
450
+ - Output: 3 MB adapter file
451
+ """)
452
+
453
+ with col2:
454
+ st.markdown("### 🚀 Inference Phase")
455
+ st.markdown("""
456
+ **What happens**:
457
+ - Load base model + adapters
458
+ - Forward pass only (no backprop)
459
+ - Generate predictions
460
+ - No weight updates
461
+
462
+ **Requirements**:
463
+ - CPU works (slower)
464
+ - GPU faster (optional)
465
+ - Less memory
466
+ - Real-time response
467
+
468
+ **Our Deployment**:
469
+ - Works on: CPU or GPU
470
+ - Load time: ~10-30 seconds
471
+ - Inference: ~1-3 seconds per response
472
+ - Memory: ~2 GB RAM
473
+ """)
474
+
475
+ st.markdown('</div>', unsafe_allow_html=True)
476
+
477
+ with tab4:
478
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
479
+ st.subheader("Trade-offs & Optimization")
480
+
481
+ st.markdown("### ⚖️ Key Trade-offs")
482
+
483
+ col1, col2 = st.columns(2)
484
+
485
+ with col1:
486
+ st.markdown("#### 📏 Model Size vs Accuracy")
487
+ st.markdown("""
488
+ **Larger models**:
489
+ - ✅ Better accuracy
490
+ - ✅ More capacity
491
+ - ❌ Slower inference
492
+ - ❌ More memory
493
+
494
+ **Smaller models**:
495
+ - ✅ Faster inference
496
+ - ✅ Less memory
497
+ - ❌ Lower accuracy
498
+ - ❌ Less capacity
499
+ """)
500
+
501
+ with col2:
502
+ st.markdown("#### ⚡ Speed vs Quality")
503
+ st.markdown("""
504
+ **Higher quality**:
505
+ - More parameters
506
+ - Longer sequences
507
+ - Lower temperature
508
+ - ❌ Slower
509
+
510
+ **Higher speed**:
511
+ - Fewer parameters
512
+ - Shorter sequences
513
+ - Quantization
514
+ - ❌ Potentially lower quality
515
+ """)
516
+
517
+ st.divider()
518
+
519
+ st.markdown("### 🔢 Quantization")
520
+ st.markdown("""
521
+ **What**: Reduce precision of model weights (32-bit → 8-bit)
522
+
523
+ **Benefits**:
524
+ - 75% less memory usage
525
+ - Faster inference on some hardware
526
+ - Enables larger models on limited hardware
527
+
528
+ **Cost**:
529
+ - Slight accuracy loss (~1-2%)
530
+ - Requires calibration
531
+
532
+ **Try it**: Enable "8-bit quantization" in the sidebar on Demo page!
533
+ """)
534
+
535
+ st.markdown('</div>', unsafe_allow_html=True)
536
+
537
+ # =============================================================================
538
+ # PAGE 3: TECHNICAL DETAILS
539
+ # =============================================================================
540
+ elif page == "⚙️ Technical Details":
541
+ st.header("⚙️ Technical Implementation")
542
+
543
+ col1, col2 = st.columns(2)
544
+
545
+ with col1:
546
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
547
+ st.markdown("### 📦 Model Architecture")
548
+ st.markdown("""
549
+ **Base Model**: distilgpt2
550
+ - Type: Causal Language Model
551
+ - Parameters: 82M
552
+ - Layers: 6 transformer blocks
553
+ - Hidden size: 768
554
+ - Attention heads: 12
555
+ - Vocabulary: 50,257 tokens
556
+ """)
557
+ st.markdown('</div>', unsafe_allow_html=True)
558
+
559
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
560
+ st.markdown("### 🔧 LoRA Configuration")
561
+ st.markdown("""
562
+ ```python
563
+ LoraConfig(
564
+ r=16, # Rank
565
+ lora_alpha=32, # Scaling
566
+ target_modules=["c_attn"], # Attention only
567
+ lora_dropout=0.05,
568
+ task_type="CAUSAL_LM"
569
+ )
570
+ ```
571
+
572
+ **Trainable Parameters**: 294,912 (0.36%)
573
+ **Adapter Size**: ~3 MB
574
+ """)
575
+ st.markdown('</div>', unsafe_allow_html=True)
576
+
577
+ with col2:
578
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
579
+ st.markdown("### 📊 Dataset")
580
+ st.markdown("""
581
+ **Name**: Python Code Instructions (18k Alpaca)
582
+ **Source**: `iamtarun/python_code_instructions_18k_alpaca`
583
+ **Used**: 5000 samples
584
+ - Training: 4500 samples
585
+ - Validation: 500 samples
586
+
587
+ **Format**:
588
+ ```
589
+ Instruction: Write Python code for X
590
+ Code: def function()...
591
+ ```
592
+ """)
593
+ st.markdown('</div>', unsafe_allow_html=True)
594
+
595
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
596
+ st.markdown("### 🏋️ Training Hyperparameters")
597
+ st.markdown("""
598
+ ```python
599
+ Epochs: 4
600
+ Batch size: 2 (per device)
601
+ Gradient accumulation: 4
602
+ Learning rate: 3e-4
603
+ Max sequence length: 512
604
+ Optimizer: AdamW
605
+ Scheduler: Linear warmup
606
+ ```
607
+
608
+ **Training Time**: ~30 minutes (T4 GPU)
609
+ **Final Loss**: ~2.5
610
+ """)
611
+ st.markdown('</div>', unsafe_allow_html=True)
612
+
613
+ st.divider()
614
+
615
+ st.markdown("### 🛠️ Tools & Libraries Used")
616
+
617
+ col1, col2, col3 = st.columns(3)
618
+
619
+ with col1:
620
+ st.markdown("""
621
+ **Training**:
622
+ - 🤗 Transformers
623
+ - 🎯 PEFT (LoRA)
624
+ - 🚀 Accelerate
625
+ - 📊 Datasets
626
+ - 🔥 PyTorch
627
+ """)
628
+
629
+ with col2:
630
+ st.markdown("""
631
+ **Deployment**:
632
+ - 🌐 Streamlit
633
+ - 🤗 Hugging Face Hub
634
+ - ⚡ bitsandbytes (quantization)
635
+ - 💾 safetensors
636
+ """)
637
+
638
+ with col3:
639
+ st.markdown("""
640
+ **Infrastructure**:
641
+ - 📓 Google Colab (training)
642
+ - 💻 Local deployment
643
+ - ☁️ Hugging Face Spaces (optional)
644
+ - 🔒 Git LFS (model versioning)
645
+ """)
646
+
647
+ # =============================================================================
648
+ # PAGE 4: DEPLOYMENT INFO
649
+ # =============================================================================
650
+ else: # Deployment Info
651
+ st.header("🚀 Deployment Options")
652
+
653
+ tab1, tab2, tab3 = st.tabs(["💻 Local", "☁️ Cloud", "📊 Comparison"])
654
+
655
+ with tab1:
656
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
657
+ st.markdown("### 💻 Local Deployment (Current)")
658
+
659
+ st.markdown("""
660
+ **Advantages**:
661
+ - ✅ Full control
662
+ - ✅ No API costs
663
+ - ✅ Data privacy
664
+ - ✅ Works offline
665
+ - ✅ Fast iteration
666
+
667
+ **Requirements**:
668
+ - Python 3.8+
669
+ - 2-4 GB RAM
670
+ - Optional: NVIDIA GPU
671
+
672
+ **Setup**:
673
+ ```bash
674
+ pip install streamlit transformers peft torch
675
+ streamlit run app.py
676
+ ```
677
+
678
+ **Best for**: Development, testing, demos
679
+ """)
680
+ st.markdown('</div>', unsafe_allow_html=True)
681
+
682
+ with tab2:
683
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
684
+ st.markdown("### ☁️ Cloud Deployment")
685
+
686
+ st.markdown("#### 🤗 Hugging Face Spaces (Recommended)")
687
+ st.markdown("""
688
+ **Features**:
689
+ - ✅ Free tier available
690
+ - ✅ Auto-deploys from Git
691
+ - ✅ Public URL
692
+ - ✅ No server management
693
+ - ✅ Built-in CI/CD
694
+
695
+ **Setup**:
696
+ 1. Create account on huggingface.co
697
+ 2. Create new Space (Streamlit)
698
+ 3. Upload: app.py, requirements.txt, models/
699
+ 4. Auto-deploys!
700
+
701
+ **URL**: `https://huggingface.co/spaces/YOUR_USERNAME/lora-demo`
702
+ """)
703
+
704
+ st.divider()
705
+
706
+ st.markdown("#### Other Options")
707
+
708
+ col1, col2 = st.columns(2)
709
+
710
+ with col1:
711
+ st.markdown("""
712
+ **Streamlit Cloud**:
713
+ - Free for public apps
714
+ - GitHub integration
715
+ - Easy deployment
716
+ - Resource limits
717
+ """)
718
+
719
+ with col2:
720
+ st.markdown("""
721
+ **AWS/GCP/Azure**:
722
+ - Full control
723
+ - Scalable
724
+ - More expensive
725
+ - Requires devops
726
+ """)
727
+
728
+ st.markdown('</div>', unsafe_allow_html=True)
729
+
730
+ with tab3:
731
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
732
+ st.markdown("### 📊 Deployment Comparison")
733
+
734
+ comparison_data = {
735
+ "Feature": ["Cost", "Setup Time", "Control", "Scalability", "Maintenance", "Best For"],
736
+ "Local": ["Free", "5 mins", "Full", "Limited", "Manual", "Development"],
737
+ "HF Spaces": ["Free", "10 mins", "Medium", "Auto", "Minimal", "Demos"],
738
+ "Cloud (AWS)": ["$$$", "1-2 hours", "Full", "High", "Manual", "Production"]
739
+ }
740
+
741
+ st.table(comparison_data)
742
+
743
+ st.divider()
744
+
745
+ st.markdown("### 🎯 CPU vs GPU Inference")
746
+
747
+ col1, col2 = st.columns(2)
748
+
749
+ with col1:
750
+ st.markdown("""
751
+ **CPU Inference**:
752
+ - Speed: 2-5 seconds/response
753
+ - Cost: $0 (uses existing hardware)
754
+ - Memory: ~2 GB RAM
755
+ - Best for: Low-traffic apps, development
756
+ """)
757
+
758
+ with col2:
759
+ st.markdown("""
760
+ **GPU Inference**:
761
+ - Speed: 0.5-2 seconds/response
762
+ - Cost: $0.50-2/hour (cloud)
763
+ - Memory: ~4-8 GB VRAM
764
+ - Best for: High-traffic, real-time apps
765
+ """)
766
+
767
+ st.info("💡 **Tip**: Start with CPU deployment, upgrade to GPU only if needed!")
768
+
769
+ st.markdown('</div>', unsafe_allow_html=True)
770
+
771
+ # Footer
772
+ st.divider()
773
+ st.markdown("""
774
+ <div style="text-align: center; color: #666; padding: 1rem;">
775
+ <p><strong>🎓 Group 6: Model Adaptation, Efficient Fine-Tuning & Deployment of LLMs</strong></p>
776
+ <p>Built with Streamlit • Transformers • PEFT • PyTorch</p>
777
+ </div>
778
+ """, unsafe_allow_html=True)
models/lora_adapters/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: distilgpt2
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:distilgpt2
7
+ - lora
8
+ - transformers
9
+ ---
10
+
11
+ # Model Card for Model ID
12
+
13
+ <!-- Provide a quick summary of what the model is/does. -->
14
+
15
+
16
+
17
+ ## Model Details
18
+
19
+ ### Model Description
20
+
21
+ <!-- Provide a longer summary of what this model is. -->
22
+
23
+
24
+
25
+ - **Developed by:** [More Information Needed]
26
+ - **Funded by [optional]:** [More Information Needed]
27
+ - **Shared by [optional]:** [More Information Needed]
28
+ - **Model type:** [More Information Needed]
29
+ - **Language(s) (NLP):** [More Information Needed]
30
+ - **License:** [More Information Needed]
31
+ - **Finetuned from model [optional]:** [More Information Needed]
32
+
33
+ ### Model Sources [optional]
34
+
35
+ <!-- Provide the basic links for the model. -->
36
+
37
+ - **Repository:** [More Information Needed]
38
+ - **Paper [optional]:** [More Information Needed]
39
+ - **Demo [optional]:** [More Information Needed]
40
+
41
+ ## Uses
42
+
43
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
44
+
45
+ ### Direct Use
46
+
47
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
+
49
+ [More Information Needed]
50
+
51
+ ### Downstream Use [optional]
52
+
53
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
54
+
55
+ [More Information Needed]
56
+
57
+ ### Out-of-Scope Use
58
+
59
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
60
+
61
+ [More Information Needed]
62
+
63
+ ## Bias, Risks, and Limitations
64
+
65
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
66
+
67
+ [More Information Needed]
68
+
69
+ ### Recommendations
70
+
71
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
72
+
73
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
74
+
75
+ ## How to Get Started with the Model
76
+
77
+ Use the code below to get started with the model.
78
+
79
+ [More Information Needed]
80
+
81
+ ## Training Details
82
+
83
+ ### Training Data
84
+
85
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ### Training Procedure
90
+
91
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
92
+
93
+ #### Preprocessing [optional]
94
+
95
+ [More Information Needed]
96
+
97
+
98
+ #### Training Hyperparameters
99
+
100
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
101
+
102
+ #### Speeds, Sizes, Times [optional]
103
+
104
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
105
+
106
+ [More Information Needed]
107
+
108
+ ## Evaluation
109
+
110
+ <!-- This section describes the evaluation protocols and provides the results. -->
111
+
112
+ ### Testing Data, Factors & Metrics
113
+
114
+ #### Testing Data
115
+
116
+ <!-- This should link to a Dataset Card if possible. -->
117
+
118
+ [More Information Needed]
119
+
120
+ #### Factors
121
+
122
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
123
+
124
+ [More Information Needed]
125
+
126
+ #### Metrics
127
+
128
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
129
+
130
+ [More Information Needed]
131
+
132
+ ### Results
133
+
134
+ [More Information Needed]
135
+
136
+ #### Summary
137
+
138
+
139
+
140
+ ## Model Examination [optional]
141
+
142
+ <!-- Relevant interpretability work for the model goes here -->
143
+
144
+ [More Information Needed]
145
+
146
+ ## Environmental Impact
147
+
148
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
149
+
150
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
151
+
152
+ - **Hardware Type:** [More Information Needed]
153
+ - **Hours used:** [More Information Needed]
154
+ - **Cloud Provider:** [More Information Needed]
155
+ - **Compute Region:** [More Information Needed]
156
+ - **Carbon Emitted:** [More Information Needed]
157
+
158
+ ## Technical Specifications [optional]
159
+
160
+ ### Model Architecture and Objective
161
+
162
+ [More Information Needed]
163
+
164
+ ### Compute Infrastructure
165
+
166
+ [More Information Needed]
167
+
168
+ #### Hardware
169
+
170
+ [More Information Needed]
171
+
172
+ #### Software
173
+
174
+ [More Information Needed]
175
+
176
+ ## Citation [optional]
177
+
178
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
179
+
180
+ **BibTeX:**
181
+
182
+ [More Information Needed]
183
+
184
+ **APA:**
185
+
186
+ [More Information Needed]
187
+
188
+ ## Glossary [optional]
189
+
190
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
191
+
192
+ [More Information Needed]
193
+
194
+ ## More Information [optional]
195
+
196
+ [More Information Needed]
197
+
198
+ ## Model Card Authors [optional]
199
+
200
+ [More Information Needed]
201
+
202
+ ## Model Card Contact
203
+
204
+ [More Information Needed]
205
+ ### Framework versions
206
+
207
+ - PEFT 0.18.0
models/lora_adapters/adapter_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "distilgpt2",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": true,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "c_attn"
33
+ ],
34
+ "target_parameters": null,
35
+ "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
+ "use_dora": false,
38
+ "use_qalora": false,
39
+ "use_rslora": false
40
+ }
models/lora_adapters/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aea375c6a93cbdfd692e73275df4493c92c3f8256e709682545d6b74ae8cff5
3
+ size 1181192
models/lora_adapters/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/lora_adapters/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
models/lora_adapters/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/lora_adapters/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": false,
15
+ "eos_token": "<|endoftext|>",
16
+ "extra_special_tokens": {},
17
+ "model_max_length": 1024,
18
+ "pad_token": "<|endoftext|>",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
models/lora_adapters/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,3 +1,8 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
1
+ streamlit==1.29.0
2
+ transformers==4.36.0
3
+ torch==2.1.0
4
+ peft==0.18.0
5
+ accelerate==0.25.0
6
+ bitsandbytes==0.41.0
7
+ sentencepiece==0.1.99
8
+ protobuf==3.20.3