# NOTE(review): removed stray build-log residue ("Spaces:" / "Build error")
# that was pasted above the module docstring and is not part of the program.
| """ | |
| ULTIMATE LoRA Fine-Tuning Demo - Covers ALL Project Requirements | |
| Group 6: Model Adaptation, Efficient Fine-Tuning & Deployment of LLMs | |
| """ | |
| import streamlit as st | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| from peft import PeftModel | |
| import time | |
| import psutil | |
| import os | |
# --- Page configuration --------------------------------------------------
# Must be the first Streamlit command executed in the script.
st.set_page_config(
    page_title="LoRA Fine-Tuning Complete Demo",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# --- Custom CSS ----------------------------------------------------------
# Injected once via raw HTML: styles the gradient title (.main-header),
# the red/green comparison boxes on the demo page (.base-model /
# .finetuned-model) and the blue info panels on the theory pages.
st.markdown("""
<style>
.main-header {
    font-size: 2.5rem;
    font-weight: bold;
    text-align: center;
    background: linear-gradient(120deg, #1f77b4, #00cc88);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 0.5rem;
}
.sub-header {
    text-align: center;
    color: #666;
    margin-bottom: 2rem;
    font-size: 1.1rem;
}
.metric-card {
    background: #f0f2f6;
    padding: 1rem;
    border-radius: 10px;
    border-left: 4px solid #1f77b4;
}
.model-box {
    padding: 1.5rem;
    border-radius: 10px;
    margin: 1rem 0;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.base-model {
    background-color: #fff5f5;
    border-left: 4px solid #ff4b4b;
}
.finetuned-model {
    background-color: #f0fff4;
    border-left: 4px solid #00cc88;
}
.theory-box {
    background: #e8f4f8;
    padding: 1.5rem;
    border-radius: 10px;
    margin: 1rem 0;
    border-left: 4px solid #1f77b4;
}
</style>
""", unsafe_allow_html=True)

# --- Title banner ---------------------------------------------------------
st.markdown('<div class="main-header">🚀 Complete LoRA Fine-Tuning Demo</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">Parameter-Efficient Fine-Tuning & Deployment Showcase</div>',
            unsafe_allow_html=True)
# --- Sidebar: page navigation + per-page controls -------------------------
with st.sidebar:
    st.header("📚 Navigation")
    # `page` drives the top-level if/elif page dispatch below.
    page = st.radio(
        "Select Section:",
        ["🎯 Live Demo", "📊 Theory & Concepts", "⚙️ Technical Details", "🚀 Deployment Info"],
        label_visibility="collapsed"
    )
    st.divider()
    # Model/generation controls are rendered (and therefore the variables
    # below are only defined) when the Live Demo page is selected.
    if page == "🎯 Live Demo":
        st.header("⚙️ Model Settings")
        device_option = st.selectbox(
            "Inference Device",
            ["Auto (GPU if available)", "Force CPU", "Force GPU"],
            help="Compare CPU vs GPU inference speed"
        )
        use_quantization = st.checkbox(
            "Use 8-bit Quantization",
            value=False,
            help="Reduces memory usage, slightly slower"
        )
        # Sampling controls: (label, min, max, default, step)
        temperature = st.slider("Temperature", 0.1, 1.0, 0.3, 0.1)
        max_length = st.slider("Max Length", 50, 400, 200, 10)
        top_p = st.slider("Top P", 0.1, 1.0, 0.95, 0.05)
        st.divider()
        st.header("📊 Quick Stats")
        # Static headline numbers about the fine-tuning run (display only).
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Base Model", "82M params")
            st.metric("Adapter Size", "~3 MB")
        with col2:
            st.metric("Trainable", "0.4%")
            st.metric("Training Time", "~30 min")
# Cache model loading: without caching, Streamlit reruns the whole script on
# every widget interaction and would reload the weights each time (10-30 s).
@st.cache_resource
def load_models(use_quantization=False, device_option="Auto"):
    """Load the tokenizer, the frozen base model and the LoRA fine-tuned model.

    Args:
        use_quantization: load both models in 8-bit via bitsandbytes
            (only honored when a CUDA device is available).
        device_option: "Force CPU" pins inference to CPU; any other value
            (including "Force GPU" and "Auto") uses CUDA when available and
            falls back to CPU otherwise.

    Returns:
        (tokenizer, base_model, finetuned_model, device) where ``device`` is
        the device string actually used ("cuda" or "cpu").
    """
    base_model_name = "distilgpt2"
    adapter_path = "./models/lora_adapters"  # trained LoRA adapter weights

    # Resolve the target device. "Force GPU" and "Auto" behave identically:
    # both still fall back to CPU when CUDA is unavailable so the app works.
    if device_option == "Force CPU":
        device = "cpu"
    else:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    with st.spinner("🔄 Loading models..."):
        tokenizer = AutoTokenizer.from_pretrained(base_model_name)
        tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token

        if use_quantization and device == "cuda":
            # 8-bit path: device placement is handled by device_map="auto".
            # Quantized (int8) models must NOT be moved with .to() afterwards,
            # so no explicit device transfer happens on this branch.
            quantization_config = BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_threshold=6.0
            )
            base_model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                quantization_config=quantization_config,
                device_map="auto"
            )
            finetuned_model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                quantization_config=quantization_config,
                device_map="auto"
            )
            finetuned_model = PeftModel.from_pretrained(finetuned_model, adapter_path)
        else:
            # Standard full-precision loading; move both models explicitly.
            base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
            finetuned_model = AutoModelForCausalLM.from_pretrained(base_model_name)
            finetuned_model = PeftModel.from_pretrained(finetuned_model, adapter_path)
            base_model.to(device)
            finetuned_model.to(device)

    return tokenizer, base_model, finetuned_model, device
def get_model_size_mb(model):
    """Return the in-memory size of *model* in MiB.

    Counts the bytes of every parameter tensor and every registered buffer
    (e.g. BatchNorm running statistics), then converts to mebibytes.
    """
    total_bytes = 0
    for tensor in model.parameters():
        total_bytes += tensor.nelement() * tensor.element_size()
    for tensor in model.buffers():
        total_bytes += tensor.nelement() * tensor.element_size()
    return total_bytes / (1024 * 1024)
def generate_response(model, tokenizer, prompt, device, temperature, max_length, top_p):
    """Sample one completion from *model* for the given instruction.

    The prompt is wrapped in the same "### Instruction: / ### Code:"
    template used during fine-tuning, tokenized, moved to *device*,
    generated with nucleus sampling, and decoded back to plain text
    (special tokens stripped). Returns the decoded string, which includes
    the prompt template prefix.
    """
    template = f"### Instruction:\n{prompt}\n\n### Code:\n"
    encoded = tokenizer(template, return_tensors="pt", padding=True)
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}
    with torch.no_grad():  # inference only -- no gradient bookkeeping
        generated = model.generate(
            **batch,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
# =============================================================================
# PAGE 1: LIVE DEMO
# =============================================================================
if page == "🎯 Live Demo":
    # Load the tokenizer and both models. The `'name' in dir()` guards keep
    # the call safe if the sidebar widgets were somehow not rendered.
    try:
        tokenizer, base_model, finetuned_model, device = load_models(
            use_quantization=use_quantization if 'use_quantization' in dir() else False,
            device_option=device_option if 'device_option' in dir() else "Auto"
        )
        # Tell the user which device inference will run on.
        device_emoji = "🚀" if device == "cuda" else "🐢"
        if device == "cuda":
            st.success(f"{device_emoji} Running on GPU: {torch.cuda.get_device_name(0)}")
        else:
            st.info(f"{device_emoji} Running on CPU (slower but works!)")
        # Quantization only takes effect on CUDA (see load_models).
        if use_quantization and device == "cuda":
            st.info("⚡ 8-bit quantization enabled - Lower memory usage!")
    except Exception as e:
        # Most likely cause: missing adapter files at ./models/lora_adapters
        st.error(f"❌ Error loading models: {str(e)}")
        st.stop()  # abort the page -- nothing below works without the models

    # --- Prompt selection ----------------------------------------------------
    st.header("💬 Try the Demo")
    sample_prompts = [
        "Write a Python function to calculate factorial",
        "Create a function to check if a string is palindrome",
        "Write code to merge two sorted lists",
        "Implement a function to find the largest element in a list",
        "Create a Python function to check if a number is prime",
        "Write code to reverse a linked list",
        "Implement binary search algorithm in Python"
    ]
    col1, col2 = st.columns([3, 1])
    with col1:
        use_sample = st.selectbox("Select prompt or write custom:", ["Custom"] + sample_prompts)
    with col2:
        # Empty writes act as vertical spacers to align with the selectbox.
        st.write("")
        st.write("")
    if use_sample == "Custom":
        user_instruction = st.text_area(
            "Enter your instruction:",
            height=100,
            placeholder="e.g., Write a Python function to sort a dictionary by values"
        )
    else:
        user_instruction = use_sample
        st.info(f"💡 Prompt: {user_instruction}")

    # --- Side-by-side generation: base model vs base+LoRA --------------------
    if st.button("🚀 Generate Responses", type="primary", use_container_width=True):
        if user_instruction.strip():
            col_base, col_finetuned = st.columns(2)
            with col_base:
                st.markdown('<div class="model-box base-model">', unsafe_allow_html=True)
                st.subheader("🔴 Base Model (Untrained)")
                with st.spinner("Generating..."):
                    start_time = time.time()
                    base_response = generate_response(
                        base_model, tokenizer, user_instruction, device,
                        temperature, max_length, top_p
                    )
                    base_time = time.time() - start_time  # wall-clock latency
                st.code(base_response, language="python")
                st.caption(f"⏱️ Generation time: {base_time:.3f}s")
                st.markdown('</div>', unsafe_allow_html=True)
            with col_finetuned:
                st.markdown('<div class="model-box finetuned-model">', unsafe_allow_html=True)
                st.subheader("🟢 Fine-tuned Model (+ LoRA)")
                with st.spinner("Generating..."):
                    start_time = time.time()
                    finetuned_response = generate_response(
                        finetuned_model, tokenizer, user_instruction, device,
                        temperature, max_length, top_p
                    )
                    finetuned_time = time.time() - start_time
                st.code(finetuned_response, language="python")
                st.caption(f"⏱️ Generation time: {finetuned_time:.3f}s")
                st.markdown('</div>', unsafe_allow_html=True)

            # --- Performance comparison of the two runs ----------------------
            st.divider()
            st.subheader("📊 Performance Analysis")
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Base Response", f"{len(base_response.split())} words")
            with col2:
                st.metric("Fine-tuned Response", f"{len(finetuned_response.split())} words")
            with col3:
                # Positive value = fine-tuned run was faster than the base run.
                speed_diff = ((base_time - finetuned_time) / base_time) * 100
                st.metric("Speed Difference", f"{speed_diff:+.1f}%")
            with col4:
                st.metric("Device", device.upper())
            st.success("✅ Notice: Base model produces gibberish, fine-tuned generates actual Python code!")
        else:
            st.warning("⚠️ Please enter an instruction!")
# =============================================================================
# PAGE 2: THEORY & CONCEPTS
# =============================================================================
elif page == "📊 Theory & Concepts":
    st.header("📚 Theory & Key Concepts")
    tab1, tab2, tab3, tab4 = st.tabs([
        "🎓 Pre-training vs Fine-tuning",
        "🔧 LoRA & PEFT",
        "⚡ Training vs Inference",
        "📏 Trade-offs"
    ])

    # --- Tab 1: pre-training vs fine-tuning (static explainer) ---------------
    with tab1:
        st.markdown('<div class="theory-box">', unsafe_allow_html=True)
        st.subheader("Pre-training vs Fine-tuning")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("### 🏗️ Pre-training")
            st.markdown("""
- **Task**: Learn general language understanding
- **Data**: Massive unlabeled text (billions of tokens)
- **Cost**: Extremely expensive ($$$$$)
- **Time**: Weeks to months
- **Example**: GPT, BERT, LLaMA training
- **Goal**: General purpose model
""")
        with col2:
            st.markdown("### 🎯 Fine-tuning")
            st.markdown("""
- **Task**: Adapt to specific domain/task
- **Data**: Smaller labeled dataset (thousands)
- **Cost**: Much cheaper ($$)
- **Time**: Hours to days
- **Example**: Code generation, Q&A, summarization
- **Goal**: Specialized model
""")
        st.divider()
        st.markdown("### 📊 Our Project: Transfer Learning")
        st.info("""
**We started with**: Pre-trained `distilgpt2` (general language model)
**We fine-tuned on**: Python code instructions (5000 samples)
**Result**: Model now generates Python code instead of general text!
This is **Transfer Learning** - leveraging pre-trained knowledge for new tasks.
""")
        st.markdown('</div>', unsafe_allow_html=True)

    # --- Tab 2: LoRA / PEFT explainer ----------------------------------------
    with tab2:
        st.markdown('<div class="theory-box">', unsafe_allow_html=True)
        st.subheader("LoRA: Low-Rank Adaptation")
        col1, col2 = st.columns([1, 1])
        with col1:
            st.markdown("### 🔴 Full Fine-tuning (Expensive)")
            st.markdown("""
```
Total Parameters: 82M
Trainable: 82M (100%)
Memory: High
Time: Long
GPU: Required (expensive)
Checkpoint: 320 MB
```
**Problems**:
- ❌ Expensive GPUs needed
- ❌ Long training time
- ❌ Large model checkpoints
- ❌ Risk of catastrophic forgetting
""")
        with col2:
            st.markdown("### 🟢 LoRA Fine-tuning (Efficient)")
            st.markdown("""
```
Total Parameters: 82M
Trainable: 295K (0.36%)
Memory: Low
Time: Fast
GPU: Optional (Colab free tier OK)
Checkpoint: 3 MB
```
**Advantages**:
- ✅ Train on free GPUs
- ✅ Fast training (~30 min)
- ✅ Tiny adapter files
- ✅ Preserve base model knowledge
""")
        st.divider()
        st.markdown("### 🧮 How LoRA Works")
        st.markdown("""
Instead of updating all weights `W`, LoRA adds small adapter matrices:
```
W_new = W_frozen + ΔW
where ΔW = B × A (low-rank decomposition)
```
**Our Configuration**:
- `r = 16` (rank - controls adapter capacity)
- `alpha = 32` (scaling factor)
- Target modules: Attention layers only
- Result: 99.6% fewer trainable parameters!
""")
        st.markdown('</div>', unsafe_allow_html=True)

    # --- Tab 3: training vs inference phases ---------------------------------
    with tab3:
        st.markdown('<div class="theory-box">', unsafe_allow_html=True)
        st.subheader("Training vs Inference")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("### 🏋️ Training Phase")
            st.markdown("""
**What happens**:
- Forward pass through model
- Calculate loss (prediction error)
- Backward propagation (gradients)
- Update weights (only LoRA adapters)
**Requirements**:
- GPU highly recommended
- More memory needed
- Longer time
- Batch processing
**Our Training**:
- Dataset: 5000 Python code examples
- Time: ~30 minutes (Colab T4 GPU)
- Memory: ~8 GB VRAM
- Output: 3 MB adapter file
""")
        with col2:
            st.markdown("### 🚀 Inference Phase")
            st.markdown("""
**What happens**:
- Load base model + adapters
- Forward pass only (no backprop)
- Generate predictions
- No weight updates
**Requirements**:
- CPU works (slower)
- GPU faster (optional)
- Less memory
- Real-time response
**Our Deployment**:
- Works on: CPU or GPU
- Load time: ~10-30 seconds
- Inference: ~1-3 seconds per response
- Memory: ~2 GB RAM
""")
        st.markdown('</div>', unsafe_allow_html=True)

    # --- Tab 4: trade-offs and quantization ----------------------------------
    with tab4:
        st.markdown('<div class="theory-box">', unsafe_allow_html=True)
        st.subheader("Trade-offs & Optimization")
        st.markdown("### ⚖️ Key Trade-offs")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("#### 📏 Model Size vs Accuracy")
            st.markdown("""
**Larger models**:
- ✅ Better accuracy
- ✅ More capacity
- ❌ Slower inference
- ❌ More memory
**Smaller models**:
- ✅ Faster inference
- ✅ Less memory
- ❌ Lower accuracy
- ❌ Less capacity
""")
        with col2:
            st.markdown("#### ⚡ Speed vs Quality")
            st.markdown("""
**Higher quality**:
- More parameters
- Longer sequences
- Lower temperature
- ❌ Slower
**Higher speed**:
- Fewer parameters
- Shorter sequences
- Quantization
- ❌ Potentially lower quality
""")
        st.divider()
        st.markdown("### 🔢 Quantization")
        st.markdown("""
**What**: Reduce precision of model weights (32-bit → 8-bit)
**Benefits**:
- 75% less memory usage
- Faster inference on some hardware
- Enables larger models on limited hardware
**Cost**:
- Slight accuracy loss (~1-2%)
- Requires calibration
**Try it**: Enable "8-bit quantization" in the sidebar on Demo page!
""")
        st.markdown('</div>', unsafe_allow_html=True)
# =============================================================================
# PAGE 3: TECHNICAL DETAILS
# =============================================================================
elif page == "⚙️ Technical Details":
    st.header("⚙️ Technical Implementation")
    col1, col2 = st.columns(2)

    # Left column: architecture + LoRA config cards
    with col1:
        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
        st.markdown("### 📦 Model Architecture")
        st.markdown("""
**Base Model**: distilgpt2
- Type: Causal Language Model
- Parameters: 82M
- Layers: 6 transformer blocks
- Hidden size: 768
- Attention heads: 12
- Vocabulary: 50,257 tokens
""")
        st.markdown('</div>', unsafe_allow_html=True)
        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
        st.markdown("### 🔧 LoRA Configuration")
        st.markdown("""
```python
LoraConfig(
    r=16,                      # Rank
    lora_alpha=32,             # Scaling
    target_modules=["c_attn"], # Attention only
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)
```
**Trainable Parameters**: 294,912 (0.36%)
**Adapter Size**: ~3 MB
""")
        st.markdown('</div>', unsafe_allow_html=True)

    # Right column: dataset + hyperparameter cards
    with col2:
        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
        st.markdown("### 📊 Dataset")
        st.markdown("""
**Name**: Python Code Instructions (18k Alpaca)
**Source**: `iamtarun/python_code_instructions_18k_alpaca`
**Used**: 5000 samples
- Training: 4500 samples
- Validation: 500 samples
**Format**:
```
Instruction: Write Python code for X
Code: def function()...
```
""")
        st.markdown('</div>', unsafe_allow_html=True)
        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
        st.markdown("### 🏋️ Training Hyperparameters")
        st.markdown("""
```python
Epochs: 4
Batch size: 2 (per device)
Gradient accumulation: 4
Learning rate: 3e-4
Max sequence length: 512
Optimizer: AdamW
Scheduler: Linear warmup
```
**Training Time**: ~30 minutes (T4 GPU)
**Final Loss**: ~2.5
""")
        st.markdown('</div>', unsafe_allow_html=True)

    st.divider()
    # Tooling overview, split into three columns (reuses col1/col2 names).
    st.markdown("### 🛠️ Tools & Libraries Used")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
**Training**:
- 🤗 Transformers
- 🎯 PEFT (LoRA)
- 🚀 Accelerate
- 📊 Datasets
- 🔥 PyTorch
""")
    with col2:
        st.markdown("""
**Deployment**:
- 🌐 Streamlit
- 🤗 Hugging Face Hub
- ⚡ bitsandbytes (quantization)
- 💾 safetensors
""")
    with col3:
        st.markdown("""
**Infrastructure**:
- 📓 Google Colab (training)
- 💻 Local deployment
- ☁️ Hugging Face Spaces (optional)
- 🔒 Git LFS (model versioning)
""")
# =============================================================================
# PAGE 4: DEPLOYMENT INFO
# =============================================================================
else:  # Deployment Info (the only remaining radio option)
    st.header("🚀 Deployment Options")
    tab1, tab2, tab3 = st.tabs(["💻 Local", "☁️ Cloud", "📊 Comparison"])

    # --- Tab 1: local deployment ---------------------------------------------
    with tab1:
        st.markdown('<div class="theory-box">', unsafe_allow_html=True)
        st.markdown("### 💻 Local Deployment (Current)")
        st.markdown("""
**Advantages**:
- ✅ Full control
- ✅ No API costs
- ✅ Data privacy
- ✅ Works offline
- ✅ Fast iteration
**Requirements**:
- Python 3.8+
- 2-4 GB RAM
- Optional: NVIDIA GPU
**Setup**:
```bash
pip install streamlit transformers peft torch
streamlit run app.py
```
**Best for**: Development, testing, demos
""")
        st.markdown('</div>', unsafe_allow_html=True)

    # --- Tab 2: cloud options ------------------------------------------------
    with tab2:
        st.markdown('<div class="theory-box">', unsafe_allow_html=True)
        st.markdown("### ☁️ Cloud Deployment")
        st.markdown("#### 🤗 Hugging Face Spaces (Recommended)")
        st.markdown("""
**Features**:
- ✅ Free tier available
- ✅ Auto-deploys from Git
- ✅ Public URL
- ✅ No server management
- ✅ Built-in CI/CD
**Setup**:
1. Create account on huggingface.co
2. Create new Space (Streamlit)
3. Upload: app.py, requirements.txt, models/
4. Auto-deploys!
**URL**: `https://huggingface.co/spaces/YOUR_USERNAME/lora-demo`
""")
        st.divider()
        st.markdown("#### Other Options")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("""
**Streamlit Cloud**:
- Free for public apps
- GitHub integration
- Easy deployment
- Resource limits
""")
        with col2:
            st.markdown("""
**AWS/GCP/Azure**:
- Full control
- Scalable
- More expensive
- Requires devops
""")
        st.markdown('</div>', unsafe_allow_html=True)

    # --- Tab 3: comparison table + CPU vs GPU --------------------------------
    with tab3:
        st.markdown('<div class="theory-box">', unsafe_allow_html=True)
        st.markdown("### 📊 Deployment Comparison")
        # Dict-of-columns form accepted by st.table.
        comparison_data = {
            "Feature": ["Cost", "Setup Time", "Control", "Scalability", "Maintenance", "Best For"],
            "Local": ["Free", "5 mins", "Full", "Limited", "Manual", "Development"],
            "HF Spaces": ["Free", "10 mins", "Medium", "Auto", "Minimal", "Demos"],
            "Cloud (AWS)": ["$$$", "1-2 hours", "Full", "High", "Manual", "Production"]
        }
        st.table(comparison_data)
        st.divider()
        st.markdown("### 🎯 CPU vs GPU Inference")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("""
**CPU Inference**:
- Speed: 2-5 seconds/response
- Cost: $0 (uses existing hardware)
- Memory: ~2 GB RAM
- Best for: Low-traffic apps, development
""")
        with col2:
            st.markdown("""
**GPU Inference**:
- Speed: 0.5-2 seconds/response
- Cost: $0.50-2/hour (cloud)
- Memory: ~4-8 GB VRAM
- Best for: High-traffic, real-time apps
""")
        st.info("💡 **Tip**: Start with CPU deployment, upgrade to GPU only if needed!")
        st.markdown('</div>', unsafe_allow_html=True)
# --- Footer (rendered on every page, after the page dispatch above) --------
st.divider()
st.markdown("""
<div style="text-align: center; color: #666; padding: 1rem;">
    <p><strong>🎓 Group 6: Model Adaptation, Efficient Fine-Tuning & Deployment of LLMs</strong></p>
    <p>Built with Streamlit • Transformers • PEFT • PyTorch</p>
</div>
""", unsafe_allow_html=True)