Lubabah0 commited on
Commit
adfb728
·
verified ·
1 Parent(s): 183a970

Upload 10 files

Browse files
app.py ADDED
@@ -0,0 +1,778 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ULTIMATE LoRA Fine-Tuning Demo - Covers ALL Project Requirements
3
+ Group 6: Model Adaptation, Efficient Fine-Tuning & Deployment of LLMs
4
+ """
5
+
6
+ import streamlit as st
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
9
+ from peft import PeftModel
10
+ import time
11
+ import psutil
12
+ import os
13
+
14
+ # Page configuration
15
+ st.set_page_config(
16
+ page_title="LoRA Fine-Tuning Complete Demo",
17
+ page_icon="🤖",
18
+ layout="wide",
19
+ initial_sidebar_state="expanded"
20
+ )
21
+
22
+ # Custom CSS
23
+ st.markdown("""
24
+ <style>
25
+ .main-header {
26
+ font-size: 2.5rem;
27
+ font-weight: bold;
28
+ text-align: center;
29
+ background: linear-gradient(120deg, #1f77b4, #00cc88);
30
+ -webkit-background-clip: text;
31
+ -webkit-text-fill-color: transparent;
32
+ margin-bottom: 0.5rem;
33
+ }
34
+ .sub-header {
35
+ text-align: center;
36
+ color: #666;
37
+ margin-bottom: 2rem;
38
+ font-size: 1.1rem;
39
+ }
40
+ .metric-card {
41
+ background: #f0f2f6;
42
+ padding: 1rem;
43
+ border-radius: 10px;
44
+ border-left: 4px solid #1f77b4;
45
+ }
46
+ .model-box {
47
+ padding: 1.5rem;
48
+ border-radius: 10px;
49
+ margin: 1rem 0;
50
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
51
+ }
52
+ .base-model {
53
+ background-color: #fff5f5;
54
+ border-left: 4px solid #ff4b4b;
55
+ }
56
+ .finetuned-model {
57
+ background-color: #f0fff4;
58
+ border-left: 4px solid #00cc88;
59
+ }
60
+ .theory-box {
61
+ background: #e8f4f8;
62
+ padding: 1.5rem;
63
+ border-radius: 10px;
64
+ margin: 1rem 0;
65
+ border-left: 4px solid #1f77b4;
66
+ }
67
+ </style>
68
+ """, unsafe_allow_html=True)
69
+
70
+ # Title
71
+ st.markdown('<div class="main-header">🚀 Complete LoRA Fine-Tuning Demo</div>', unsafe_allow_html=True)
72
+ st.markdown('<div class="sub-header">Parameter-Efficient Fine-Tuning & Deployment Showcase</div>',
73
+ unsafe_allow_html=True)
74
+
75
# Sidebar Navigation.
# Defines `page` for the whole script. The generation settings below
# (device_option, use_quantization, temperature, max_length, top_p) are
# created ONLY when the "Live Demo" page is selected — later code that reads
# them is gated on the same `page` value, so they are in scope when used.
with st.sidebar:
    st.header("📚 Navigation")
    page = st.radio(
        "Select Section:",
        ["🎯 Live Demo", "📊 Theory & Concepts", "⚙️ Technical Details", "🚀 Deployment Info"],
        label_visibility="collapsed"
    )

    st.divider()

    if page == "🎯 Live Demo":
        st.header("⚙️ Model Settings")

        # Inference placement choice; consumed by load_models().
        device_option = st.selectbox(
            "Inference Device",
            ["Auto (GPU if available)", "Force CPU", "Force GPU"],
            help="Compare CPU vs GPU inference speed"
        )

        # 8-bit loading via bitsandbytes; only honoured on CUDA (see load_models).
        use_quantization = st.checkbox(
            "Use 8-bit Quantization",
            value=False,
            help="Reduces memory usage, slightly slower"
        )

        # Sampling hyperparameters forwarded to generate_response().
        temperature = st.slider("Temperature", 0.1, 1.0, 0.3, 0.1)
        max_length = st.slider("Max Length", 50, 400, 200, 10)
        top_p = st.slider("Top P", 0.1, 1.0, 0.95, 0.05)

        st.divider()

        # Static headline numbers for the project (display only).
        st.header("📊 Quick Stats")
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Base Model", "82M params")
            st.metric("Adapter Size", "~3 MB")
        with col2:
            st.metric("Trainable", "0.4%")
            st.metric("Training Time", "~30 min")
115
+
116
+
117
# Cache model loading
@st.cache_resource
def load_models(use_quantization=False, device_option="Auto"):
    """Load the tokenizer, the frozen base model and the LoRA fine-tuned model.

    Cached by Streamlit, so the (slow) download/load runs once per
    (use_quantization, device_option) combination.

    Args:
        use_quantization: load both models in 8-bit via bitsandbytes.
            Only honoured when a CUDA device is actually available.
        device_option: "Force CPU", "Force GPU", or anything else
            ("Auto...") which uses CUDA when available.

    Returns:
        Tuple of (tokenizer, base_model, finetuned_model, device) where
        `device` is the string "cuda" or "cpu".
    """
    base_model_name = "distilgpt2"
    adapter_path = "./models/lora_adapters"

    # Resolve the target device. "Force GPU" still falls back to CPU when no
    # CUDA device exists, so the app never crashes on CPU-only hosts — this
    # collapses the original duplicated "Force GPU"/"Auto" branches.
    if device_option == "Force CPU":
        device = "cpu"
    else:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # Quantization only works on CUDA with bitsandbytes.
    quantized = use_quantization and device == "cuda"

    with st.spinner("🔄 Loading models..."):
        # GPT-2 family ships without a pad token; reuse EOS for padding.
        tokenizer = AutoTokenizer.from_pretrained(base_model_name)
        tokenizer.pad_token = tokenizer.eos_token

        if quantized:
            quantization_config = BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_threshold=6.0
            )
            base_model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                quantization_config=quantization_config,
                device_map="auto"
            )
            finetuned_model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                quantization_config=quantization_config,
                device_map="auto"
            )
        else:
            # Standard fp32 loading.
            base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
            finetuned_model = AutoModelForCausalLM.from_pretrained(base_model_name)

        # Attach the trained LoRA adapters on top of the second copy.
        finetuned_model = PeftModel.from_pretrained(finetuned_model, adapter_path)

        if not quantized:
            # BUG FIX: 8-bit models are already placed by device_map="auto"
            # and raise ValueError on `.to(device)` — only move fp32 models.
            base_model.to(device)
            finetuned_model.to(device)

        # Inference only: disable dropout etc.
        base_model.eval()
        finetuned_model.eval()

    return tokenizer, base_model, finetuned_model, device
165
+
166
+
167
def get_model_size_mb(model):
    """Return the in-memory footprint of *model* in MiB.

    Counts both trainable/frozen parameters and registered buffers
    (e.g. batch-norm running stats), each as nelement * element_size bytes.
    """
    total_bytes = 0
    for tensor in model.parameters():
        total_bytes += tensor.nelement() * tensor.element_size()
    for tensor in model.buffers():
        total_bytes += tensor.nelement() * tensor.element_size()
    return total_bytes / (1024 ** 2)
172
+
173
+
174
def generate_response(model, tokenizer, prompt, device, temperature, max_length, top_p):
    """Run one sampled generation for *prompt* and return the decoded text.

    The prompt is wrapped in the same "### Instruction / ### Code" template
    used during fine-tuning. The returned string is the raw decode of the
    full sequence, so it includes the template prefix.
    """
    formatted_input = f"### Instruction:\n{prompt}\n\n### Code:\n"
    encoded = tokenizer(formatted_input, return_tensors="pt", padding=True)
    # Move every input tensor onto the model's device.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    sampling_args = dict(
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Inference only — no gradients needed.
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_args)

    return tokenizer.decode(generated[0], skip_special_tokens=True)
194
+
195
+
196
# =============================================================================
# PAGE 1: LIVE DEMO
# =============================================================================
# Side-by-side comparison of the frozen base model and the LoRA fine-tuned
# model on the same instruction, with timing metrics.
if page == "🎯 Live Demo":
    # Load models (cached by st.cache_resource, so repeat visits are fast).
    try:
        # NOTE(review): the `'name' in dir()` guards are redundant here —
        # both widgets are created in the sidebar under the same
        # `page == "🎯 Live Demo"` condition, so the names always exist on
        # this path. Left as-is (behavior unchanged).
        tokenizer, base_model, finetuned_model, device = load_models(
            use_quantization=use_quantization if 'use_quantization' in dir() else False,
            device_option=device_option if 'device_option' in dir() else "Auto"
        )

        # Show device info
        device_emoji = "🚀" if device == "cuda" else "🐢"
        if device == "cuda":
            st.success(f"{device_emoji} Running on GPU: {torch.cuda.get_device_name(0)}")
        else:
            st.info(f"{device_emoji} Running on CPU (slower but works!)")

        # Show quantization status
        if use_quantization and device == "cuda":
            st.info("⚡ 8-bit quantization enabled - Lower memory usage!")

    except Exception as e:
        # Any load failure (missing adapter dir, OOM, ...) halts the page.
        st.error(f"❌ Error loading models: {str(e)}")
        st.stop()

    # Sample prompts the user can pick instead of typing their own.
    st.header("💬 Try the Demo")

    sample_prompts = [
        "Write a Python function to calculate factorial",
        "Create a function to check if a string is palindrome",
        "Write code to merge two sorted lists",
        "Implement a function to find the largest element in a list",
        "Create a Python function to check if a number is prime",
        "Write code to reverse a linked list",
        "Implement binary search algorithm in Python"
    ]

    col1, col2 = st.columns([3, 1])
    with col1:
        use_sample = st.selectbox("Select prompt or write custom:", ["Custom"] + sample_prompts)
    with col2:
        # Empty writes are vertical spacers to align the columns.
        st.write("")
        st.write("")

    # Resolve the instruction: free-form text area or the chosen sample.
    if use_sample == "Custom":
        user_instruction = st.text_area(
            "Enter your instruction:",
            height=100,
            placeholder="e.g., Write a Python function to sort a dictionary by values"
        )
    else:
        user_instruction = use_sample
        st.info(f"💡 Prompt: {user_instruction}")

    # Generate button: run BOTH models on the same prompt and time each.
    if st.button("🚀 Generate Responses", type="primary", use_container_width=True):
        if user_instruction.strip():

            col_base, col_finetuned = st.columns(2)

            with col_base:
                st.markdown('<div class="model-box base-model">', unsafe_allow_html=True)
                st.subheader("🔴 Base Model (Untrained)")

                with st.spinner("Generating..."):
                    # Wall-clock timing around the whole generate call.
                    start_time = time.time()
                    base_response = generate_response(
                        base_model, tokenizer, user_instruction, device,
                        temperature, max_length, top_p
                    )
                    base_time = time.time() - start_time

                st.code(base_response, language="python")
                st.caption(f"⏱️ Generation time: {base_time:.3f}s")
                st.markdown('</div>', unsafe_allow_html=True)

            with col_finetuned:
                st.markdown('<div class="model-box finetuned-model">', unsafe_allow_html=True)
                st.subheader("🟢 Fine-tuned Model (+ LoRA)")

                with st.spinner("Generating..."):
                    start_time = time.time()
                    finetuned_response = generate_response(
                        finetuned_model, tokenizer, user_instruction, device,
                        temperature, max_length, top_p
                    )
                    finetuned_time = time.time() - start_time

                st.code(finetuned_response, language="python")
                st.caption(f"⏱️ Generation time: {finetuned_time:.3f}s")
                st.markdown('</div>', unsafe_allow_html=True)

            # Performance Analysis: word counts and relative speed.
            st.divider()
            st.subheader("📊 Performance Analysis")

            col1, col2, col3, col4 = st.columns(4)

            with col1:
                st.metric("Base Response", f"{len(base_response.split())} words")
            with col2:
                st.metric("Fine-tuned Response", f"{len(finetuned_response.split())} words")
            with col3:
                # Positive % means the fine-tuned model was faster than base.
                speed_diff = ((base_time - finetuned_time) / base_time) * 100
                st.metric("Speed Difference", f"{speed_diff:+.1f}%")
            with col4:
                st.metric("Device", device.upper())

            st.success("✅ Notice: Base model produces gibberish, fine-tuned generates actual Python code!")

        else:
            st.warning("⚠️ Please enter an instruction!")
310
+
311
+ # =============================================================================
312
+ # PAGE 2: THEORY & CONCEPTS
313
+ # =============================================================================
314
+ elif page == "📊 Theory & Concepts":
315
+ st.header("📚 Theory & Key Concepts")
316
+
317
+ tab1, tab2, tab3, tab4 = st.tabs([
318
+ "🎓 Pre-training vs Fine-tuning",
319
+ "🔧 LoRA & PEFT",
320
+ "⚡ Training vs Inference",
321
+ "📏 Trade-offs"
322
+ ])
323
+
324
+ with tab1:
325
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
326
+ st.subheader("Pre-training vs Fine-tuning")
327
+
328
+ col1, col2 = st.columns(2)
329
+
330
+ with col1:
331
+ st.markdown("### 🏗️ Pre-training")
332
+ st.markdown("""
333
+ - **Task**: Learn general language understanding
334
+ - **Data**: Massive unlabeled text (billions of tokens)
335
+ - **Cost**: Extremely expensive ($$$$$)
336
+ - **Time**: Weeks to months
337
+ - **Example**: GPT, BERT, LLaMA training
338
+ - **Goal**: General purpose model
339
+ """)
340
+
341
+ with col2:
342
+ st.markdown("### 🎯 Fine-tuning")
343
+ st.markdown("""
344
+ - **Task**: Adapt to specific domain/task
345
+ - **Data**: Smaller labeled dataset (thousands)
346
+ - **Cost**: Much cheaper ($$)
347
+ - **Time**: Hours to days
348
+ - **Example**: Code generation, Q&A, summarization
349
+ - **Goal**: Specialized model
350
+ """)
351
+
352
+ st.divider()
353
+
354
+ st.markdown("### 📊 Our Project: Transfer Learning")
355
+ st.info("""
356
+ **We started with**: Pre-trained `distilgpt2` (general language model)
357
+ **We fine-tuned on**: Python code instructions (5000 samples)
358
+ **Result**: Model now generates Python code instead of general text!
359
+
360
+ This is **Transfer Learning** - leveraging pre-trained knowledge for new tasks.
361
+ """)
362
+ st.markdown('</div>', unsafe_allow_html=True)
363
+
364
+ with tab2:
365
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
366
+ st.subheader("LoRA: Low-Rank Adaptation")
367
+
368
+ col1, col2 = st.columns([1, 1])
369
+
370
+ with col1:
371
+ st.markdown("### 🔴 Full Fine-tuning (Expensive)")
372
+ st.markdown("""
373
+ ```
374
+ Total Parameters: 82M
375
+ Trainable: 82M (100%)
376
+ Memory: High
377
+ Time: Long
378
+ GPU: Required (expensive)
379
+ Checkpoint: 320 MB
380
+ ```
381
+ **Problems**:
382
+ - ❌ Expensive GPUs needed
383
+ - ❌ Long training time
384
+ - ❌ Large model checkpoints
385
+ - ❌ Risk of catastrophic forgetting
386
+ """)
387
+
388
+ with col2:
389
+ st.markdown("### 🟢 LoRA Fine-tuning (Efficient)")
390
+ st.markdown("""
391
+ ```
392
+ Total Parameters: 82M
393
+ Trainable: 295K (0.36%)
394
+ Memory: Low
395
+ Time: Fast
396
+ GPU: Optional (Colab free tier OK)
397
+ Checkpoint: 3 MB
398
+ ```
399
+ **Advantages**:
400
+ - ✅ Train on free GPUs
401
+ - ✅ Fast training (~30 min)
402
+ - ✅ Tiny adapter files
403
+ - ✅ Preserve base model knowledge
404
+ """)
405
+
406
+ st.divider()
407
+
408
+ st.markdown("### 🧮 How LoRA Works")
409
+ st.markdown("""
410
+ Instead of updating all weights `W`, LoRA adds small adapter matrices:
411
+
412
+ ```
413
+ W_new = W_frozen + ΔW
414
+ where ΔW = B × A (low-rank decomposition)
415
+ ```
416
+
417
+ **Our Configuration**:
418
+ - `r = 16` (rank - controls adapter capacity)
419
+ - `alpha = 32` (scaling factor)
420
+ - Target modules: Attention layers only
421
+ - Result: 99.6% fewer trainable parameters!
422
+ """)
423
+ st.markdown('</div>', unsafe_allow_html=True)
424
+
425
+ with tab3:
426
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
427
+ st.subheader("Training vs Inference")
428
+
429
+ col1, col2 = st.columns(2)
430
+
431
+ with col1:
432
+ st.markdown("### 🏋️ Training Phase")
433
+ st.markdown("""
434
+ **What happens**:
435
+ - Forward pass through model
436
+ - Calculate loss (prediction error)
437
+ - Backward propagation (gradients)
438
+ - Update weights (only LoRA adapters)
439
+
440
+ **Requirements**:
441
+ - GPU highly recommended
442
+ - More memory needed
443
+ - Longer time
444
+ - Batch processing
445
+
446
+ **Our Training**:
447
+ - Dataset: 5000 Python code examples
448
+ - Time: ~30 minutes (Colab T4 GPU)
449
+ - Memory: ~8 GB VRAM
450
+ - Output: 3 MB adapter file
451
+ """)
452
+
453
+ with col2:
454
+ st.markdown("### 🚀 Inference Phase")
455
+ st.markdown("""
456
+ **What happens**:
457
+ - Load base model + adapters
458
+ - Forward pass only (no backprop)
459
+ - Generate predictions
460
+ - No weight updates
461
+
462
+ **Requirements**:
463
+ - CPU works (slower)
464
+ - GPU faster (optional)
465
+ - Less memory
466
+ - Real-time response
467
+
468
+ **Our Deployment**:
469
+ - Works on: CPU or GPU
470
+ - Load time: ~10-30 seconds
471
+ - Inference: ~1-3 seconds per response
472
+ - Memory: ~2 GB RAM
473
+ """)
474
+
475
+ st.markdown('</div>', unsafe_allow_html=True)
476
+
477
+ with tab4:
478
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
479
+ st.subheader("Trade-offs & Optimization")
480
+
481
+ st.markdown("### ⚖️ Key Trade-offs")
482
+
483
+ col1, col2 = st.columns(2)
484
+
485
+ with col1:
486
+ st.markdown("#### 📏 Model Size vs Accuracy")
487
+ st.markdown("""
488
+ **Larger models**:
489
+ - ✅ Better accuracy
490
+ - ✅ More capacity
491
+ - ❌ Slower inference
492
+ - ❌ More memory
493
+
494
+ **Smaller models**:
495
+ - ✅ Faster inference
496
+ - ✅ Less memory
497
+ - ❌ Lower accuracy
498
+ - ❌ Less capacity
499
+ """)
500
+
501
+ with col2:
502
+ st.markdown("#### ⚡ Speed vs Quality")
503
+ st.markdown("""
504
+ **Higher quality**:
505
+ - More parameters
506
+ - Longer sequences
507
+ - Lower temperature
508
+ - ❌ Slower
509
+
510
+ **Higher speed**:
511
+ - Fewer parameters
512
+ - Shorter sequences
513
+ - Quantization
514
+ - ❌ Potentially lower quality
515
+ """)
516
+
517
+ st.divider()
518
+
519
+ st.markdown("### 🔢 Quantization")
520
+ st.markdown("""
521
+ **What**: Reduce precision of model weights (32-bit → 8-bit)
522
+
523
+ **Benefits**:
524
+ - 75% less memory usage
525
+ - Faster inference on some hardware
526
+ - Enables larger models on limited hardware
527
+
528
+ **Cost**:
529
+ - Slight accuracy loss (~1-2%)
530
+ - Requires calibration
531
+
532
+ **Try it**: Enable "8-bit quantization" in the sidebar on Demo page!
533
+ """)
534
+
535
+ st.markdown('</div>', unsafe_allow_html=True)
536
+
537
+ # =============================================================================
538
+ # PAGE 3: TECHNICAL DETAILS
539
+ # =============================================================================
540
+ elif page == "⚙️ Technical Details":
541
+ st.header("⚙️ Technical Implementation")
542
+
543
+ col1, col2 = st.columns(2)
544
+
545
+ with col1:
546
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
547
+ st.markdown("### 📦 Model Architecture")
548
+ st.markdown("""
549
+ **Base Model**: distilgpt2
550
+ - Type: Causal Language Model
551
+ - Parameters: 82M
552
+ - Layers: 6 transformer blocks
553
+ - Hidden size: 768
554
+ - Attention heads: 12
555
+ - Vocabulary: 50,257 tokens
556
+ """)
557
+ st.markdown('</div>', unsafe_allow_html=True)
558
+
559
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
560
+ st.markdown("### 🔧 LoRA Configuration")
561
+ st.markdown("""
562
+ ```python
563
+ LoraConfig(
564
+ r=16, # Rank
565
+ lora_alpha=32, # Scaling
566
+ target_modules=["c_attn"], # Attention only
567
+ lora_dropout=0.05,
568
+ task_type="CAUSAL_LM"
569
+ )
570
+ ```
571
+
572
+ **Trainable Parameters**: 294,912 (0.36%)
573
+ **Adapter Size**: ~3 MB
574
+ """)
575
+ st.markdown('</div>', unsafe_allow_html=True)
576
+
577
+ with col2:
578
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
579
+ st.markdown("### 📊 Dataset")
580
+ st.markdown("""
581
+ **Name**: Python Code Instructions (18k Alpaca)
582
+ **Source**: `iamtarun/python_code_instructions_18k_alpaca`
583
+ **Used**: 5000 samples
584
+ - Training: 4500 samples
585
+ - Validation: 500 samples
586
+
587
+ **Format**:
588
+ ```
589
+ Instruction: Write Python code for X
590
+ Code: def function()...
591
+ ```
592
+ """)
593
+ st.markdown('</div>', unsafe_allow_html=True)
594
+
595
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
596
+ st.markdown("### 🏋️ Training Hyperparameters")
597
+ st.markdown("""
598
+ ```python
599
+ Epochs: 4
600
+ Batch size: 2 (per device)
601
+ Gradient accumulation: 4
602
+ Learning rate: 3e-4
603
+ Max sequence length: 512
604
+ Optimizer: AdamW
605
+ Scheduler: Linear warmup
606
+ ```
607
+
608
+ **Training Time**: ~30 minutes (T4 GPU)
609
+ **Final Loss**: ~2.5
610
+ """)
611
+ st.markdown('</div>', unsafe_allow_html=True)
612
+
613
+ st.divider()
614
+
615
+ st.markdown("### 🛠️ Tools & Libraries Used")
616
+
617
+ col1, col2, col3 = st.columns(3)
618
+
619
+ with col1:
620
+ st.markdown("""
621
+ **Training**:
622
+ - 🤗 Transformers
623
+ - 🎯 PEFT (LoRA)
624
+ - 🚀 Accelerate
625
+ - 📊 Datasets
626
+ - 🔥 PyTorch
627
+ """)
628
+
629
+ with col2:
630
+ st.markdown("""
631
+ **Deployment**:
632
+ - 🌐 Streamlit
633
+ - 🤗 Hugging Face Hub
634
+ - ⚡ bitsandbytes (quantization)
635
+ - 💾 safetensors
636
+ """)
637
+
638
+ with col3:
639
+ st.markdown("""
640
+ **Infrastructure**:
641
+ - 📓 Google Colab (training)
642
+ - 💻 Local deployment
643
+ - ☁️ Hugging Face Spaces (optional)
644
+ - 🔒 Git LFS (model versioning)
645
+ """)
646
+
647
+ # =============================================================================
648
+ # PAGE 4: DEPLOYMENT INFO
649
+ # =============================================================================
650
+ else: # Deployment Info
651
+ st.header("🚀 Deployment Options")
652
+
653
+ tab1, tab2, tab3 = st.tabs(["💻 Local", "☁️ Cloud", "📊 Comparison"])
654
+
655
+ with tab1:
656
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
657
+ st.markdown("### 💻 Local Deployment (Current)")
658
+
659
+ st.markdown("""
660
+ **Advantages**:
661
+ - ✅ Full control
662
+ - ✅ No API costs
663
+ - ✅ Data privacy
664
+ - ✅ Works offline
665
+ - ✅ Fast iteration
666
+
667
+ **Requirements**:
668
+ - Python 3.8+
669
+ - 2-4 GB RAM
670
+ - Optional: NVIDIA GPU
671
+
672
+ **Setup**:
673
+ ```bash
674
+ pip install streamlit transformers peft torch
675
+ streamlit run app.py
676
+ ```
677
+
678
+ **Best for**: Development, testing, demos
679
+ """)
680
+ st.markdown('</div>', unsafe_allow_html=True)
681
+
682
+ with tab2:
683
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
684
+ st.markdown("### ☁️ Cloud Deployment")
685
+
686
+ st.markdown("#### 🤗 Hugging Face Spaces (Recommended)")
687
+ st.markdown("""
688
+ **Features**:
689
+ - ✅ Free tier available
690
+ - ✅ Auto-deploys from Git
691
+ - ✅ Public URL
692
+ - ✅ No server management
693
+ - ✅ Built-in CI/CD
694
+
695
+ **Setup**:
696
+ 1. Create account on huggingface.co
697
+ 2. Create new Space (Streamlit)
698
+ 3. Upload: app.py, requirements.txt, models/
699
+ 4. Auto-deploys!
700
+
701
+ **URL**: `https://huggingface.co/spaces/YOUR_USERNAME/lora-demo`
702
+ """)
703
+
704
+ st.divider()
705
+
706
+ st.markdown("#### Other Options")
707
+
708
+ col1, col2 = st.columns(2)
709
+
710
+ with col1:
711
+ st.markdown("""
712
+ **Streamlit Cloud**:
713
+ - Free for public apps
714
+ - GitHub integration
715
+ - Easy deployment
716
+ - Resource limits
717
+ """)
718
+
719
+ with col2:
720
+ st.markdown("""
721
+ **AWS/GCP/Azure**:
722
+ - Full control
723
+ - Scalable
724
+ - More expensive
725
+ - Requires devops
726
+ """)
727
+
728
+ st.markdown('</div>', unsafe_allow_html=True)
729
+
730
+ with tab3:
731
+ st.markdown('<div class="theory-box">', unsafe_allow_html=True)
732
+ st.markdown("### 📊 Deployment Comparison")
733
+
734
+ comparison_data = {
735
+ "Feature": ["Cost", "Setup Time", "Control", "Scalability", "Maintenance", "Best For"],
736
+ "Local": ["Free", "5 mins", "Full", "Limited", "Manual", "Development"],
737
+ "HF Spaces": ["Free", "10 mins", "Medium", "Auto", "Minimal", "Demos"],
738
+ "Cloud (AWS)": ["$$$", "1-2 hours", "Full", "High", "Manual", "Production"]
739
+ }
740
+
741
+ st.table(comparison_data)
742
+
743
+ st.divider()
744
+
745
+ st.markdown("### 🎯 CPU vs GPU Inference")
746
+
747
+ col1, col2 = st.columns(2)
748
+
749
+ with col1:
750
+ st.markdown("""
751
+ **CPU Inference**:
752
+ - Speed: 2-5 seconds/response
753
+ - Cost: $0 (uses existing hardware)
754
+ - Memory: ~2 GB RAM
755
+ - Best for: Low-traffic apps, development
756
+ """)
757
+
758
+ with col2:
759
+ st.markdown("""
760
+ **GPU Inference**:
761
+ - Speed: 0.5-2 seconds/response
762
+ - Cost: $0.50-2/hour (cloud)
763
+ - Memory: ~4-8 GB VRAM
764
+ - Best for: High-traffic, real-time apps
765
+ """)
766
+
767
+ st.info("💡 **Tip**: Start with CPU deployment, upgrade to GPU only if needed!")
768
+
769
+ st.markdown('</div>', unsafe_allow_html=True)
770
+
771
+ # Footer
772
+ st.divider()
773
+ st.markdown("""
774
+ <div style="text-align: center; color: #666; padding: 1rem;">
775
+ <p><strong>🎓 Group 6: Model Adaptation, Efficient Fine-Tuning & Deployment of LLMs</strong></p>
776
+ <p>Built with Streamlit • Transformers • PEFT • PyTorch</p>
777
+ </div>
778
+ """, unsafe_allow_html=True)
models/lora_adapters/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: distilgpt2
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:distilgpt2
7
+ - lora
8
+ - transformers
9
+ ---
10
+
11
+ # Model Card for Model ID
12
+
13
+ <!-- Provide a quick summary of what the model is/does. -->
14
+
15
+
16
+
17
+ ## Model Details
18
+
19
+ ### Model Description
20
+
21
+ <!-- Provide a longer summary of what this model is. -->
22
+
23
+
24
+
25
+ - **Developed by:** [More Information Needed]
26
+ - **Funded by [optional]:** [More Information Needed]
27
+ - **Shared by [optional]:** [More Information Needed]
28
+ - **Model type:** [More Information Needed]
29
+ - **Language(s) (NLP):** [More Information Needed]
30
+ - **License:** [More Information Needed]
31
+ - **Finetuned from model [optional]:** [More Information Needed]
32
+
33
+ ### Model Sources [optional]
34
+
35
+ <!-- Provide the basic links for the model. -->
36
+
37
+ - **Repository:** [More Information Needed]
38
+ - **Paper [optional]:** [More Information Needed]
39
+ - **Demo [optional]:** [More Information Needed]
40
+
41
+ ## Uses
42
+
43
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
44
+
45
+ ### Direct Use
46
+
47
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
+
49
+ [More Information Needed]
50
+
51
+ ### Downstream Use [optional]
52
+
53
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
54
+
55
+ [More Information Needed]
56
+
57
+ ### Out-of-Scope Use
58
+
59
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
60
+
61
+ [More Information Needed]
62
+
63
+ ## Bias, Risks, and Limitations
64
+
65
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
66
+
67
+ [More Information Needed]
68
+
69
+ ### Recommendations
70
+
71
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
72
+
73
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
74
+
75
+ ## How to Get Started with the Model
76
+
77
+ Use the code below to get started with the model.
78
+
79
+ [More Information Needed]
80
+
81
+ ## Training Details
82
+
83
+ ### Training Data
84
+
85
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ### Training Procedure
90
+
91
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
92
+
93
+ #### Preprocessing [optional]
94
+
95
+ [More Information Needed]
96
+
97
+
98
+ #### Training Hyperparameters
99
+
100
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
101
+
102
+ #### Speeds, Sizes, Times [optional]
103
+
104
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
105
+
106
+ [More Information Needed]
107
+
108
+ ## Evaluation
109
+
110
+ <!-- This section describes the evaluation protocols and provides the results. -->
111
+
112
+ ### Testing Data, Factors & Metrics
113
+
114
+ #### Testing Data
115
+
116
+ <!-- This should link to a Dataset Card if possible. -->
117
+
118
+ [More Information Needed]
119
+
120
+ #### Factors
121
+
122
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
123
+
124
+ [More Information Needed]
125
+
126
+ #### Metrics
127
+
128
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
129
+
130
+ [More Information Needed]
131
+
132
+ ### Results
133
+
134
+ [More Information Needed]
135
+
136
+ #### Summary
137
+
138
+
139
+
140
+ ## Model Examination [optional]
141
+
142
+ <!-- Relevant interpretability work for the model goes here -->
143
+
144
+ [More Information Needed]
145
+
146
+ ## Environmental Impact
147
+
148
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
149
+
150
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
151
+
152
+ - **Hardware Type:** [More Information Needed]
153
+ - **Hours used:** [More Information Needed]
154
+ - **Cloud Provider:** [More Information Needed]
155
+ - **Compute Region:** [More Information Needed]
156
+ - **Carbon Emitted:** [More Information Needed]
157
+
158
+ ## Technical Specifications [optional]
159
+
160
+ ### Model Architecture and Objective
161
+
162
+ [More Information Needed]
163
+
164
+ ### Compute Infrastructure
165
+
166
+ [More Information Needed]
167
+
168
+ #### Hardware
169
+
170
+ [More Information Needed]
171
+
172
+ #### Software
173
+
174
+ [More Information Needed]
175
+
176
+ ## Citation [optional]
177
+
178
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
179
+
180
+ **BibTeX:**
181
+
182
+ [More Information Needed]
183
+
184
+ **APA:**
185
+
186
+ [More Information Needed]
187
+
188
+ ## Glossary [optional]
189
+
190
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
191
+
192
+ [More Information Needed]
193
+
194
+ ## More Information [optional]
195
+
196
+ [More Information Needed]
197
+
198
+ ## Model Card Authors [optional]
199
+
200
+ [More Information Needed]
201
+
202
+ ## Model Card Contact
203
+
204
+ [More Information Needed]
205
+ ### Framework versions
206
+
207
+ - PEFT 0.18.0
models/lora_adapters/adapter_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "distilgpt2",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": true,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "c_attn"
33
+ ],
34
+ "target_parameters": null,
35
+ "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
+ "use_dora": false,
38
+ "use_qalora": false,
39
+ "use_rslora": false
40
+ }
models/lora_adapters/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aea375c6a93cbdfd692e73275df4493c92c3f8256e709682545d6b74ae8cff5
3
+ size 1181192
models/lora_adapters/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/lora_adapters/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
models/lora_adapters/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/lora_adapters/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": false,
15
+ "eos_token": "<|endoftext|>",
16
+ "extra_special_tokens": {},
17
+ "model_max_length": 1024,
18
+ "pad_token": "<|endoftext|>",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
models/lora_adapters/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,3 +1,8 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
1
+ streamlit==1.29.0
2
+ transformers==4.36.0
3
+ torch==2.1.0
4
+ peft==0.18.0
5
+ accelerate==0.25.0
6
+ bitsandbytes==0.41.0
7
+ sentencepiece==0.1.99
8
+ protobuf==3.20.3