Spaces:

Nihal2000
/

autoSLM

Sleeping

App Files Files Community

Nihal2000 committed on Aug 10, 2025

Commit

b47fdff

1 Parent(s): 2475af3

Initial commit

Browse files

Files changed (4) hide show

app.py +190 -5
inference_engine.py +132 -0
model_manager.py +148 -0
requirements.txt +7 -0

app.py CHANGED Viewed

@@ -1,7 +1,192 @@
-import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import streamlit as st
+import os
+import time
+from model_manager import ModelManager
+from inference_engine import InferenceEngine
+import torch
+# Page configuration: browser-tab title and icon, wide layout, sidebar expanded on first load.
+st.set_page_config(
+    page_title="Automotive SLM Chatbot",
+    page_icon="🚗",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS, injected as raw HTML on every script run. The classes defined here are
+# the ones used by the HTML snippets in display_chat_message() (.chat-message,
+# .user-message, .assistant-message) and in main() (.main-header, .model-info).
+st.markdown("""
+<style>
+.main-header {
+    font-size: 2.5rem;
+    color: #1f77b4;
+    text-align: center;
+    margin-bottom: 2rem;
+}
+.chat-message {
+    padding: 1rem;
+    border-radius: 0.5rem;
+    margin: 0.5rem 0;
+}
+.user-message {
+    background-color: #e3f2fd;
+    border-left: 4px solid #1976d2;
+}
+.assistant-message {
+    background-color: #f3e5f5;
+    border-left: 4px solid #7b1fa2;
+}
+.model-info {
+    background-color: #f5f5f5;
+    padding: 1rem;
+    border-radius: 0.5rem;
+    border: 1px solid #ddd;
+}
+</style>
+""", unsafe_allow_html=True)
+@st.cache_resource
+def load_model_manager():
+    """Create the ModelManager for the "assets" folder; cached so it is not rebuilt on reruns."""
+    manager = ModelManager("assets")
+    return manager
+def initialize_session_state():
+    """Seed st.session_state with the keys the app reads, leaving existing values untouched."""
+    defaults = (
+        ("messages", []),
+        ("current_model", None),
+        ("inference_engine", None),
+    )
+    for key, initial_value in defaults:
+        # Only fill in keys that are missing so a rerun never wipes the conversation.
+        if key not in st.session_state:
+            st.session_state[key] = initial_value
+def display_chat_message(role, content, model_info=None):
+    """Render one chat turn as a styled HTML bubble.
+
+    Args:
+        role: "user" renders a "You:" bubble; any other value renders an
+            "Assistant:" bubble.
+        content: Message text. It is HTML-escaped before rendering.
+        model_info: Optional label shown in parentheses after "Assistant";
+            ignored for user messages. Also HTML-escaped.
+    """
+    # Function-scope import so this block does not depend on the file's import list.
+    import html
+    # The prompt and the model output are both untrusted text. They end up inside
+    # markup rendered with unsafe_allow_html=True, so escape them to keep stray
+    # tags or scripts from being injected into the page.
+    safe_content = html.escape(str(content))
+    if role == "user":
+        css_class = "user-message"
+        speaker = "You"
+    else:
+        css_class = "assistant-message"
+        speaker = f"Assistant ({html.escape(str(model_info))})" if model_info else "Assistant"
+    # The markup is emitted on one line with no leading indentation so that Markdown
+    # cannot treat it as an indented code block when the content spans several lines.
+    st.markdown(
+        f'<div class="chat-message {css_class}"><strong>{speaker}:</strong> {safe_content}</div>',
+        unsafe_allow_html=True,
+    )
+def main():
+    """Render the chatbot page and handle one Streamlit script run.
+
+    The sidebar lets the user pick and load a model and adjust the sampling
+    sliders. The main area replays the stored conversation and, when a prompt is
+    submitted, asks the active InferenceEngine for a reply and appends both turns
+    to st.session_state.messages.
+    """
+    # Function-scope import: only needed to escape values placed into raw HTML below.
+    import html
+    initialize_session_state()
+    st.markdown('<h1 class="main-header">🚗 Automotive SLM Chatbot</h1>', unsafe_allow_html=True)
+    model_manager = load_model_manager()
+    # Sidebar for model selection and settings
+    with st.sidebar:
+        st.header("⚙️ Model Settings")
+        available_models = model_manager.get_available_models()
+        if not available_models:
+            st.error("No models found in assets folder!")
+            st.stop()
+        # Keep the already-loaded model pre-selected; otherwise default to the first entry.
+        current_model = st.session_state.current_model
+        default_index = available_models.index(current_model) if current_model in available_models else 0
+        selected_model = st.selectbox("Select Model:", available_models, index=default_index)
+        # Load model if changed
+        if selected_model != current_model:
+            try:
+                with st.spinner(f"Loading {selected_model}..."):
+                    model, tokenizer, config = model_manager.load_model(selected_model)
+                    st.session_state.inference_engine = InferenceEngine(model, tokenizer, config)
+                    st.session_state.current_model = selected_model
+            except Exception as e:
+                # Drop any previously loaded engine so the chat never answers with a
+                # model other than the one shown in the selectbox.
+                st.session_state.inference_engine = None
+                st.session_state.current_model = None
+                st.error(f"Failed to load {selected_model}: {e}")
+            else:
+                st.success(f"Model {selected_model} loaded successfully!")
+        # Model information
+        if st.session_state.inference_engine is not None:
+            st.subheader("📊 Model Info")
+            model_info = model_manager.get_model_info(selected_model)
+            if "error" in model_info:
+                # get_model_info returns only an "error" key when the file is missing.
+                st.warning(model_info["error"])
+            else:
+                # Values are placed into raw HTML, so escape them first.
+                info_name, info_type, info_parameters, info_size = (
+                    html.escape(str(model_info[key]))
+                    for key in ("name", "type", "parameters", "size")
+                )
+                st.markdown(f"""
+            <div class="model-info">
+                <strong>Model:</strong> {info_name}<br>
+                <strong>Type:</strong> {info_type}<br>
+                <strong>Parameters:</strong> {info_parameters}<br>
+                <strong>Size:</strong> {info_size}
+            </div>
+            """, unsafe_allow_html=True)
+        # Generation settings
+        st.subheader("🎛️ Generation Settings")
+        max_tokens = st.slider("Max Tokens", 10, 200, 50)
+        temperature = st.slider("Temperature", 0.1, 2.0, 0.8, 0.1)
+        top_p = st.slider("Top P", 0.1, 1.0, 0.9, 0.05)
+        top_k = st.slider("Top K", 1, 100, 50)
+        # Clear chat button
+        if st.button("🗑️ Clear Chat"):
+            st.session_state.messages = []
+            st.rerun()
+    # Main chat interface
+    if st.session_state.inference_engine is None:
+        st.info("Please select a model from the sidebar to start chatting.")
+        return
+    # Display chat history
+    chat_container = st.container()
+    with chat_container:
+        for message in st.session_state.messages:
+            display_chat_message(
+                message["role"],
+                message["content"],
+                message.get("model", None)
+            )
+    # Chat input
+    prompt = st.chat_input("Ask me about automotive topics...")
+    if prompt:
+        # Store the user turn, then draw it inside the history container so it
+        # appears above the input box.
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with chat_container:
+            display_chat_message("user", prompt)
+        # Generate response
+        with st.spinner("Generating response..."):
+            try:
+                response = st.session_state.inference_engine.generate_response(
+                    prompt,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    top_k=top_k
+                )
+                # Record which model produced the reply so the history can label it.
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": response,
+                    "model": selected_model
+                })
+                with chat_container:
+                    display_chat_message("assistant", response, selected_model)
+            except Exception as e:
+                st.error(f"Error generating response: {str(e)}")
+    # Footer
+    st.markdown("---")
+    st.markdown("*Powered by Automotive SLM - Specialized for automotive assistance*")
+# Run the app only when this file is executed as the entry script, not when imported.
+if __name__ == "__main__":
+    main()

inference_engine.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import torch
+import torch.nn.functional as F
+import numpy as np
+import onnxruntime as ort
+from typing import Union, Any
+import time
+class InferenceEngine:
+    """Generate text from either a PyTorch model or an ONNX Runtime session.
+
+    The backend is chosen once at construction: an ``ort.InferenceSession`` is
+    driven by the NumPy sampling loop in ``_generate_onnx``; any other model is
+    expected to expose a ``generate`` method and goes through
+    ``_generate_pytorch``.
+    """
+    # Prompt length cap, in tokens, applied on both backends.
+    MAX_PROMPT_TOKENS = 200
+    # Returned when generation yields no text.
+    FALLBACK_RESPONSE = "I'm sorry, I couldn't generate a response."
+    def __init__(self, model: Any, tokenizer: Any, config: Any):
+        """Store the model, tokenizer and config, and detect the backend."""
+        self.model = model
+        self.tokenizer = tokenizer
+        self.config = config
+        self.is_onnx = isinstance(model, ort.InferenceSession)
+        self.device = torch.device('cpu')  # Force CPU for edge deployment
+    def generate_response(self, prompt: str, max_tokens: int = 50, temperature: float = 0.8,
+                         top_p: float = 0.9, top_k: int = 50) -> str:
+        """Generate a reply to ``prompt`` with the configured backend.
+
+        Args:
+            prompt: User text to continue.
+            max_tokens: Maximum number of new tokens to generate.
+            temperature: Sampling temperature.
+            top_p: Nucleus sampling threshold.
+            top_k: Number of highest-scoring tokens kept before sampling.
+
+        Returns:
+            The generated text. Failures are not raised: any exception is
+            converted to the string "Error generating response: <message>".
+        """
+        try:
+            if self.is_onnx:
+                return self._generate_onnx(prompt, max_tokens, temperature, top_p, top_k)
+            return self._generate_pytorch(prompt, max_tokens, temperature, top_p, top_k)
+        except Exception as e:
+            return f"Error generating response: {str(e)}"
+    def _generate_pytorch(self, prompt: str, max_tokens: int, temperature: float,
+                         top_p: float, top_k: int) -> str:
+        """Generate a reply by delegating sampling to ``self.model.generate``."""
+        inputs = self.tokenizer(
+            prompt, return_tensors="pt", max_length=self.MAX_PROMPT_TOKENS, truncation=True
+        )
+        input_ids = inputs['input_ids']
+        with torch.no_grad():
+            generated = self.model.generate(
+                input_ids=input_ids,
+                max_new_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                top_k=top_k,
+                do_sample=True,
+                eos_token_id=self.tokenizer.eos_token_id,
+                pad_token_id=self.tokenizer.pad_token_id
+            )
+        response = self.tokenizer.decode(generated[0], skip_special_tokens=True)
+        # The decoded sequence may start with the prompt itself; strip that echo.
+        if response.startswith(prompt):
+            response = response[len(prompt):].strip()
+        return response if response else self.FALLBACK_RESPONSE
+    def _generate_onnx(self, prompt: str, max_tokens: int, temperature: float,
+                      top_p: float, top_k: int) -> str:
+        """Generate a reply by running the ONNX session one token at a time.
+
+        Each step feeds the whole sequence so far to the session, samples the
+        next token from the logits of the last position, and stops at the
+        tokenizer's EOS token or after ``max_tokens`` tokens.
+        """
+        # Same prompt cap as the PyTorch path so both backends see the same input.
+        tokens = self.tokenizer.encode(
+            prompt, max_length=self.MAX_PROMPT_TOKENS, truncation=True
+        )
+        input_ids = np.array([tokens], dtype=np.int64)
+        # Session I/O names do not change between steps; look them up once.
+        input_name = self.model.get_inputs()[0].name
+        output_name = self.model.get_outputs()[0].name
+        eos_token_id = self.tokenizer.eos_token_id
+        generated_tokens = []
+        for _ in range(max_tokens):
+            outputs = self.model.run([output_name], {input_name: input_ids})
+            # Logits of the last position, promoted to float64 for stable sampling math.
+            logits = np.asarray(outputs[0][0, -1, :], dtype=np.float64)
+            next_token = self._sample_next_token(logits, temperature, top_p, top_k)
+            if next_token == eos_token_id:
+                break
+            generated_tokens.append(next_token)
+            input_ids = np.concatenate(
+                [input_ids, np.array([[next_token]], dtype=np.int64)], axis=1
+            )
+        if not generated_tokens:
+            return self.FALLBACK_RESPONSE
+        response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
+        # Match the PyTorch path: never hand an empty string back to the caller.
+        return response if response else self.FALLBACK_RESPONSE
+    def _sample_next_token(self, logits: np.ndarray, temperature: float,
+                           top_p: float, top_k: int) -> int:
+        """Pick the next token id from 1-D ``logits``.
+
+        A non-positive temperature selects the highest-scoring token (greedy
+        decoding). Otherwise temperature scaling, top-k and top-p filtering are
+        applied in that order before sampling.
+        """
+        if temperature <= 0:
+            return int(np.argmax(logits))
+        logits = logits / temperature
+        # Only filter when top_k actually removes something. np.argpartition raises
+        # if asked for more elements than the vocabulary holds.
+        if 0 < top_k < logits.shape[-1]:
+            keep = np.argpartition(logits, -top_k)[-top_k:]
+            masked = np.full_like(logits, -np.inf)
+            masked[keep] = logits[keep]
+            logits = masked
+        probs = self._softmax(logits)
+        if top_p < 1.0:
+            probs = self._top_p_filtering(probs, top_p)
+        # Return a plain int so downstream code never sees a NumPy scalar.
+        return int(np.random.choice(len(probs), p=probs))
+    def _softmax(self, x: np.ndarray) -> np.ndarray:
+        """Return the softmax of ``x``, shifted by its maximum to avoid overflow."""
+        exp_x = np.exp(x - np.max(x))
+        return exp_x / np.sum(exp_x)
+    def _top_p_filtering(self, probs: np.ndarray, top_p: float) -> np.ndarray:
+        """Keep the most probable tokens up to cumulative mass ``top_p`` and renormalize.
+
+        The token that crosses the threshold is kept, so at least one token
+        always survives.
+        """
+        order = np.argsort(probs)[::-1]
+        ranked = probs[order]
+        cutoff = np.searchsorted(np.cumsum(ranked), top_p) + 1
+        kept = np.zeros_like(probs)
+        kept[order[:cutoff]] = ranked[:cutoff]
+        return kept / np.sum(kept)

model_manager.py ADDED Viewed

	@@ -0,0 +1,148 @@

+import os
+import torch
+import json
+from transformers import AutoTokenizer
+from dataclasses import dataclass
+from typing import Dict, List, Tuple, Any
+import onnxruntime as ort
+import numpy as np
+@dataclass
+class AutomotiveSLMConfig:
+    """Hyperparameters and default generation settings for the Automotive SLM.
+
+    ModelManager builds this from a PyTorch checkpoint's 'config' entry when one
+    is present and otherwise uses the defaults below; ONNX models always get the
+    defaults.
+    """
+    model_name: str = "Automotive-SLM-Edge-3M"
+    # Architecture sizes, presumably consumed by model_architecture.AutomotiveSLM
+    # (that module is not shown here; confirm the exact meaning there).
+    d_model: int = 256
+    n_layer: int = 4
+    n_head: int = 4
+    vocab_size: int = 50257  # GPT-2 tokenizer vocabulary size
+    n_positions: int = 256
+    # Mixture-of-experts settings; names suggest expert count/capacity and the
+    # router's auxiliary-loss weight. TODO confirm against the model code.
+    use_moe: bool = True
+    n_experts: int = 4
+    expert_capacity: int = 2
+    moe_intermediate_size: int = 384
+    router_aux_loss_coef: float = 0.01
+    # Rotary position embedding settings (by name); confirm against the model code.
+    rotary_dim: int = 64
+    rope_base: float = 10000
+    dropout: float = 0.05
+    layer_norm_epsilon: float = 1e-5
+    # Default generation settings. The first four match the defaults of
+    # InferenceEngine.generate_response; repetition_penalty is not read by the
+    # inference code visible in this commit.
+    max_gen_length: int = 50
+    temperature: float = 0.8
+    top_p: float = 0.9
+    top_k: int = 50
+    repetition_penalty: float = 1.1
+class ModelManager:
+    """Discover, describe and load model files stored in an assets directory.
+
+    Supported formats are PyTorch checkpoints (.pt, .pth) and ONNX graphs
+    (.onnx). Loaded models are cached per file name for the lifetime of the
+    manager.
+    """
+    def __init__(self, assets_path: str):
+        """Remember ``assets_path`` and create the directory if it is missing."""
+        self.assets_path = assets_path
+        self.models_cache = {}
+        self.supported_extensions = ['.pt', '.pth', '.onnx']
+        if not os.path.isdir(assets_path):
+            # exist_ok avoids a crash if another process creates the directory
+            # between the check above and this call.
+            os.makedirs(assets_path, exist_ok=True)
+            print(f"Created assets directory: {assets_path}")
+    def get_available_models(self) -> "List[str]":
+        """Return the sorted file names in the assets folder that have a supported extension."""
+        if not os.path.isdir(self.assets_path):
+            return []
+        # Skip sub-directories: only regular files can be loaded as models.
+        return sorted(
+            entry
+            for entry in os.listdir(self.assets_path)
+            if os.path.splitext(entry)[1].lower() in self.supported_extensions
+            and os.path.isfile(os.path.join(self.assets_path, entry))
+        )
+    def get_model_info(self, model_name: str) -> "Dict[str, str]":
+        """Describe a model file for display.
+
+        Returns:
+            A dict with "name", "type", "parameters" and "size" keys, or
+            ``{"error": "Model not found"}`` when the file does not exist.
+        """
+        model_path = os.path.join(self.assets_path, model_name)
+        if not os.path.exists(model_path):
+            return {"error": "Model not found"}
+        size_mb = os.path.getsize(model_path) / (1024 * 1024)
+        ext = os.path.splitext(model_name)[1].lower()
+        model_type = "PyTorch" if ext in ['.pt', '.pth'] else "ONNX"
+        # NOTE(review): the parameter count is a fixed label, not derived from the
+        # file; confirm "~17M" is right for every shipped model.
+        parameters = "~17M (Quantized)" if "int8" in model_name.lower() else "~17M"
+        return {
+            "name": model_name,
+            "type": model_type,
+            "parameters": parameters,
+            "size": f"{size_mb:.1f} MB"
+        }
+    def load_model(self, model_name: str) -> "Tuple[Any, Any, AutomotiveSLMConfig]":
+        """Load a model with its tokenizer and config, reusing a cached result if present.
+
+        Returns:
+            ``(model, tokenizer, config)`` where ``model`` is the PyTorch module
+            or the ONNX Runtime session.
+
+        Raises:
+            FileNotFoundError: The file is not in the assets folder.
+            ValueError: The file extension is not a supported model format.
+        """
+        if model_name in self.models_cache:
+            return self.models_cache[model_name]
+        model_path = os.path.join(self.assets_path, model_name)
+        if not os.path.exists(model_path):
+            raise FileNotFoundError(f"Model file not found: {model_path}")
+        # Reject unsupported formats before the tokenizer load below, so a bad
+        # file name fails fast.
+        ext = os.path.splitext(model_name)[1].lower()
+        if ext in ['.pt', '.pth']:
+            loader = self._load_pytorch_model
+        elif ext == '.onnx':
+            loader = self._load_onnx_model
+        else:
+            raise ValueError(f"Unsupported model format: {ext}")
+        # Load tokenizer (always GPT-2 for our models)
+        tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        if tokenizer.pad_token is None:
+            # Reuse EOS as the padding token when the tokenizer defines none.
+            tokenizer.pad_token = tokenizer.eos_token
+        model, config = loader(model_path)
+        self.models_cache[model_name] = (model, tokenizer, config)
+        return model, tokenizer, config
+    def _load_pytorch_model(self, model_path: str) -> "Tuple[Any, AutomotiveSLMConfig]":
+        """Load a PyTorch checkpoint and return the model in eval mode with its config.
+
+        The checkpoint must contain 'model_state_dict'; an optional 'config'
+        mapping overrides the AutomotiveSLMConfig defaults.
+        """
+        # Imported lazily: the architecture module is only needed for PyTorch checkpoints.
+        from model_architecture import AutomotiveSLM
+        # SECURITY: torch.load can unpickle arbitrary objects depending on the
+        # torch version and its weights_only setting. Only load checkpoints from
+        # a trusted source.
+        checkpoint = torch.load(model_path, map_location='cpu')
+        if 'config' in checkpoint:
+            # Ignore keys the dataclass does not define, so a checkpoint saved with
+            # extra settings still loads instead of failing in the constructor.
+            known_fields = AutomotiveSLMConfig.__dataclass_fields__
+            saved_config = checkpoint['config']
+            ignored = sorted(key for key in saved_config if key not in known_fields)
+            if ignored:
+                print(f"Ignoring unknown config keys: {ignored}")
+            config = AutomotiveSLMConfig(
+                **{key: value for key, value in saved_config.items() if key in known_fields}
+            )
+        else:
+            config = AutomotiveSLMConfig()  # Use default config
+        model = AutomotiveSLM(config)
+        model.load_state_dict(checkpoint['model_state_dict'])
+        model.eval()
+        return model, config
+    def _load_onnx_model(self, model_path: str) -> "Tuple[Any, AutomotiveSLMConfig]":
+        """Create a CPU ONNX Runtime session for ``model_path`` with the default config."""
+        sess_options = ort.SessionOptions()
+        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+        session = ort.InferenceSession(
+            model_path, providers=['CPUExecutionProvider'], sess_options=sess_options
+        )
+        # ONNX files carry no config here, so the defaults are used.
+        return session, AutomotiveSLMConfig()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit>=1.28.0
+torch>=2.0.0
+transformers>=4.30.0
+onnxruntime>=1.15.0
+numpy>=1.24.0
+pandas>=2.0.0
+pillow>=9.5.0