vasu24 commited on
Commit
6cba9ec
·
verified ·
1 Parent(s): 3c5c8c9

Upload huggingface_handler.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. huggingface_handler.py +126 -0
huggingface_handler.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GROOT v3 - Hugging Face Inference Handler
3
+ This file defines how to load and run the model on HF Inference Endpoints
4
+ """
5
+
6
+ from typing import Any, Dict, List
+
+ import torch
+ from peft import PeftModel
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     BitsAndBytesConfig,
+     pipeline,
+ )
10
+
11
+
12
class EndpointHandler:
    """Hugging Face Inference Endpoints handler for GROOT v3.

    Loads the Llama 3.2 1B Instruct base model in 4-bit, applies the
    fine-tuned LoRA adapter uploaded alongside this file, merges it for
    lower-latency inference, and serves single-turn chat completions.
    """

    def __init__(self, path: str = ""):
        """Initialize the model and tokenizer.

        Args:
            path: Path to the uploaded model/adapter files (the Inference
                Endpoints runtime provides this).
        """
        # Load base model
        base_model_name = "unsloth/Llama-3.2-1B-Instruct"

        print(f"Loading tokenizer from {base_model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)

        print(f"Loading base model from {base_model_name}...")
        # Passing load_in_4bit=True directly to from_pretrained is
        # deprecated in recent transformers releases; use the explicit
        # BitsAndBytesConfig instead (same 4-bit quantization behavior).
        self.model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        )

        # Load LoRA adapter from the uploaded files
        print(f"Loading LoRA adapter from {path}...")
        self.model = PeftModel.from_pretrained(self.model, path)

        # Merge for faster inference.
        # NOTE(review): merging into a 4-bit quantized base dequantizes
        # and re-quantizes the weights, which is lossy — confirm output
        # quality matches the unmerged adapter before relying on this.
        print("Merging adapter with base model...")
        self.model = self.model.merge_and_unload()

        self.model.eval()
        print("GROOT v3 loaded successfully!")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
        """Handle inference requests.

        Args:
            data: {"inputs": "User message here"} — "inputs" may be the
                raw string or a dict with a "message" key.

        Returns:
            [{"generated_text": "Response here"}]
        """
        # Extract user message
        inputs = data.pop("inputs", data)
        user_message = inputs if isinstance(inputs, str) else inputs.get("message", "")

        # System prompt (matches training)
        system_prompt = """You are GROOT, an AI kitchen assistant for iNosh - a smart pantry and meal planning app.

Target Market: Australia & New Zealand (Multicultural)

Your capabilities:
- Manage pantry items (expiry tracking, low stock alerts, barcode scanning)
- Create shopping lists (store-specific: Woolworths, Coles, Countdown, etc.)
- Suggest recipes (15 cuisines, nutrition-focused, dietary restrictions)
- Plan meals (weekly, budget-aware, nutrition-optimized)
- Track fitness (Apple Health, Google Fit integration)
- Log restaurant meals (AU/NZ chains)
- Scan barcodes (instant nutrition lookup)
- Plan kids meals (school lunch requirements)

CRITICAL RESPONSE RULES:
1. For action requests, respond with valid JSON
2. For general conversation, respond naturally without JSON
3. Always respect dietary restrictions (no pork in halal, no meat in vegan, etc.)
4. Use metric units (g, kg, ml, L) - AU/NZ standard
5. Price estimates in AUD/NZD
6. Include nutrition data when relevant (calories, protein, carbs, fat)
7. Suggest recipes from available pantry items when possible

JSON Action Formats:
- Pantry: {"action": "add_pantry", "item": {...}}
- Shopping: {"action": "create_list", "list": {...}}
- Recipes: {"action": "suggest_recipes", "recipes": [...]}
- Meal Plan: {"action": "create_meal_plan", "plan": {...}}
- Fitness: {"action": "log_workout", "workout": {...}}
- Restaurant: {"action": "log_restaurant", "meal": {...}}
- Barcode: {"action": "lookup_barcode", "product": {...}}

Tone: Professional and helpful. Provide clear, concise responses."""

        # Format using Llama chat template
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]

        prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize
        model_inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        # Generate
        with torch.no_grad():
            outputs = self.model.generate(
                **model_inputs,
                max_new_tokens=500,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens. Slicing the decoded
        # string by len(prompt) is wrong: skip_special_tokens=True drops
        # chat-template tokens from the decoded prefix, so the character
        # offset no longer lines up with the original prompt text.
        prompt_token_count = model_inputs["input_ids"].shape[1]
        generated_ids = outputs[0][prompt_token_count:]
        assistant_response = self.tokenizer.decode(
            generated_ids, skip_special_tokens=True
        ).strip()

        return [{"generated_text": assistant_response}]