import re
import json
from typing import List, Dict
from pathlib import Path
from simulation.manufacturer_db import ManufacturerDB

class EntityExtractor:
    """Rule-based extractor that turns free-form order text into line items.

    The text is normalised, split into per-item segments on order verbs,
    "and" and commas, and each segment is fuzzy-matched against the medicine
    names held by the database object. Form, quantity and dosage are then
    pulled out of the matching segment with keyword lists and regexes.
    """

    # A strength mention such as "500 mg" or "2.5ml".
    # Group 1 is the amount, group 2 the unit as written.
    _DOSAGE_RE = re.compile(r'(\d+(?:\.\d+)?)\s*(mg|ml|gm|mcg)', re.IGNORECASE)

    def __init__(self, db: "ManufacturerDB"):
        """Store the database handle and build the keyword tables.

        Args:
            db: Object whose ``medicines`` attribute is indexed pandas-style
                and exposes ``medicine_name`` and ``dosage`` columns (this is
                how ``extract`` reads it). The annotation is quoted so it is
                not evaluated when the class is defined.
        """
        self.db = db
        self.aliases = self._load_aliases()

        # Form keywords that indicate a medicine nearby
        self.form_keywords = {
            'tablet': ['tablet', 'tab', 'tabs', 'capsule', 'cap', 'caps'],
            'syrup': ['syrup', 'liquid', 'suspension'],
            'injection': ['injection', 'inj', 'vial', 'ampoule'],
            'cream': ['cream', 'gel', 'ointment', 'tube'],
            'spray': ['spray', 'inhaler', 'puff'],
            'drops': ['drops', 'eye drops', 'ear drops'],
            'sachet': ['sachet', 'powder', 'granules']
        }

        # Unit keywords for quantity extraction
        self.unit_keywords = ['strips', 'strip', 'slips', 'slip', 'bottles', 'bottle',
                              'tablets', 'tabs', 'pieces', 'pcs', 'boxes', 'box',
                              'packs', 'pack', 'vials', 'vial', 'ampoules']

        # Spoken number mapping
        self.spoken_numbers = {
            'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
            'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
            'eleven': 11, 'twelve': 12, 'fifteen': 15, 'twenty': 20,
            'twenty-five': 25, 'thirty': 30, 'forty': 40, 'fifty': 50,
            'sixty': 60, 'seventy': 70, 'eighty': 80, 'ninety': 90,
            'hundred': 100, 'two hundred': 200, 'three hundred': 300,
            'five hundred': 500, 'thousand': 1000
        }

    def _load_aliases(self) -> Dict:
        """Load pronunciation aliases from ``data/aliases.json``.

        The path is relative to the current working directory. Returns an
        empty dict when the file does not exist; a malformed file still
        raises from ``json.load``.
        """
        alias_path = Path("data/aliases.json")
        if alias_path.exists():
            # Explicit encoding so the result does not depend on the locale.
            with open(alias_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        return {}

    def _normalize_text(self, text: str) -> str:
        """Normalize input text for parsing.

        Lower-cases the text, strips ASR tokens and filler words, pads commas
        with spaces, drops periods that are not decimal points, collapses
        whitespace and converts spoken numbers to digits.
        """
        text = text.lower()
        # Remove common ASR artifacts
        text = re.sub(r'</s>|<unk>|<s>', '', text)
        # Remove filler words
        text = re.sub(r'\b(uh|um|like|maybe|please|kindly)\b', '', text)
        # Normalize punctuation. A period is kept only when it sits between
        # two digits, so a decimal strength such as "2.5 mg" stays intact.
        text = text.replace(",", " , ")
        text = re.sub(r'(?<!\d)\.|\.(?!\d)', ' ', text)
        # Collapse the gaps left by the removals above; multi-word number
        # phrases below rely on single spaces.
        text = re.sub(r'\s+', ' ', text).strip()
        # Convert spoken numbers to digits in one pass, longest phrase first,
        # so "two hundred" is not rewritten as "2 100" and "twenty-five" is
        # not rewritten as "20-5".
        if self.spoken_numbers:
            phrases = sorted(self.spoken_numbers, key=len, reverse=True)
            pattern = r'\b(?:' + '|'.join(re.escape(p) for p in phrases) + r')\b'
            text = re.sub(
                pattern,
                lambda m: str(self.spoken_numbers[m.group(0)]),
                text,
            )
        return text

    def _resolve_alias(self, word: str) -> str:
        """Return the canonical name for ``word`` if it is a known alias.

        Comparison is exact and case-insensitive. ``word`` is returned
        unchanged when nothing matches.
        """
        word_lower = word.lower()
        for canonical, aliases in self.aliases.items():
            # A bare string value is treated as a single alias; testing it
            # with ``in`` would do a substring match instead.
            if isinstance(aliases, str):
                aliases = [aliases]
            if word_lower == canonical.lower() or any(
                word_lower == str(alias).lower() for alias in aliases
            ):
                return canonical
        return word

    def _extract_form(self, segment: str) -> str:
        """Extract form type from segment.

        Keywords are matched as whole words (with an optional plural "s"), so
        "cap" no longer fires inside "escape". Form types are tried in the
        order of ``self.form_keywords``; "tablet" is the default.
        """
        segment_lower = segment.lower()
        for form_type, keywords in self.form_keywords.items():
            if not keywords:
                continue
            alternation = '|'.join(re.escape(kw) for kw in keywords)
            if re.search(rf'\b(?:{alternation})s?\b', segment_lower):
                return form_type
        return "tablet"  # Default

    def _extract_quantity(self, segment: str) -> tuple:
        """Extract quantity and unit from segment.

        Returns:
            ``(number, unit)`` as strings. ``("1", "units")`` when the segment
            holds no usable number.
        """
        # Blank out strength mentions ("500 mg") first so a dosage figure is
        # never reported as the ordered quantity.
        remaining = self._DOSAGE_RE.sub(' ', segment)

        # Pattern 1: Number followed by unit word
        # e.g., "300 strips", "20 bottles"
        if self.unit_keywords:
            units = '|'.join(
                re.escape(u)
                for u in sorted(self.unit_keywords, key=len, reverse=True)
            )
            with_unit = re.search(rf'(\d+)\s*({units})\b', remaining, re.IGNORECASE)
            if with_unit:
                unit = with_unit.group(2).lower()
                # Normalize common typos
                if unit in ('slips', 'slip'):
                    unit = 'strips'
                return with_unit.group(1), unit

        # Pattern 2: a bare number with no recognised unit
        bare = re.search(r'\d+', remaining)
        if bare:
            return bare.group(0), "units"

        return "1", "units"  # Default

    def _extract_dosage(self, segment: str) -> str:
        """Extract dosage from segment.

        Returns the amount and unit joined without a space (e.g. "500mg",
        "2.5ml"), or "-" when no strength is mentioned.
        """
        # Pattern: Number (optionally decimal) followed by mg/ml/gm/mcg
        dosage_match = self._DOSAGE_RE.search(segment)
        if dosage_match:
            return f"{dosage_match.group(1)}{dosage_match.group(2)}"
        return "-"

    def extract(self, text: str) -> List[Dict]:
        """
        Extract medicine entities from text.

        Returns: List of dicts with keys 'medicine', 'form', 'quantity',
        'dosage', 'confidence' (the fuzzy-match score) and 'original_segment'
        (the normalised segment the item came from). Empty list for empty
        input.
        """
        if not text:
            return []

        # Imported here, once per call rather than once per segment.
        from rapidfuzz import process, fuzz, utils

        # Normalize text
        text = self._normalize_text(text)

        found_orders = []

        # Get all known medicines from DB for matching
        known_meds = self.db.medicines['medicine_name'].tolist()

        # Split by multiple delimiters for multi-item orders
        # Handles: "send", "order", "add", "also", "plus", "then", "and", comma
        delimiters = r'\b(?:send|add|want|need|order|also|plus|then)\b|,|\band\b'

        for segment in re.split(delimiters, text):
            segment = segment.strip()
            if len(segment) < 3:
                continue

            # First, check if any word is a known alias
            resolved_segment = ' '.join(
                self._resolve_alias(w) for w in segment.split()
            )

            # Fuzzy match against known medicines. The query is lower-cased by
            # normalisation, so both sides go through the same processor;
            # otherwise capitalised catalogue names would score lower.
            match = process.extractOne(
                resolved_segment,
                known_meds,
                scorer=fuzz.partial_ratio,
                processor=utils.default_process,
            )

            if not match or match[1] <= 75:  # Confidence threshold
                continue

            med_name = match[0]

            # Extract form, quantity, dosage
            form = self._extract_form(segment)
            num, unit = self._extract_quantity(segment)

            dosage = self._extract_dosage(segment)
            if dosage == "-":
                # Lookup default dosage from DB
                medicines = self.db.medicines
                med_row = medicines[medicines['medicine_name'] == med_name].iloc[0]
                dosage = med_row['dosage']

            found_orders.append({
                "medicine": med_name,
                "form": form,
                "quantity": f"{num} {unit}",
                "dosage": dosage,
                "confidence": match[1],
                "original_segment": segment
            })

        return found_orders