"""
Document Processing for Case Analysis
Supports PDF, TXT, DOCX uploads
"""

import os
import tempfile
from typing import Dict, List, Optional
import PyPDF2
import docx

class DocumentProcessor:
    """Extract plain text from uploaded case documents and tag homeopathic cues.

    Text is pulled from PDF, TXT or DOCX files, split into rough sentences,
    and each sentence is filed under every category whose keyword it
    contains (case-insensitive substring match).
    """

    # Section keys always present in the extraction result, in output order.
    # "physical_symptoms" has no keyword list and therefore stays empty; the
    # key is kept so the result shape does not change for callers.
    _SECTION_NAMES = (
        "symptoms",
        "modalities",
        "emotional_state",
        "physical_symptoms",
        "timing",
        "generalities",
    )

    # Keywords per category, matched as lowercase substrings of a sentence
    # (so "ache" also matches "headache").
    _KEYWORD_PATTERNS = {
        "symptoms": ["symptom", "complaint", "pain", "ache", "discomfort"],
        "modalities": ["worse", "better", "aggravated", "ameliorated", "relieved"],
        "emotional_state": ["anxious", "fearful", "irritable", "sad", "depressed", "angry"],
        "timing": ["morning", "evening", "night", "afternoon", "periodic"],
        "generalities": ["thirst", "hunger", "cold", "hot", "sweat"],
    }

    # How a modality sentence is classified. These mirror the "modalities"
    # keywords above so every collected modality can land in one of the two
    # groups instead of being silently dropped.
    _AGGRAVATION_TERMS = ("worse", "aggravated")
    _AMELIORATION_TERMS = ("better", "ameliorated", "relieved")

    _MAX_PER_SECTION = 5        # sentences kept per category
    _MAX_SENTENCE_CHARS = 200   # stored sentences are truncated to this length
    _MIN_SENTENCE_CHARS = 10    # a sentence must be longer than this to be kept
    _DISPLAY_CHARS = 5000       # length of the "text" preview in the result

    def __init__(self):
        self.supported_extensions = ['.pdf', '.txt', '.docx', '.doc']

    def process_uploaded_file(self, file_path: str, file_type: Optional[str] = None) -> Dict:
        """
        Process an uploaded document and extract its text.

        Args:
            file_path: Path of the file on disk.
            file_type: Optional extension override. Case and a missing
                leading dot are tolerated ("PDF", "pdf" and ".pdf" are
                equivalent). When omitted, the extension of ``file_path``
                is used.

        Returns:
            On success: {
                "success": True,
                "filename": str,            # basename of file_path
                "text": str,                # first 5000 characters, for display
                "full_text": str,
                "word_count": int,
                "extracted_sections": Dict, # see _extract_homeopathic_info
                "summary": str
            }
            On failure: {"success": False, "error": str}. Extraction errors
            are reported this way rather than raised.
        """
        if not os.path.exists(file_path):
            return {"success": False, "error": "File not found"}

        try:
            file_type = self._normalize_file_type(file_path, file_type)

            if file_type == '.pdf':
                text = self._extract_from_pdf(file_path)
            elif file_type == '.txt':
                # utf-8-sig drops a leading BOM; errors="replace" keeps a
                # mostly-readable upload usable instead of failing outright
                # on a single stray byte.
                with open(file_path, 'r', encoding='utf-8-sig', errors='replace') as f:
                    text = f.read()
            elif file_type in ('.docx', '.doc'):
                # NOTE: python-docx parses OOXML (.docx) only; a legacy binary
                # .doc routed here will typically fail and surface as an
                # error result via the handler below.
                text = self._extract_from_docx(file_path)
            else:
                return {"success": False, "error": f"Unsupported file type: {file_type}"}

            extracted = self._extract_homeopathic_info(text)

            return {
                "success": True,
                "filename": os.path.basename(file_path),
                "text": text[:self._DISPLAY_CHARS],  # Limit for display
                "full_text": text,
                "word_count": len(text.split()),
                "extracted_sections": extracted,
                "summary": self._generate_summary(extracted)
            }

        except Exception as e:
            # Deliberate boundary: callers expect an error dict, not a raise.
            return {"success": False, "error": str(e)}

    @staticmethod
    def _normalize_file_type(file_path: str, file_type: Optional[str]) -> str:
        """Return a lowercase, dot-prefixed extension for dispatching."""
        if not file_type:
            file_type = os.path.splitext(file_path)[1]
        file_type = file_type.strip().lower()
        if file_type and not file_type.startswith('.'):
            file_type = '.' + file_type
        return file_type

    def _extract_from_pdf(self, file_path: str) -> str:
        """Extract text from a PDF, one newline between pages.

        Pages for which the reader yields no text contribute an empty
        string. The newline separator keeps the last word of a page from
        fusing with the first word of the next.
        """
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # `or ""` guards against extract_text() returning None.
            return "\n".join((page.extract_text() or "") for page in pdf_reader.pages)

    def _extract_from_docx(self, file_path: str) -> str:
        """Extract text from a DOCX: every paragraph followed by a newline."""
        doc = docx.Document(file_path)
        return "".join(para.text + "\n" for para in doc.paragraphs)

    def _extract_homeopathic_info(self, text: str) -> Dict:
        """Group sentences of ``text`` by the homeopathic keywords they contain.

        The text is split on '.', and each stripped fragment longer than 10
        characters is added (truncated to 200 characters) to every category
        with a matching keyword. At most 5 sentences are kept per category,
        in document order.

        Returns:
            Dict mapping each section name to a list of sentences. All
            section keys are always present, even when empty.
        """
        sections: Dict[str, List[str]] = {name: [] for name in self._SECTION_NAMES}
        if not text:
            return sections

        for sentence in text.split('.'):
            clean_sentence = sentence.strip()
            if len(clean_sentence) <= self._MIN_SENTENCE_CHARS:
                continue
            sentence_lower = sentence.lower()
            for category, keywords in self._KEYWORD_PATTERNS.items():
                bucket = sections[category]
                # Capping while collecting equals truncating afterwards.
                if len(bucket) >= self._MAX_PER_SECTION:
                    continue
                if any(keyword in sentence_lower for keyword in keywords):
                    bucket.append(clean_sentence[:self._MAX_SENTENCE_CHARS])

        return sections

    @staticmethod
    def _filter_by_terms(sentences: List[str], terms) -> List[str]:
        """Return the sentences containing any of ``terms`` (case-insensitive)."""
        return [s for s in sentences if any(term in s.lower() for term in terms)]

    def _generate_summary(self, extracted: Dict) -> str:
        """Generate a one-line summary from extracted sections.

        Reports counts for symptoms, modalities (split into aggravations and
        ameliorations) and emotional state; sections that are empty are
        omitted. Returns "No clear patterns identified" when nothing was
        found.
        """
        summary_parts = []

        if extracted["symptoms"]:
            summary_parts.append(f"Chief complaints: {len(extracted['symptoms'])} identified")

        if extracted["modalities"]:
            worse_count = len(self._filter_by_terms(extracted["modalities"], self._AGGRAVATION_TERMS))
            better_count = len(self._filter_by_terms(extracted["modalities"], self._AMELIORATION_TERMS))
            summary_parts.append(f"Modalities: {worse_count} aggravations, {better_count} ameliorations")

        if extracted["emotional_state"]:
            summary_parts.append(f"Emotional patterns: {len(extracted['emotional_state'])} noted")

        return "; ".join(summary_parts) if summary_parts else "No clear patterns identified"

    def extract_for_analysis(self, text: str) -> Dict:
        """Extract structured data for analysis.

        Runs keyword extraction on ``text`` and flattens the first three
        entries of each relevant section into strings. "location" and
        "sensation" are not derived and are always empty; "source" is always
        "document_upload".
        """
        extracted = self._extract_homeopathic_info(text)
        modalities = extracted["modalities"]

        return {
            "chief_complaint": " ".join(extracted["symptoms"][:3]),
            "location": "",
            "sensation": "",
            "aggravations": "; ".join(self._filter_by_terms(modalities, self._AGGRAVATION_TERMS)[:3]),
            "ameliorations": "; ".join(self._filter_by_terms(modalities, self._AMELIORATION_TERMS)[:3]),
            "timing": "; ".join(extracted["timing"][:3]),
            "emotional_state": "; ".join(extracted["emotional_state"][:3]),
            "generalities": "; ".join(extracted["generalities"][:3]),
            "source": "document_upload"
        }

# Shared module-level DocumentProcessor instance, created at import time.
doc_processor = DocumentProcessor()