"""
Simple demo script for document text extraction.
Demonstrates the complete workflow from training to inference.
"""

import json
import os
import sys
from pathlib import Path

# Add this script's directory to the import path so the `src` package
# imports below resolve.
sys.path.append(str(Path(__file__).parent))

from src.data_preparation import DocumentProcessor, NERDatasetCreator
from src.training_pipeline import TrainingPipeline, create_custom_config
from src.inference import DocumentInference


def run_quick_demo():
    """Run a quick demonstration of the text extraction system.

    Lists three built-in sample documents, trains a model first when nothing
    exists at ``models/document_ner_model``, runs extraction on every sample,
    writes all results to ``results/demo_results.json`` and prints a summary.

    An exception raised while processing a single document is recorded as an
    ``{'error': ...}`` result rather than aborting the run, so the remaining
    documents are still processed and the results file is still written.
    Returns early (without saving anything) if the inference pipeline cannot
    be loaded.
    """
    print("DOCUMENT TEXT EXTRACTION - QUICK DEMO")
    print("=" * 60)

    # Sample documents for demonstration
    demo_texts = [
        {
            "name": "Invoice Example 1",
            "text": "Invoice sent to Robert White on 15/09/2025 Invoice No: INV-1024 Amount: $1,250.00 Phone: (555) 123-4567"
        },
        {
            "name": "Invoice Example 2",
            "text": "Bill for Dr. Sarah Johnson dated March 10, 2025. Invoice Number: BL-2045. Total: $2,300.50 Email: sarah.johnson@email.com"
        },
        {
            "name": "Receipt Example",
            "text": "Receipt for Michael Brown 456 Oak Street Boston MA 02101 Invoice: REC-3089 Date: 2025-04-22 Amount: $890.75"
        }
    ]

    print("\nSample Documents:")
    for i, doc in enumerate(demo_texts, 1):
        print(f"{i}. {doc['name']}: {doc['text'][:60]}...")

    # Train a model only when none is present on disk.
    model_path = "models/document_ner_model"
    if not Path(model_path).exists():
        print(f"\nModel not found at {model_path}")
        print("Training a new model first...")

        config = create_custom_config()
        config.num_epochs = 2  # Quick training for demo
        config.batch_size = 8

        pipeline = TrainingPipeline(config)
        # The pipeline reports where it saved the model; use that path from
        # here on instead of the default location.
        model_path = pipeline.run_complete_pipeline()

        print(f"Model trained and saved to {model_path}")

    # Load inference pipeline
    print(f"\nLoading inference pipeline from {model_path}")
    try:
        inference = DocumentInference(model_path)
        print("Inference pipeline loaded successfully")
    except Exception as e:
        print(f"Failed to load inference pipeline: {e}")
        return

    # Process demo texts
    print(f"\nProcessing {len(demo_texts)} demo documents...")
    results = []

    for i, doc in enumerate(demo_texts, 1):
        print(f"\nProcessing Document {i}: {doc['name']}")
        print("-" * 50)
        print(f"Text: {doc['text']}")

        # A failure on one document must not lose the results already
        # collected, so it is converted into the same error shape that the
        # display and summary code below already understand.
        try:
            result = inference.process_text_directly(doc['text'])
        except Exception as e:
            result = {'error': str(e)}

        results.append({
            'document_name': doc['name'],
            'original_text': doc['text'],
            'result': result
        })

        _print_demo_result(result)

    # Save results
    output_path = "results/demo_results.json"
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"\nDemo results saved to: {output_path}")

    # Summary: only results without an 'error' key count as successful.
    succeeded = [r['result'] for r in results if 'error' not in r['result']]
    successful_extractions = len(succeeded)
    total_entities = sum(len(res.get('entities', [])) for res in succeeded)
    total_structured_fields = sum(len(res.get('structured_data', {})) for res in succeeded)

    print("\nDemo Summary:")
    print(f"   Successfully processed: {successful_extractions}/{len(demo_texts)} documents")
    print(f"   Total entities found: {total_entities}")
    print(f"   Total structured fields: {total_structured_fields}")

    print("\nDemo completed successfully!")
    print("You can now:")
    print("   - Run the web API: python api/app.py")
    print("   - Process your own documents using inference.py")
    print("   - Retrain with your data using training_pipeline.py")


def _print_demo_result(result):
    """Print one extraction result: its error, or structured data and entities."""
    if 'error' in result:
        print(f"Error: {result['error']}")
        return

    structured_data = result.get('structured_data', {})
    entities = result.get('entities', [])

    print("\nExtraction Results:")
    if structured_data:
        print("Structured Data:")
        for key, value in structured_data.items():
            print(f"   {key}: {value}")
    else:
        print("   No structured data extracted")

    if entities:
        print(f"Found {len(entities)} entities:")
        for entity in entities:
            # Round rather than truncate: 0.57 * 100 is 56.999... in floating
            # point and would otherwise be shown as 56%.
            confidence = int(round(entity['confidence'] * 100))
            print(f"   {entity['entity']}: '{entity['text']}' ({confidence}%)")


def train_model_only():
    """Run the complete training pipeline and report where the model was saved.

    Uses the configuration returned by ``create_custom_config()`` unchanged
    and performs no inference afterwards.
    """
    for header_line in ("TRAINING MODEL ONLY", "=" * 40):
        print(header_line)

    trainer = TrainingPipeline(create_custom_config())
    saved_to = trainer.run_complete_pipeline()

    print(f"Model training completed!")
    print(f"Model saved to: {saved_to}")


def test_specific_text():
    """Test extraction on user-provided text.

    Requires an existing model at ``models/document_ner_model``; returns with
    a message when it is missing or when the user enters no text. Otherwise
    reads one line from standard input, runs it through the inference
    pipeline and prints the structured data and entities found. Any exception
    raised while loading the model, processing the text or printing the
    result is reported as a one-line failure message instead of propagating.
    """
    print("CUSTOM TEXT EXTRACTION")
    print("=" * 40)

    # Check if model exists
    model_path = "models/document_ner_model"
    if not Path(model_path).exists():
        print("No trained model found. Please run training first.")
        return

    # Get text from user
    print("Enter text to extract information from:")
    print("(Example: Invoice sent to John Doe on 01/15/2025 Invoice No: INV-1001 Amount: $1,500.00)")
    text = input("Text: ").strip()

    if not text:
        print("No text provided.")
        return

    # Load inference and process
    try:
        inference = DocumentInference(model_path)
        result = inference.process_text_directly(text)

        print("\nExtraction Results:")
        if 'error' in result:
            print(f"Error: {result['error']}")
            return

        structured_data = result.get('structured_data', {})
        if structured_data:
            print("Structured Information:")
            for key, value in structured_data.items():
                print(f"  {key}: {value}")
        else:
            print("No structured information found.")

        entities = result.get('entities', [])
        if entities:
            print(f"\nEntities Found ({len(entities)}):")
            for entity in entities:
                # Round rather than truncate: 0.57 * 100 is 56.999... in
                # floating point and would otherwise be shown as 56%.
                confidence = int(round(entity['confidence'] * 100))
                print(f"  {entity['entity']}: '{entity['text']}' ({confidence}%)")

    except Exception as e:
        print(f"Failed to process text: {e}")


def main():
    """Show the option menu and run the action the user selects.

    Keeps prompting until one of the choices 1-4 is entered. Choices 1-3 run
    the corresponding demo function once and then return; choice 4 prints a
    farewell and returns.
    """
    menu_lines = (
        "DOCUMENT TEXT EXTRACTION SYSTEM",
        "=" * 50,
        "Choose an option:",
        "1. Run complete demo (train + inference)",
        "2. Train model only",
        "3. Test specific text (requires trained model)",
        "4. Exit",
    )
    for menu_line in menu_lines:
        print(menu_line)

    # Menu key -> function to run for that choice.
    actions = {
        '1': run_quick_demo,
        '2': train_model_only,
        '3': test_specific_text,
    }

    while True:
        selection = input("\nEnter your choice (1-4): ").strip()

        if selection == '4':
            print("👋 Goodbye!")
            return

        action = actions.get(selection)
        if action is not None:
            action()
            return

        print("Invalid choice. Please enter 1, 2, 3, or 4.")


# Launch the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()