import asyncio
import json
import statistics
from pathlib import Path
from typing import Dict, List, Any
from datetime import datetime
from collections import Counter, defaultdict
import argparse

# make sibling modules (e.g. ground_truth) importable when this file is run as a script
import sys
sys.path.append(str(Path(__file__).parent))

from ground_truth import get_ground_truth


class AccuracyTester:
    """Run repeated extractions against the API and score the results.

    Typical flow: ``run_multiple_extractions`` fills ``self.results``, then
    ``generate_report`` (or ``evaluate_consistency`` / ``evaluate_accuracy``
    individually) analyses those results.
    """

    def __init__(self, api_url="http://localhost:8000"):
        """Store the API base URL and start with an empty result list."""
        # base URL; "/generate-rule" is appended for each request
        self.api_url = api_url
        # filled by run_multiple_extractions; each item is a dict with
        # "run_number", "timestamp" and "result"
        self.results = []

    async def run_single_extraction(self, bureau_path, gst_path):
        """POST both PDFs to ``{api_url}/generate-rule`` and return the outcome.

        Args:
            bureau_path: path of the bureau PDF to upload.
            gst_path: path of the GST PDF to upload.

        Returns:
            The decoded JSON body on HTTP 200; otherwise a dict of the form
            ``{"error": "..."}``. Any exception (file, network, decoding) is
            also reported as an ``{"error": ...}`` dict instead of being raised,
            so a single bad run does not abort a batch.
        """
        # imported lazily: within this class only this method needs aiohttp
        import aiohttp

        try:
            # read both files fully up front so the form carries plain bytes
            # rather than file handles that could be closed before sending
            bureau_content = Path(bureau_path).read_bytes()
            gst_content = Path(gst_path).read_bytes()

            async with aiohttp.ClientSession() as session:
                data = aiohttp.FormData()

                # add bureau PDF
                data.add_field('bureau_pdf',
                               bureau_content,
                               filename=Path(bureau_path).name,
                               content_type='application/pdf')

                # add GST PDF
                data.add_field('gst_pdf',
                               gst_content,
                               filename=Path(gst_path).name,
                               content_type='application/pdf')

                async with session.post(f"{self.api_url}/generate-rule", data=data) as response:
                    if response.status == 200:
                        return await response.json()

                    error_text = await response.text()
                    return {"error": f"Status {response.status}: {error_text}"}

        except Exception as e:
            return {"error": str(e)}

    async def run_multiple_extractions(self, bureau_path, gst_path, num_runs=100):
        """Run the extraction ``num_runs`` times sequentially.

        Args:
            bureau_path: path of the bureau PDF.
            gst_path: path of the GST PDF.
            num_runs: how many extractions to perform.

        Returns:
            A list with one record per run (``run_number`` starting at 1,
            ISO ``timestamp``, and the ``result`` of the extraction). The same
            list is stored in ``self.results``.
        """
        print(f"\n{'='*80}")
        print(f"RUNNING {num_runs} EXTRACTIONS")
        print(f"{'='*80}\n")

        print(f"Bureau PDF: {bureau_path}")
        print(f"GST PDF: {gst_path}")
        print(f"Number of runs: {num_runs}\n")

        results: List[Dict[str, Any]] = []

        for run_number in range(1, num_runs + 1):
            # flush explicitly: the line ends with '\r', not '\n', so a
            # line-buffered stdout would otherwise never show the progress
            print(f"Run {run_number}/{num_runs}...", end='\r', flush=True)
            result = await self.run_single_extraction(bureau_path, gst_path)
            results.append({
                "run_number": run_number,
                "timestamp": datetime.now().isoformat(),
                "result": result
            })

            # small delay to avoid overwhelming API (not needed after the last run)
            if run_number < num_runs:
                await asyncio.sleep(0.1)

        print(f"\nCompleted {num_runs} extractions!\n")
        self.results = results
        return results

    @staticmethod
    def _counting_key(value):
        """Return a hashable stand-in for ``value`` so it can be tallied.

        Hashable values are returned unchanged; unhashable ones (lists, dicts)
        are replaced by their canonical JSON text.
        """
        try:
            hash(value)
        except TypeError:
            return json.dumps(value, sort_keys=True, default=str)
        return value

    def evaluate_consistency(self):
        """Measure how often each parameter yields the same value across runs.

        Runs whose result is not a dict or contains an ``"error"`` key are
        ignored, as are ``None`` values. Bureau parameters may be given either
        as ``{"value": ...}`` dicts or as bare values. A truthy ``gst_sales``
        entry is compared as a whole via its key-sorted JSON text.

        Returns:
            Dict keyed by parameter id. Each entry holds ``total_extractions``,
            ``unique_values``, ``most_common_value``, ``most_common_count``,
            ``consistency_rate`` (percentage) and ``all_values`` (value ->
            count; unhashable values appear under their JSON text).
        """
        print(f"\n{'='*80}")
        print("EVALUATING CONSISTENCY")
        print(f"{'='*80}\n")

        # collect values for each parameter
        parameter_values = defaultdict(list)

        for run in self.results:
            result = run["result"]
            # a non-dict payload (e.g. JSON null or a list) cannot be analysed
            if not isinstance(result, dict) or "error" in result:
                continue

            # bureau parameters
            bureau = result.get("bureau")
            if isinstance(bureau, dict):
                for param_id, param_data in bureau.items():
                    # handle both formats: {"value": x} or bare x
                    if isinstance(param_data, dict):
                        value = param_data.get("value")
                    else:
                        value = param_data
                    if value is not None:
                        parameter_values[param_id].append(value)

            # GST sales
            gst_sales = result.get("gst_sales")
            if gst_sales:
                parameter_values["gst_sales"].append(
                    json.dumps(gst_sales, sort_keys=True)
                )

        # calculate consistency
        consistency_report = {}

        for param_id, values in parameter_values.items():
            total_runs = len(values)

            # tally by hashable key, but remember the first original value
            # seen for each key so the report shows the real value
            value_counts = Counter()
            first_seen = {}
            for value in values:
                key = self._counting_key(value)
                value_counts[key] += 1
                first_seen.setdefault(key, value)

            top_key, most_common_count = value_counts.most_common(1)[0]
            most_common_value = first_seen[top_key]

            consistency_rate = 0
            if total_runs > 0:
                consistency_rate = (most_common_count / total_runs) * 100

            consistency_report[param_id] = {
                "total_extractions": total_runs,
                "unique_values": len(value_counts),
                "most_common_value": most_common_value,
                "most_common_count": most_common_count,
                "consistency_rate": consistency_rate,
                "all_values": dict(value_counts)
            }

            # print info
            print(f"Parameter: {param_id}")
            print(f"  Total extractions: {total_runs}")
            print(f"  Unique values: {len(value_counts)}")
            print(f"  Most common: {most_common_value} ({most_common_count}/{total_runs} = {consistency_rate:.1f}%)")

            if len(value_counts) > 1:
                print("  ⚠️  WARNING: Inconsistent values!")
                print(f"  All values: {dict(value_counts)}")
            else:
                print("  ✅ 100% consistent")

            print()

        return consistency_report

    @staticmethod
    def _values_match(expected_value, extracted_value):
        """Return True when the extracted value satisfies the expected one.

        ``None`` expects nothing to be found (``None`` or ``"not_found"``);
        lists are compared through key-sorted JSON so dict key order inside
        them does not matter; everything else uses ``==``.
        """
        if expected_value is None:
            return extracted_value is None or extracted_value == "not_found"
        if isinstance(expected_value, list):
            try:
                expected_json = json.dumps(expected_value, sort_keys=True)
                extracted_json = json.dumps(extracted_value, sort_keys=True)
            except (TypeError, ValueError):
                # a value that cannot be serialised cannot match
                return False
            return expected_json == extracted_json
        return extracted_value == expected_value

    def evaluate_accuracy(self, bureau_filename, gst_filename, consistency_report=None):
        """Compare the most common extracted values with the ground truth.

        Args:
            bureau_filename: key looked up in ``GROUND_TRUTH_BUREAU``.
            gst_filename: key looked up in ``GROUND_TRUTH_GST``.
            consistency_report: optional result of ``evaluate_consistency``.
                When omitted it is computed (and printed) here, which is the
                original behaviour.

        Returns:
            Dict with ``total_parameters``, ``correct``, ``incorrect``,
            ``missing``, ``overall_accuracy`` (percentage), ``per_parameter``
            and the three lists ``correct_params`` / ``incorrect_params`` /
            ``missing_params``. Parameters for which ``get_ground_truth``
            returns nothing are skipped but still counted in
            ``total_parameters``.
        """
        print(f"\n{'='*80}")
        print("EVALUATING ACCURACY")
        print(f"{'='*80}\n")

        # get most common values
        if consistency_report is None:
            consistency_report = self.evaluate_consistency()

        accuracy_report = {}
        correct_params = []
        incorrect_params = []
        missing_params = []

        # collect ground truth params
        all_ground_truth_params = set()

        from ground_truth import GROUND_TRUTH_BUREAU, GROUND_TRUTH_GST

        if bureau_filename in GROUND_TRUTH_BUREAU:
            all_ground_truth_params.update(GROUND_TRUTH_BUREAU[bureau_filename].keys())

        if gst_filename in GROUND_TRUTH_GST:
            all_ground_truth_params.update(GROUND_TRUTH_GST[gst_filename].keys())

        # check each parameter; sorted so output and result lists are
        # reproducible instead of following set iteration order
        for param_id in sorted(all_ground_truth_params, key=str):
            ground_truth = get_ground_truth(bureau_filename, param_id)
            if not ground_truth:
                ground_truth = get_ground_truth(gst_filename, param_id)

            if not ground_truth:
                continue

            expected_value = ground_truth["expected_value"]

            if param_id in consistency_report:
                entry = consistency_report[param_id]
                extracted_value = entry["most_common_value"]

                # gst_sales is stored as JSON text by evaluate_consistency
                if param_id == "gst_sales" and isinstance(extracted_value, str):
                    try:
                        extracted_value = json.loads(extracted_value)
                    except ValueError:
                        # not valid JSON: compare the raw text instead
                        pass

                consistency_rate = entry["consistency_rate"]
                is_correct = self._values_match(expected_value, extracted_value)

                accuracy_report[param_id] = {
                    "expected": expected_value,
                    "extracted": extracted_value,
                    "correct": is_correct,
                    "consistency_rate": consistency_rate
                }

                if is_correct:
                    correct_params.append(param_id)
                    print(f"✅ {param_id}")
                else:
                    incorrect_params.append(param_id)
                    print(f"❌ {param_id}")
                print(f"   Expected: {expected_value}")
                print(f"   Extracted: {extracted_value}")
                print(f"   Consistency: {consistency_rate:.1f}%")
            elif expected_value is None:
                # correct - not found and expected None
                correct_params.append(param_id)
                accuracy_report[param_id] = {
                    "expected": None,
                    "extracted": None,
                    "correct": True,
                    "consistency_rate": 100.0
                }
                print(f"✅ {param_id}")
                print("   Expected: None")
                print("   Extracted: None")
                print("   Consistency: 100.0%")
            else:
                # missing - expected but not found
                missing_params.append(param_id)
                accuracy_report[param_id] = {
                    "expected": expected_value,
                    "extracted": None,
                    "correct": False,
                    "consistency_rate": 0
                }
                print(f"⚠️  {param_id}")
                print(f"   Expected: {expected_value}")
                print("   Extracted: NOT FOUND")

            print()

        # calculate overall accuracy
        total_params = len(all_ground_truth_params)

        def percent_of_total(count):
            # 0 when there is no ground truth at all, avoiding division by zero
            if total_params > 0:
                return (count / total_params) * 100
            return 0

        correct_count = len(correct_params)
        overall_accuracy = percent_of_total(correct_count)

        print(f"\n{'='*80}")
        print("ACCURACY SUMMARY")
        print(f"{'='*80}\n")

        print(f"Total parameters: {total_params}")
        print(f"Correct: {correct_count} ({overall_accuracy:.1f}%)")
        print(f"Incorrect: {len(incorrect_params)} ({percent_of_total(len(incorrect_params)):.1f}%)")
        print(f"Missing: {len(missing_params)} ({percent_of_total(len(missing_params)):.1f}%)")

        print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

        return {
            "total_parameters": total_params,
            "correct": correct_count,
            "incorrect": len(incorrect_params),
            "missing": len(missing_params),
            "overall_accuracy": overall_accuracy,
            "per_parameter": accuracy_report,
            "correct_params": correct_params,
            "incorrect_params": incorrect_params,
            "missing_params": missing_params
        }

    def generate_report(self, output_path, bureau_filename, gst_filename):
        """Evaluate the stored runs and write a JSON report to ``output_path``.

        Args:
            output_path: file to (over)write with the report.
            bureau_filename: ground-truth key for the bureau document.
            gst_filename: ground-truth key for the GST document.

        Returns:
            The report dict that was written: ``test_metadata``,
            ``consistency_metrics``, ``accuracy_metrics`` and ``all_runs``.
        """
        print(f"\n{'='*80}")
        print("GENERATING REPORT")
        print(f"{'='*80}\n")

        # evaluate metrics; the consistency report is computed once and
        # handed to the accuracy step so it is not recomputed and re-printed
        consistency_report = self.evaluate_consistency()
        accuracy_report = self.evaluate_accuracy(
            bureau_filename,
            gst_filename,
            consistency_report=consistency_report,
        )

        # build report
        report = {
            "test_metadata": {
                "timestamp": datetime.now().isoformat(),
                "total_runs": len(self.results),
                "bureau_file": bureau_filename,
                "gst_file": gst_filename
            },
            "consistency_metrics": consistency_report,
            "accuracy_metrics": accuracy_report,
            "all_runs": self.results
        }

        # save report
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2)

        print(f"✅ Report saved: {output_path}\n")

        return report


async def main():
    """Parse CLI arguments, run the extractions and write the report.

    Exits with a non-zero status when ``--runs`` is not positive or when
    either input PDF is not an existing file, so shells and CI can detect
    the failure.
    """
    parser = argparse.ArgumentParser(description="Test extraction accuracy")
    parser.add_argument("--runs", type=int, default=100, help="Number of test runs")
    parser.add_argument("--bureau", required=True, help="Path to bureau PDF")
    parser.add_argument("--gst", required=True, help="Path to GST PDF")
    parser.add_argument("--output", default="test_report.json", help="Output report path")
    parser.add_argument("--api-url", default="http://localhost:8000", help="API URL")

    args = parser.parse_args()

    # zero or negative runs would silently produce an empty report
    if args.runs < 1:
        parser.error("--runs must be a positive integer")

    # validate files (is_file also rejects directories, which cannot be uploaded)
    if not Path(args.bureau).is_file():
        print(f"❌ Bureau PDF not found: {args.bureau}")
        raise SystemExit(1)

    if not Path(args.gst).is_file():
        print(f"❌ GST PDF not found: {args.gst}")
        raise SystemExit(1)

    # create tester
    tester = AccuracyTester(api_url=args.api_url)

    # run extractions
    await tester.run_multiple_extractions(
        bureau_path=args.bureau,
        gst_path=args.gst,
        num_runs=args.runs
    )

    # generate report; ground truth is keyed by bare file name, not full path
    tester.generate_report(
        output_path=args.output,
        bureau_filename=Path(args.bureau).name,
        gst_filename=Path(args.gst).name
    )

    print("\n✅ Testing complete!")
    print(f"📊 Report: {args.output}")

# script entry point: run the async CLI only when executed directly, not on import
if __name__ == "__main__":
    asyncio.run(main())