#!/usr/bin/env python3
"""Shared AI hook logger – works on Linux, Windows CMD, and PowerShell.

Features:
* Accepts **two positional arguments** (`prompt` and `response`) – ideal for manual testing.
* Falls back to **stdin** when data is piped (e.g., `echo '{"prompt":"..."}' | python3 scripts/log_hook.py`).
* If stdin is **empty**, the script exits silently without logging (with `--tool codex` it first tries to log the latest Codex session prompt).
* Writes a JSON line to `.ai-log/session.jsonl` with a minimal set of fields required by the repository.
"""
import json
import os
import sys
import subprocess
from datetime import datetime, timezone, timedelta
from pathlib import Path

# Vietnam timezone (used by existing logs); every entry's 'ts' field is
# rendered in this fixed UTC+07:00 offset.
VN_TZ = timezone(timedelta(hours=7))
# Student identity used when the AI_LOG_STUDENT_EMAIL environment variable is
# unset or empty. Because this value is non-empty, the git `user.email`
# collected in _get_git_metadata() is never the one that ends up in the log.
DEFAULT_STUDENT_EMAIL = 'akirahoang617@gmail.com'

# Typical artifacts when UTF-8 bytes are decoded with a legacy codepage first.
# _mojibake_score() counts occurrences of these substrings to decide whether a
# repair attempt made the text better.
MOJIBAKE_TOKENS = ('Ã', 'Â', 'Ä', 'Å', 'Æ', 'â€', 'á»')


def _mojibake_score(text: str) -> int:
    """Return the total number of MOJIBAKE_TOKENS occurrences found in *text*.

    Each token is counted independently with ``str.count`` and the counts are
    added up; 0 means none of the marker substrings is present.
    """
    total = 0
    for marker in MOJIBAKE_TOKENS:
        total += text.count(marker)
    return total


def _fix_mojibake_text(value: str) -> str:
    """Best-effort repair for common UTF-8 mojibake in Windows pipelines.

    Non-string or empty input, and text containing no mojibake markers, is
    returned unchanged. Otherwise the text is re-encoded with each legacy
    codepage and decoded as UTF-8; the candidate with the strictly lowest
    marker count wins, and the original is kept when nothing improves on it.
    """
    if not (isinstance(value, str) and value):
        return value

    lowest = _mojibake_score(value)
    if not lowest:
        return value

    repaired = value
    for codec in ('cp1252', 'latin-1'):
        try:
            attempt = value.encode(codec).decode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError):
            # This codepage cannot round-trip the text; try the next one.
            continue

        attempt_score = _mojibake_score(attempt)
        if attempt_score < lowest:
            repaired, lowest = attempt, attempt_score

    return repaired


def _read_stdin_text() -> str:
    """Read all of stdin as text, decoding the raw bytes as UTF-8.

    Bytes are read from ``sys.stdin.buffer`` rather than the text layer so the
    result does not depend on the console's locale/codepage.

    Returns:
        The decoded text with a leading UTF-8 BOM removed, or ``''`` when
        there is nothing to read: stdin is missing, is an interactive
        terminal, or is empty. Undecodable bytes become U+FFFD instead of
        raising, so a hook never crashes on bad input.
    """
    stdin = sys.stdin
    # An interactive terminal has no piped data; reading it would block until
    # the user sends EOF. stdin is None when the process has no stdin at all.
    if stdin is None or stdin.isatty():
        return ''

    raw_bytes = stdin.buffer.read()
    if not raw_bytes:
        return ''

    # 'utf-8-sig' is plain UTF-8 that also drops a leading BOM, so a single
    # strict attempt covers both cases.
    try:
        return raw_bytes.decode('utf-8-sig')
    except UnicodeDecodeError:
        # Last resort: decode with replacement to avoid crashing hooks. The
        # BOM is still stripped so the JSON parser is not tripped by it.
        return raw_bytes.decode('utf-8-sig', errors='replace')


def _get_git_metadata() -> dict:
    """Collect repo, branch, commit and student identity for a log entry.

    Avoids shell=True and '&&' chaining which fails on PowerShell 5.1 (Win 10
    default). Each git command is run as a direct subprocess call for maximum
    portability. A command that fails or prints nothing leaves its default in
    place: the current directory name for ``repo``, ``'unknown'`` otherwise.

    Returns:
        Dict with the string keys ``repo``, ``branch``, ``commit`` and
        ``student``.
    """
    metadata = {
        'repo': Path.cwd().name,
        'branch': 'unknown',
        'commit': 'unknown',
        'student': 'unknown',
    }

    git_commands = [
        ('repo',    ['git', 'remote', 'get-url', 'origin']),
        ('branch',  ['git', 'rev-parse', '--abbrev-ref', 'HEAD']),
        ('commit',  ['git', 'rev-parse', '--short', 'HEAD']),
        ('student', ['git', 'config', 'user.email']),
    ]

    for key, cmd in git_commands:
        try:
            result = subprocess.check_output(
                cmd, text=True, stderr=subprocess.DEVNULL).strip()
        except (OSError, subprocess.SubprocessError, UnicodeError):
            # git missing, not a repository, value unset, or undecodable
            # output: keep the default for this key.
            continue

        if not result:
            continue

        if key == 'repo':
            # Last path segment of the remote URL. Only a trailing '.git' is
            # removed, so names such as 'my.github.io' survive intact, and a
            # trailing slash does not produce an empty name.
            name = result.rstrip('/').split('/')[-1].removesuffix('.git')
            if name:
                metadata['repo'] = name
        else:
            metadata[key] = result

    # Precedence: environment override, then the hard-coded default, then the
    # git user.email collected above (only reachable if the default is empty).
    metadata['student'] = os.getenv('AI_LOG_STUDENT_EMAIL') or DEFAULT_STUDENT_EMAIL or metadata['student']
    return metadata


def _write_entry(entry: dict) -> None:
    """Append *entry* as one JSON line to ``session.jsonl`` in the log directory.

    The directory is taken from the ``AI_LOG_DIR`` environment variable
    (default ``.ai-log``) and is created, including missing parents, when it
    does not exist. Non-ASCII text is written as UTF-8 rather than escaped.

    Raises:
        Exception: whatever the directory creation, serialisation or write
            raised; it is reported on stderr first and then re-raised.
    """
    log_dir = Path(os.getenv('AI_LOG_DIR', '.ai-log'))
    log_file = log_dir / 'session.jsonl'

    try:
        # parents=True so a nested AI_LOG_DIR such as 'logs/ai/today' works.
        log_dir.mkdir(parents=True, exist_ok=True)
        with open(log_file, 'a', encoding='utf-8') as f:
            f.write(json.dumps(entry, ensure_ascii=False) + '\n')
    except Exception as e:
        # stdout carries the hook's JSON status, so diagnostics go to stderr.
        print(f"Error writing to log file: {e}", file=sys.stderr)
        raise


def _read_existing_keys() -> set[tuple[str, str, str]]:
    """Return the ``(tool, session_id, prompt)`` keys already present in the log.

    Reads ``session.jsonl`` from the directory named by ``AI_LOG_DIR``
    (default ``.ai-log``). Missing fields count as empty strings and values
    are stringified. Lines that are not valid JSON, or that are valid JSON
    but not an object, are skipped individually so one bad line cannot hide
    the keys that follow it.

    Returns:
        The set of keys found; empty when the file does not exist. If the
        file becomes unreadable part-way, the keys gathered so far are
        returned.
    """
    log_file = Path(os.getenv('AI_LOG_DIR', '.ai-log')) / 'session.jsonl'
    keys: set[tuple[str, str, str]] = set()
    if not log_file.exists():
        return keys

    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    item = json.loads(line)
                except (ValueError, RecursionError):
                    # ValueError covers json.JSONDecodeError and oversized
                    # number literals; RecursionError covers absurd nesting.
                    continue
                if not isinstance(item, dict):
                    # e.g. a bare list or string: not a log entry.
                    continue
                keys.add((
                    str(item.get('tool', '')),
                    str(item.get('session_id', '')),
                    str(item.get('prompt', '')),
                ))
    except (OSError, UnicodeError):
        # Best effort: an unreadable log must not stop the hook.
        pass
    return keys


def _extract_text_from_content(content) -> str:
    """Flatten a message ``content`` value into plain text.

    A string is returned as-is. A list contributes the ``'text'`` value of
    every dict element whose ``'text'`` is a string, joined with newlines;
    other elements are ignored. Any other type yields ``''``.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ''
    return '\n'.join(
        part['text']
        for part in content
        if isinstance(part, dict) and isinstance(part.get('text'), str)
    )


def _latest_codex_session_file() -> Path | None:
    root = Path(os.getenv('CODEX_HOME', Path.home() / '.codex')) / 'sessions'
    if not root.exists():
        return None

    candidates = list(root.rglob('rollout-*.jsonl'))
    if not candidates:
        return None
    return max(candidates, key=lambda path: path.stat().st_mtime)


def _extract_codex_prompt_from_session(path: Path) -> dict | None:
    cwd = str(Path.cwd()).lower()
    session_id = path.stem
    current_cwd = ''
    last_prompt = ''
    model = ''

    try:
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    item = json.loads(line)
                except json.JSONDecodeError:
                    continue

                payload = item.get('payload') or {}
                if item.get('type') == 'turn_context':
                    current_cwd = str(payload.get('cwd', '')).lower()
                    model = str(payload.get('model') or model)
                    continue

                if current_cwd and current_cwd != cwd:
                    continue

                if payload.get('type') == 'user_message':
                    message = str(payload.get('message', '')).strip()
                    if message:
                        last_prompt = message
                    continue

                if payload.get('type') == 'message' and payload.get('role') == 'user':
                    message = _extract_text_from_content(payload.get('content')).strip()
                    if message:
                        last_prompt = message
    except Exception:
        return None

    if not last_prompt:
        return None
    return {
        'session_id': session_id,
        'model': model,
        'prompt': last_prompt,
    }


def _log_latest_codex_session() -> bool:
    """Log the last prompt of the newest Codex session, once.

    Returns ``False`` when no session file exists or no prompt can be
    extracted from it. Returns ``True`` otherwise, whether a new entry was
    written or an entry with the same (tool, session_id, prompt) key was
    already in the log.
    """
    latest = _latest_codex_session_file()
    found = _extract_codex_prompt_from_session(latest) if latest else None
    if not found:
        return False

    session_id = found['session_id']
    # Truncate before the duplicate check so the key matches what is stored.
    prompt = _fix_mojibake_text(found['prompt'])[:1000]
    already_logged = ('codex', session_id, prompt) in _read_existing_keys()

    if not already_logged:
        git = _get_git_metadata()
        _write_entry({
            'ts': datetime.now(VN_TZ).isoformat(),
            'tool': 'codex',
            'event': 'codex_session_fallback',
            'session_id': session_id,
            'model': found['model'],
            'repo': git['repo'],
            'branch': git['branch'],
            'commit': git['commit'],
            'student': git['student'],
            'prompt': prompt,
            'response_summary': '',
        })
    return True


def _make_entry(prompt: str, response: str, tool: str = 'manual') -> dict:
    """Build a log entry for a prompt/response pair supplied directly.

    ``event``, ``session_id`` and ``model`` are left empty. The prompt is
    mojibake-repaired and cut to 1000 characters, the response to 500.
    """
    git = _get_git_metadata()
    entry = {
        'ts': datetime.now(VN_TZ).isoformat(),
        'tool': tool,
        'event': '',
        'session_id': '',
        'model': '',
    }
    # Insertion order fixes the field order of the serialised JSON line.
    for field in ('repo', 'branch', 'commit', 'student'):
        entry[field] = git[field]
    entry['prompt'] = _fix_mojibake_text(prompt)[:1000]
    entry['response_summary'] = _fix_mojibake_text(response)[:500]
    return entry


def main() -> None:
    """Entry point: log one AI interaction from argv or from piped JSON.

    Modes, checked in order:

    1. Exactly two positional arguments (the first not starting with ``--``)
       are taken as ``prompt`` and ``response``; the tool name comes from the
       ``AI_TOOL_NAME`` environment variable, defaulting to ``'manual'``.
    2. Otherwise stdin is read as a JSON object. Empty stdin exits with
       status 0, after first logging the latest Codex session when
       ``--tool codex`` was given. Input that is not valid JSON, or is valid
       JSON but not an object, also exits with status 0 without logging.
       The tool name is the ``--tool`` value if present, else
       ``AI_TOOL_NAME``, else ``'manual'``.

    Prints ``{"status": "logged"}`` to stdout after a successful write.
    """
    # Mode 1: two positional arguments. A leading flag (e.g. --tool) means
    # the caller wants stdin mode instead.
    if len(sys.argv) == 3 and not sys.argv[1].startswith('--'):
        prompt, response = sys.argv[1], sys.argv[2]
        entry = _make_entry(
            prompt, response, os.getenv('AI_TOOL_NAME', 'manual'))
        _write_entry(entry)
        print(json.dumps({'status': 'logged'}))
        return

    # Mode 2: piped JSON. Pick up an optional "--tool <name>" first.
    parser_tool = None
    if '--tool' in sys.argv:
        idx = sys.argv.index('--tool')
        if idx + 1 < len(sys.argv):
            parser_tool = sys.argv[idx + 1]

    raw = _read_stdin_text().strip()
    if not raw:
        if parser_tool == 'codex':
            _log_latest_codex_session()
        sys.exit(0)

    raw = _fix_mojibake_text(raw)
    try:
        data = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError; malformed input is ignored.
        sys.exit(0)

    # Valid JSON that is not an object (list, string, number, null) has no
    # fields to log; exit quietly instead of crashing on data.get().
    if not isinstance(data, dict):
        sys.exit(0)

    tool = parser_tool or os.getenv('AI_TOOL_NAME', 'manual')

    meta = _get_git_metadata()
    # Minimal normalisation – keep only fields we care about.
    entry = {
        'ts': datetime.now(VN_TZ).isoformat(),
        'tool': tool,
        'event': data.get('hook_event_name') or data.get('event') or '',
        'session_id': data.get('session_id') or data.get('conversation_id') or data.get('generation_id') or '',
        'model': data.get('model', ''),
        'repo': meta['repo'],
        'branch': meta['branch'],
        'commit': meta['commit'],
        'student': meta['student'],
        'prompt': _fix_mojibake_text(str(data.get('prompt', '')))[:1000],
        'response_summary': _fix_mojibake_text(str(data.get('response', data.get('response_summary', ''))))[:500],
    }
    _write_entry(entry)
    print(json.dumps({'status': 'logged'}))


# Run the logger only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == '__main__':
    main()