| |
| """Shared AI hook logger – works on Linux, Windows CMD, and PowerShell. |
| |
| Features: |
| * Accepts **two positional arguments** (`prompt` and `response`) – ideal for manual testing. |
| * Falls back to **stdin** when data is piped (e.g., `echo '{"prompt":"..."}' | python3 scripts/log_hook.py`). |
| * If stdin is **empty**, the script exits silently with status 0 (with `--tool codex` it first tries to log the latest Codex session). |
| * Writes a JSON line to `.ai-log/session.jsonl` with a minimal set of fields required by the repository. |
| """ |
| import json |
| import os |
| import sys |
| import subprocess |
| from datetime import datetime, timezone, timedelta |
| from pathlib import Path |
|
|
| |
# Fixed UTC+07:00 offset applied to every timestamp written to the log.
VN_TZ = timezone(offset=timedelta(hours=7))

# Value recorded in the "student" field when AI_LOG_STUDENT_EMAIL is not set.
DEFAULT_STUDENT_EMAIL = 'akirahoang617@gmail.com'

# Marker substrings whose occurrences are counted to score how garbled a
# string looks (UTF-8 text that was decoded with a single-byte code page).
MOJIBAKE_TOKENS = (
    'Ã',
    'Â',
    'Ä',
    'Å',
    'Æ',
    'â€',
    'á»',
)
|
|
|
|
def _mojibake_score(text: str) -> int:
    """Count how many mojibake marker substrings occur in *text*.

    Each token in ``MOJIBAKE_TOKENS`` contributes its (non-overlapping)
    occurrence count; ``0`` means none of the markers is present.
    """
    total = 0
    for marker in MOJIBAKE_TOKENS:
        total += text.count(marker)
    return total
|
|
|
|
def _fix_mojibake_text(value: str) -> str:
    """Try to undo UTF-8 text that was mis-decoded as cp1252 / latin-1.

    The input is returned untouched when it is not a non-empty string or
    when it contains no mojibake markers. Otherwise each candidate source
    encoding is used to round-trip the text back through UTF-8, and the
    variant with the lowest marker count wins; the original is kept unless
    a candidate scores strictly lower.
    """
    if not (isinstance(value, str) and value):
        return value

    lowest_score = _mojibake_score(value)
    if not lowest_score:
        # Nothing looks garbled, so do not risk damaging valid text.
        return value

    repaired = value
    for codec in ('cp1252', 'latin-1'):
        try:
            attempt = value.encode(codec).decode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError):
            # The text cannot have come from this codec; try the next one.
            continue

        attempt_score = _mojibake_score(attempt)
        if attempt_score < lowest_score:
            repaired, lowest_score = attempt, attempt_score

    return repaired
|
|
|
|
def _read_stdin_text() -> str:
    """Return everything piped to stdin as text, or ``''`` when there is none.

    Bytes are read from the underlying binary stream and decoded as UTF-8
    here, so the result does not depend on the console/locale encoding.

    Returns:
        The decoded text. A leading UTF-8 BOM is dropped and undecodable
        bytes become U+FFFD. An empty string is returned when stdin is
        missing, is an interactive terminal, or yields no data.
    """
    stream = sys.stdin
    # With no stdin at all, or with a terminal attached, nothing was piped
    # in; calling read() on a terminal would block until the user sends EOF.
    if stream is None or stream.isatty():
        return ''

    binary = getattr(stream, 'buffer', None)
    if binary is None:
        # stdin was replaced by a text-only object: it is already decoded.
        return stream.read().removeprefix('\ufeff')

    raw_bytes = binary.read()
    if not raw_bytes:
        return ''

    # 'utf-8-sig' decodes plain UTF-8 as well and strips a leading BOM, so a
    # single lenient call covers the BOM, no-BOM and invalid-byte cases.
    return raw_bytes.decode('utf-8-sig', errors='replace')
|
|
|
|
def _repo_name_from_remote(url: str) -> str:
    """Return the repository name encoded in a git remote URL ('' if none)."""
    # Handles 'https://host/owner/repo.git', a trailing '/', and scp-like
    # 'git@host:repo.git' remotes that contain no slash at all.
    tail = url.strip().rstrip('/').rsplit('/', 1)[-1]
    tail = tail.rsplit(':', 1)[-1]
    # Only a trailing '.git' is an extension; '.git' inside the name (for
    # example 'user.github.io') must be preserved.
    return tail.removesuffix('.git')


def _get_git_metadata() -> dict:
    """Collect repo, branch, commit and student identity for a log entry.

    Each git command runs as its own subprocess without a shell, so no
    '&&' chaining is needed (it fails on PowerShell 5.1). A command that
    cannot be run or exits non-zero simply leaves the default in place.

    Returns:
        A dict with the string keys ``repo``, ``branch``, ``commit`` and
        ``student``. ``repo`` defaults to the current directory name, and
        ``branch`` / ``commit`` default to ``'unknown'``.
    """
    metadata = {
        'repo': Path.cwd().name,
        'branch': 'unknown',
        'commit': 'unknown',
        'student': 'unknown',
    }

    git_commands = [
        ('repo', ['git', 'remote', 'get-url', 'origin']),
        ('branch', ['git', 'rev-parse', '--abbrev-ref', 'HEAD']),
        ('commit', ['git', 'rev-parse', '--short', 'HEAD']),
        ('student', ['git', 'config', 'user.email']),
    ]

    for key, cmd in git_commands:
        try:
            # Decode as UTF-8 instead of the locale code page so non-ASCII
            # names are not garbled on Windows (assumes git emits UTF-8).
            result = subprocess.check_output(
                cmd,
                stderr=subprocess.DEVNULL,
                encoding='utf-8',
                errors='replace',
            ).strip()
        except (OSError, subprocess.SubprocessError):
            # git missing, not a repository, or value unset: keep default.
            continue

        if key == 'repo':
            result = _repo_name_from_remote(result)
        if result:
            metadata[key] = result

    # Precedence: environment override, then the module default, then git's
    # user.email. While DEFAULT_STUDENT_EMAIL is non-empty the git value is
    # never reached.
    metadata['student'] = (
        os.getenv('AI_LOG_STUDENT_EMAIL')
        or DEFAULT_STUDENT_EMAIL
        or metadata['student']
    )
    return metadata
|
|
|
|
def _write_entry(entry: dict) -> None:
    """Append *entry* as one JSON line to ``session.jsonl`` in the log dir.

    The directory comes from the ``AI_LOG_DIR`` environment variable
    (default ``.ai-log``) and is created, including missing parents, when
    it does not exist yet.

    Raises:
        Exception: any error from creating the directory, serializing the
            entry or writing the file is reported on stderr and re-raised.
    """
    log_dir = Path(os.getenv('AI_LOG_DIR', '.ai-log'))
    log_file = log_dir / 'session.jsonl'

    try:
        log_dir.mkdir(parents=True, exist_ok=True)
        # 'backslashreplace' turns a lone surrogate (which json.loads can
        # produce from input such as "\ud83d") back into a \uXXXX escape,
        # which is still valid JSON, instead of failing the whole write.
        with open(log_file, 'a', encoding='utf-8',
                  errors='backslashreplace') as f:
            f.write(json.dumps(entry, ensure_ascii=False) + '\n')
    except Exception as e:
        # stdout carries the machine-readable status, so report on stderr.
        print(f"Error writing to log file: {e}", file=sys.stderr)
        raise
|
|
|
|
def _read_existing_keys() -> set[tuple[str, str, str]]:
    """Return the ``(tool, session_id, prompt)`` keys already in the log.

    Reads ``session.jsonl`` from the ``AI_LOG_DIR`` directory (default
    ``.ai-log``). Lines that are not valid JSON, or whose JSON value is not
    an object, are skipped individually so one bad line cannot hide the
    entries that follow it. A missing or unreadable file yields whatever
    keys were collected so far (possibly none).
    """
    log_file = Path(os.getenv('AI_LOG_DIR', '.ai-log')) / 'session.jsonl'
    keys: set[tuple[str, str, str]] = set()

    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    item = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(item, dict):
                    # e.g. a bare list or number: it has no fields to read.
                    continue
                keys.add((
                    str(item.get('tool', '')),
                    str(item.get('session_id', '')),
                    str(item.get('prompt', '')),
                ))
    except (OSError, UnicodeDecodeError):
        # Best effort: de-duplication must never stop the hook from running.
        pass
    return keys
|
|
|
|
def _extract_text_from_content(content) -> str:
    """Flatten a message ``content`` value into plain text.

    A string is returned as is. For a list, the ``'text'`` values of its
    dict items are joined with newlines (items without a string ``'text'``
    are ignored). Any other value yields an empty string.
    """
    if isinstance(content, list):
        return '\n'.join(
            part['text']
            for part in content
            if isinstance(part, dict) and isinstance(part.get('text'), str)
        )
    return content if isinstance(content, str) else ''
|
|
|
|
| def _latest_codex_session_file() -> Path | None: |
| root = Path(os.getenv('CODEX_HOME', Path.home() / '.codex')) / 'sessions' |
| if not root.exists(): |
| return None |
|
|
| candidates = list(root.rglob('rollout-*.jsonl')) |
| if not candidates: |
| return None |
| return max(candidates, key=lambda path: path.stat().st_mtime) |
|
|
|
|
| def _extract_codex_prompt_from_session(path: Path) -> dict | None: |
| cwd = str(Path.cwd()).lower() |
| session_id = path.stem |
| current_cwd = '' |
| last_prompt = '' |
| model = '' |
|
|
| try: |
| with open(path, 'r', encoding='utf-8') as f: |
| for line in f: |
| try: |
| item = json.loads(line) |
| except json.JSONDecodeError: |
| continue |
|
|
| payload = item.get('payload') or {} |
| if item.get('type') == 'turn_context': |
| current_cwd = str(payload.get('cwd', '')).lower() |
| model = str(payload.get('model') or model) |
| continue |
|
|
| if current_cwd and current_cwd != cwd: |
| continue |
|
|
| if payload.get('type') == 'user_message': |
| message = str(payload.get('message', '')).strip() |
| if message: |
| last_prompt = message |
| continue |
|
|
| if payload.get('type') == 'message' and payload.get('role') == 'user': |
| message = _extract_text_from_content(payload.get('content')).strip() |
| if message: |
| last_prompt = message |
| except Exception: |
| return None |
|
|
| if not last_prompt: |
| return None |
| return { |
| 'session_id': session_id, |
| 'model': model, |
| 'prompt': last_prompt, |
| } |
|
|
|
|
def _log_latest_codex_session() -> bool:
    """Log the last prompt of the newest Codex session, once.

    Returns:
        ``False`` when no session file or no prompt could be found;
        ``True`` when the prompt is in the log, whether it was written by
        this call or was already present under the same
        ``('codex', session_id, prompt)`` key.
    """
    newest = _latest_codex_session_file()
    info = _extract_codex_prompt_from_session(newest) if newest else None
    if not info:
        return False

    session_id = info['session_id']
    # Stored prompts are capped at 1000 characters, so compare the capped form.
    text = _fix_mojibake_text(info['prompt'])[:1000]

    already_logged = ('codex', session_id, text) in _read_existing_keys()
    if not already_logged:
        git = _get_git_metadata()
        _write_entry({
            'ts': datetime.now(VN_TZ).isoformat(),
            'tool': 'codex',
            'event': 'codex_session_fallback',
            'session_id': session_id,
            'model': info['model'],
            'repo': git['repo'],
            'branch': git['branch'],
            'commit': git['commit'],
            'student': git['student'],
            'prompt': text,
            'response_summary': '',
        })
    return True
|
|
|
|
def _make_entry(prompt: str, response: str, tool: str = 'manual') -> dict:
    """Build a log record for a prompt/response pair given directly.

    Event, session id and model are left empty. The prompt is capped at
    1000 characters and the response summary at 500, both after mojibake
    repair. Git metadata and a UTC+07:00 timestamp are filled in.
    """
    git = _get_git_metadata()
    record = {
        'ts': datetime.now(VN_TZ).isoformat(),
        'tool': tool,
        'event': '',
        'session_id': '',
        'model': '',
    }
    for field in ('repo', 'branch', 'commit', 'student'):
        record[field] = git[field]
    record['prompt'] = _fix_mojibake_text(prompt)[:1000]
    record['response_summary'] = _fix_mojibake_text(response)[:500]
    return record
|
|
|
|
def main() -> None:
    """Entry point: log one AI interaction from argv or from stdin.

    Modes, checked in this order:

    1. Exactly two positional arguments (the first not starting with
       ``--``) are logged as ``prompt`` and ``response``; the tool name
       comes from ``AI_TOOL_NAME`` (default ``manual``).
    2. Otherwise stdin is read. If it is empty, the process exits with
       status 0; with ``--tool codex`` the latest Codex session is logged
       first.
    3. Otherwise stdin must hold a JSON object describing the hook event.
       Input that is not valid JSON, or is not an object, is ignored and
       the process exits with status 0 so a hook never fails its caller.

    On success ``{"status": "logged"}`` is printed to stdout.
    """
    argv = sys.argv

    # Mode 1: manual invocation with positional prompt and response.
    if len(argv) == 3 and not argv[1].startswith('--'):
        prompt, response = argv[1], argv[2]
        entry = _make_entry(
            prompt, response, os.getenv('AI_TOOL_NAME', 'manual'))
        _write_entry(entry)
        print(json.dumps({'status': 'logged'}))
        return

    # Optional '--tool NAME' overrides the AI_TOOL_NAME environment variable.
    parser_tool = None
    if '--tool' in argv:
        idx = argv.index('--tool')
        if idx + 1 < len(argv):
            parser_tool = argv[idx + 1]

    raw = _read_stdin_text().strip()
    if not raw:
        if parser_tool == 'codex':
            _log_latest_codex_session()
        sys.exit(0)

    raw = _fix_mojibake_text(raw)
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        sys.exit(0)
    if not isinstance(data, dict):
        # Valid JSON but not an object (list, string, number...): there
        # are no fields to read, so ignore it like any other bad payload.
        sys.exit(0)

    tool = parser_tool or os.getenv('AI_TOOL_NAME', 'manual')
    meta = _get_git_metadata()

    # A JSON null must be logged as empty text, not as the string "None".
    prompt_value = data.get('prompt')
    response_value = data.get('response', data.get('response_summary'))
    prompt_text = '' if prompt_value is None else str(prompt_value)
    response_text = '' if response_value is None else str(response_value)

    entry = {
        'ts': datetime.now(VN_TZ).isoformat(),
        'tool': tool,
        'event': data.get('hook_event_name') or data.get('event') or '',
        'session_id': (
            data.get('session_id')
            or data.get('conversation_id')
            or data.get('generation_id')
            or ''
        ),
        'model': data.get('model', ''),
        'repo': meta['repo'],
        'branch': meta['branch'],
        'commit': meta['commit'],
        'student': meta['student'],
        'prompt': _fix_mojibake_text(prompt_text)[:1000],
        'response_summary': _fix_mojibake_text(response_text)[:500],
    }
    _write_entry(entry)
    print(json.dumps({'status': 'logged'}))
|
|
|
|
# Run the logger only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|