# AI-Coach / scripts/log_hook.py
# Hoang Duc Hung
# feat: stabilize push-up feedback and VLM handling
# 350d731
#!/usr/bin/env python3
"""Shared AI hook logger – works on Linux, Windows CMD, and PowerShell.

Features:
* Accepts **two positional arguments** (`prompt` and `response`) – ideal for manual testing.
* Falls back to **stdin** when data is piped (e.g., `echo '{"prompt":"..."}' | python3 scripts/log_hook.py`).
* With `--tool codex` and no piped data, logs the latest user prompt found in the newest Codex session file instead.
* If **no input** is provided on stdin, the script exits silently (status 0).
* Appends a JSON line to `.ai-log/session.jsonl` (directory overridable via `AI_LOG_DIR`) with a minimal set of fields required by the repository.
"""
import json
import os
import sys
import subprocess
from datetime import datetime, timezone, timedelta
from pathlib import Path
# Vietnam timezone (used by existing logs): a fixed UTC+7 offset that is
# applied to the `ts` field of every entry this script writes.
VN_TZ = timezone(timedelta(hours=7))
# Address recorded as `student` whenever the AI_LOG_STUDENT_EMAIL environment
# variable is unset or empty.
DEFAULT_STUDENT_EMAIL = 'akirahoang617@gmail.com'
# Typical artifacts when UTF-8 bytes are decoded with a legacy codepage first.
# Occurrences of these substrings are counted to score how garbled a text is.
MOJIBAKE_TOKENS = ('Ã', 'Â', 'Ä', 'Å', 'Æ', 'â€', 'á»')
def _mojibake_score(text: str) -> int:
    """Count the mojibake marker substrings present in *text*.

    Every entry of ``MOJIBAKE_TOKENS`` is counted with ``str.count``
    (non-overlapping occurrences) and the per-marker counts are added up.
    A score of 0 means no marker was found.
    """
    hits = 0
    for marker in MOJIBAKE_TOKENS:
        hits += text.count(marker)
    return hits
def _fix_mojibake_text(value: str) -> str:
    """Best-effort repair for common UTF-8 mojibake in Windows pipelines.

    The text is re-encoded with each legacy codec (cp1252, then latin-1) and
    decoded again as UTF-8. Among the original text and every round-trip
    that succeeds, the one with the lowest mojibake score is returned; on a
    tie the earliest candidate wins, so the original is kept unless a
    round-trip is strictly better.

    Non-string or empty values, and text without any mojibake marker, are
    returned unchanged.
    """
    if not isinstance(value, str) or not value:
        return value

    starting_score = _mojibake_score(value)
    if starting_score == 0:
        return value

    # (score, text) pairs in priority order; min() keeps the first minimum.
    ranked = [(starting_score, value)]
    for legacy_codec in ('cp1252', 'latin-1'):
        try:
            attempt = value.encode(legacy_codec).decode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError):
            # This codec cannot explain the garbling; try the next one.
            continue
        ranked.append((_mojibake_score(attempt), attempt))

    return min(ranked, key=lambda pair: pair[0])[1]
def _read_stdin_text() -> str:
    """Read all of stdin and decode it as UTF-8, independent of the locale.

    The raw bytes are taken from ``sys.stdin.buffer`` so the console code
    page never takes part in decoding.

    Returns:
        The decoded text with a leading UTF-8 BOM removed. An empty string
        is returned when there is nothing to read: stdin is missing, stdin
        is an interactive terminal, or the pipe is empty. Bytes that are not
        valid UTF-8 are replaced with U+FFFD instead of raising.
    """
    stream = sys.stdin
    # Reading from an interactive terminal blocks until the user sends EOF,
    # which would hang the hook; treat "nothing piped" as "no input".
    if stream is None or stream.isatty():
        return ''

    raw_bytes = stream.buffer.read()
    if not raw_bytes:
        return ''

    try:
        # 'utf-8-sig' is UTF-8 that additionally drops a leading BOM, so it
        # covers input both with and without one.
        return raw_bytes.decode('utf-8-sig')
    except UnicodeDecodeError:
        # Last resort: decode with replacement to avoid crashing hooks,
        # still stripping the BOM so the text stays parseable.
        return raw_bytes.decode('utf-8-sig', errors='replace')
def _repo_name_from_remote(url: str) -> str:
    """Return the last path component of a git remote URL, minus a trailing '.git'."""
    tail = url.strip().rstrip('/')
    # Handles URL-style remotes (https://host/owner/repo.git) as well as
    # scp-style remotes without a slash in the path (git@host:repo.git).
    tail = tail.rsplit('/', 1)[-1].rsplit(':', 1)[-1]
    # Only a *trailing* '.git' is decoration; names such as 'my.github.io'
    # legitimately contain that substring and must stay intact.
    return tail.removesuffix('.git')


def _get_git_metadata() -> dict:
    """Collect git metadata using individual commands for cross-platform compatibility.

    Avoids shell=True and '&&' chaining which fails on PowerShell 5.1 (Win 10 default).
    Each git command is run as a direct subprocess call for maximum portability.

    Returns:
        A dict with the keys ``repo``, ``branch``, ``commit`` and ``student``.
        ``repo`` defaults to the current directory name and ``branch`` /
        ``commit`` to ``'unknown'`` when the matching git command fails or
        prints nothing. ``student`` is the AI_LOG_STUDENT_EMAIL environment
        variable if set, else ``DEFAULT_STUDENT_EMAIL``, else the git
        ``user.email`` (or ``'unknown'``).
    """
    metadata = {
        'repo': Path.cwd().name,
        'branch': 'unknown',
        'commit': 'unknown',
        'student': 'unknown',
    }
    git_commands = (
        ('repo', ['git', 'remote', 'get-url', 'origin']),
        ('branch', ['git', 'rev-parse', '--abbrev-ref', 'HEAD']),
        ('commit', ['git', 'rev-parse', '--short', 'HEAD']),
        ('student', ['git', 'config', 'user.email']),
    )
    for key, cmd in git_commands:
        try:
            result = subprocess.check_output(
                cmd, text=True, stderr=subprocess.DEVNULL).strip()
        except (OSError, subprocess.SubprocessError, ValueError):
            # Best effort: git missing, not a repository, no such remote, or
            # undecodable output -- keep the default for this field.
            continue
        if not result:
            continue
        if key == 'repo':
            # Keep the directory-name default if the URL yields no name.
            metadata['repo'] = _repo_name_from_remote(result) or metadata['repo']
        else:
            metadata[key] = result
    # NOTE(review): because DEFAULT_STUDENT_EMAIL is consulted before the git
    # value, git's user.email is only used when both the environment variable
    # and the default are empty. Precedence is kept as-is.
    metadata['student'] = os.getenv('AI_LOG_STUDENT_EMAIL') or DEFAULT_STUDENT_EMAIL or metadata['student']
    return metadata
def _write_entry(entry: dict) -> None:
    """Append *entry* as one JSON line to ``session.jsonl`` in the log directory.

    The directory is taken from the AI_LOG_DIR environment variable
    (default ``.ai-log``) and is created, including missing parents, when it
    does not exist yet. Non-ASCII text is written as-is in UTF-8.

    Raises:
        Exception: whatever creating the directory, serialising the entry or
            writing the file raised; a short message is printed to stderr
            first.
    """
    log_dir = Path(os.getenv('AI_LOG_DIR', '.ai-log'))
    log_file = log_dir / 'session.jsonl'
    try:
        # Serialise first so a bad entry never leaves a half-written line.
        line = json.dumps(entry, ensure_ascii=False) + '\n'
        # parents=True lets AI_LOG_DIR point at a nested path.
        log_dir.mkdir(parents=True, exist_ok=True)
        with open(log_file, 'a', encoding='utf-8') as f:
            f.write(line)
    except Exception as e:
        # stderr, because stdout carries the JSON status line of this script.
        print(f"Error writing to log file: {e}", file=sys.stderr)
        raise
def _read_existing_keys() -> set[tuple[str, str, str]]:
    """Return the ``(tool, session_id, prompt)`` keys already present in the log.

    Reads ``session.jsonl`` from the directory named by AI_LOG_DIR (default
    ``.ai-log``). Each line that parses to a JSON object contributes one key;
    missing fields count as empty strings and values are stringified.
    Lines that are not valid JSON, or that are valid JSON but not an object,
    are skipped without stopping the scan.

    Returns:
        The set of keys found; empty when the file is missing or unreadable.
    """
    log_file = Path(os.getenv('AI_LOG_DIR', '.ai-log')) / 'session.jsonl'
    keys: set[tuple[str, str, str]] = set()
    try:
        # errors='replace' keeps one badly encoded line from ending the scan.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                try:
                    item = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(item, dict):
                    # e.g. `null` or `[]`: valid JSON, but not a log entry.
                    continue
                keys.add((
                    str(item.get('tool', '')),
                    str(item.get('session_id', '')),
                    str(item.get('prompt', '')),
                ))
    except OSError:
        # Missing or unreadable log: report whatever was collected so far.
        pass
    return keys
def _extract_text_from_content(content) -> str:
    """Flatten a message ``content`` value into plain text.

    A string is returned unchanged. For a list, the ``'text'`` value of every
    dict element whose ``'text'`` is a string is collected and the pieces are
    joined with newlines; all other elements are ignored. Any other kind of
    value yields an empty string.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ''
    return '\n'.join(
        part['text']
        for part in content
        if isinstance(part, dict) and isinstance(part.get('text'), str)
    )
def _latest_codex_session_file() -> Path | None:
root = Path(os.getenv('CODEX_HOME', Path.home() / '.codex')) / 'sessions'
if not root.exists():
return None
candidates = list(root.rglob('rollout-*.jsonl'))
if not candidates:
return None
return max(candidates, key=lambda path: path.stat().st_mtime)
def _extract_codex_prompt_from_session(path: Path) -> dict | None:
cwd = str(Path.cwd()).lower()
session_id = path.stem
current_cwd = ''
last_prompt = ''
model = ''
try:
with open(path, 'r', encoding='utf-8') as f:
for line in f:
try:
item = json.loads(line)
except json.JSONDecodeError:
continue
payload = item.get('payload') or {}
if item.get('type') == 'turn_context':
current_cwd = str(payload.get('cwd', '')).lower()
model = str(payload.get('model') or model)
continue
if current_cwd and current_cwd != cwd:
continue
if payload.get('type') == 'user_message':
message = str(payload.get('message', '')).strip()
if message:
last_prompt = message
continue
if payload.get('type') == 'message' and payload.get('role') == 'user':
message = _extract_text_from_content(payload.get('content')).strip()
if message:
last_prompt = message
except Exception:
return None
if not last_prompt:
return None
return {
'session_id': session_id,
'model': model,
'prompt': last_prompt,
}
def _log_latest_codex_session() -> bool:
    """Log the latest user prompt of the newest Codex session, once.

    The prompt is passed through the mojibake repair and cut to 1000
    characters. If an entry with the same tool (``codex``), session id and
    prompt is already in the log, nothing is written.

    Returns:
        ``False`` when no session file or no prompt could be found,
        ``True`` when the prompt is in the log (already present or just
        written).
    """
    session_file = _latest_codex_session_file()
    extracted = _extract_codex_prompt_from_session(session_file) if session_file else None
    if not extracted:
        return False

    session_id = extracted['session_id']
    prompt = _fix_mojibake_text(extracted['prompt'])[:1000]
    already_logged = ('codex', session_id, prompt) in _read_existing_keys()
    if already_logged:
        return True

    meta = _get_git_metadata()
    _write_entry({
        'ts': datetime.now(VN_TZ).isoformat(),
        'tool': 'codex',
        'event': 'codex_session_fallback',
        'session_id': session_id,
        'model': extracted['model'],
        **{field: meta[field] for field in ('repo', 'branch', 'commit', 'student')},
        'prompt': prompt,
        'response_summary': '',
    })
    return True
def _make_entry(prompt: str, response: str, tool: str = 'manual') -> dict:
    """Build a log entry for a prompt/response pair given directly.

    Args:
        prompt: Prompt text; mojibake-repaired and cut to 1000 characters.
        response: Response text; mojibake-repaired and cut to 500 characters.
        tool: Value stored in the ``tool`` field.

    Returns:
        The entry dict, stamped with the current time in ``VN_TZ`` and the
        git metadata; ``event``, ``session_id`` and ``model`` are left empty.
    """
    meta = _get_git_metadata()
    entry = {
        'ts': datetime.now(VN_TZ).isoformat(),
        'tool': tool,
        'event': '',
        'session_id': '',
        'model': '',
    }
    for field in ('repo', 'branch', 'commit', 'student'):
        entry[field] = meta[field]
    entry['prompt'] = _fix_mojibake_text(prompt)[:1000]
    entry['response_summary'] = _fix_mojibake_text(response)[:500]
    return entry
def _field_text(value) -> str:
    """Return *value* as text, mapping a missing/null value to ''."""
    if value is None:
        return ''
    return value if isinstance(value, str) else str(value)


def main() -> None:
    """Command-line entry point of the hook logger.

    Modes, checked in this order:

    1. Exactly two positional arguments (the first not starting with
       ``--``): they are logged as prompt and response, with the tool name
       taken from AI_TOOL_NAME (default ``manual``).
    2. Otherwise stdin is read. If it is empty, ``--tool codex`` triggers the
       Codex session fallback; in either case the script exits with status 0.
    3. Non-empty stdin must be a JSON object; anything else (invalid JSON or
       a non-object value) ends the script silently with status 0. The
       object is normalised into a log entry and appended to the log.

    ``{"status": "logged"}`` is printed to stdout after an entry was written
    in modes 1 and 3.
    """
    argv = sys.argv

    # 1) If two positional arguments are supplied, treat them as prompt/response.
    #    Skip this branch if the first arg looks like a flag (e.g. --tool).
    if len(argv) == 3 and not argv[1].startswith('--'):
        prompt, response = argv[1], argv[2]
        entry = _make_entry(
            prompt, response, os.getenv('AI_TOOL_NAME', 'manual'))
        _write_entry(entry)
        print(json.dumps({'status': 'logged'}))
        return

    # 2) Otherwise read from stdin (piped JSON). If nothing comes in, exit silently.
    parser_tool = None
    if '--tool' in argv:
        idx = argv.index('--tool')
        if idx + 1 < len(argv):
            parser_tool = argv[idx + 1]
    raw = _read_stdin_text().strip()
    if not raw:
        if parser_tool == 'codex':
            _log_latest_codex_session()
        sys.exit(0)
    raw = _fix_mojibake_text(raw)
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        sys.exit(0)
    if not isinstance(data, dict):
        # Valid JSON but not an object (list, string, number, null): there
        # are no fields to log, so treat it like unparseable input.
        sys.exit(0)

    tool = parser_tool or os.getenv('AI_TOOL_NAME', 'manual')
    meta = _get_git_metadata()
    # A null/empty `response` falls through to `response_summary`, matching
    # the `or` chains used for `event` and `session_id`.
    response = data.get('response') or data.get('response_summary')
    # Minimal normalisation - keep only fields we care about.
    entry = {
        'ts': datetime.now(VN_TZ).isoformat(),
        'tool': tool,
        'event': data.get('hook_event_name') or data.get('event') or '',
        'session_id': data.get('session_id') or data.get('conversation_id') or data.get('generation_id') or '',
        'model': data.get('model') or '',
        'repo': meta['repo'],
        'branch': meta['branch'],
        'commit': meta['commit'],
        'student': meta['student'],
        # _field_text keeps JSON null from being logged as the text 'None'.
        'prompt': _fix_mojibake_text(_field_text(data.get('prompt')))[:1000],
        'response_summary': _fix_mojibake_text(_field_text(response))[:500],
    }
    _write_entry(entry)
    print(json.dumps({'status': 'logged'}))


if __name__ == '__main__':
    main()