Spaces:

AnhLee0
/

AI-Coach

Sleeping

Hoang Duc Hung

feat: stabilize push-up feedback and VLM handling

350d731 25 days ago

10.1 kB

	#!/usr/bin/env python3
	"""Shared AI hook logger – works on Linux, Windows CMD, and PowerShell.

	Features:
	* Accepts two positional arguments (`prompt` and `response`) – ideal for manual testing.
	* Falls back to stdin when data is piped (e.g., `echo '{"prompt":"..."}' | python3 scripts/log_hook.py`).
	* If no input is provided, the script exits silently – prevents the "hang" you experienced.
	* Writes a JSON line to `.ai-log/session.jsonl` with a minimal set of fields required by the repository.
	"""
	import json
	import os
	import sys
	import subprocess
	from datetime import datetime, timezone, timedelta
	from pathlib import Path

	# Vietnam timezone (used by existing logs): a fixed UTC+7 offset applied to
	# every timestamp this script writes.
	VN_TZ = timezone(timedelta(hours=7))
	# Fallback student e-mail, consulted by _get_git_metadata() when the
	# AI_LOG_STUDENT_EMAIL environment variable is unset or empty.
	DEFAULT_STUDENT_EMAIL = 'akirahoang617@gmail.com'

	# Typical artifacts when UTF-8 bytes are decoded with a legacy codepage first.
	# _mojibake_score() counts occurrences of these substrings.
	MOJIBAKE_TOKENS = ('Ã', 'Â', 'Ä', 'Å', 'Æ', 'â€', 'á»')


	def _mojibake_score(text: str) -> int:
	    """Count how many mojibake marker substrings occur in ``text``.

	    Every entry of ``MOJIBAKE_TOKENS`` is counted with ``str.count`` and the
	    per-token counts are added together; 0 means no marker was found.
	    """
	    total = 0
	    for marker in MOJIBAKE_TOKENS:
	        total += text.count(marker)
	    return total


	def _fix_mojibake_text(value: str) -> str:
	    """Best-effort repair for common UTF-8 mojibake in Windows pipelines.

	    The text is re-encoded with cp1252 and with latin-1 and decoded again as
	    UTF-8; whichever variant (including the untouched input) contains the
	    fewest mojibake markers is returned, earlier variants winning ties.
	    Non-string or empty values, and strings without any marker, are
	    returned unchanged.
	    """
	    if not (isinstance(value, str) and value):
	        return value
	    if _mojibake_score(value) == 0:
	        return value

	    # The original text goes first so a round-trip is only chosen when it
	    # is strictly cleaner.
	    variants = [value]
	    for codec in ('cp1252', 'latin-1'):
	        try:
	            variants.append(value.encode(codec).decode('utf-8'))
	        except (UnicodeEncodeError, UnicodeDecodeError):
	            # This codec cannot round-trip the text; try the next one.
	            pass

	    # min() returns the first element with the lowest score.
	    return min(variants, key=_mojibake_score)


	def _read_stdin_text() -> str:
	"""Read stdin as bytes first to avoid locale-dependent mis-decoding."""
	raw_bytes = sys.stdin.buffer.read()
	if not raw_bytes:
	return ''

	for encoding in ('utf-8-sig', 'utf-8'):
	try:
	return raw_bytes.decode(encoding)
	except UnicodeDecodeError:
	continue

	# Last resort: decode with replacement to avoid crashing hooks.
	return raw_bytes.decode('utf-8', errors='replace')


	def _get_git_metadata() -> dict:
	"""Collect git metadata using individual commands for cross-platform compatibility.

	Avoids shell=True and '&&' chaining which fails on PowerShell 5.1 (Win 10 default).
	Each git command is run as a direct subprocess call for maximum portability.
	"""
	metadata = {
	'repo': Path.cwd().name,
	'branch': 'unknown',
	'commit': 'unknown',
	'student': 'unknown'
	}

	git_commands = [
	('repo', ['git', 'remote', 'get-url', 'origin']),
	('branch', ['git', 'rev-parse', '--abbrev-ref', 'HEAD']),
	('commit', ['git', 'rev-parse', '--short', 'HEAD']),
	('student', ['git', 'config', 'user.email']),
	]

	for key, cmd in git_commands:
	try:
	result = subprocess.check_output(
	cmd, text=True, stderr=subprocess.DEVNULL).strip()
	if key == 'repo' and result:
	metadata['repo'] = result.split('/')[-1].replace('.git', '')
	elif result:
	metadata[key] = result
	except Exception:
	pass

	metadata['student'] = os.getenv('AI_LOG_STUDENT_EMAIL') or DEFAULT_STUDENT_EMAIL or metadata['student']
	return metadata


	def _write_entry(entry: dict) -> None:
	log_dir = Path(os.getenv('AI_LOG_DIR', '.ai-log'))
	log_dir.mkdir(exist_ok=True)
	log_file = log_dir / 'session.jsonl'

	try:
	with open(log_file, 'a', encoding='utf-8') as f:
	f.write(json.dumps(entry, ensure_ascii=False) + '\n')
	except Exception as e:
	print(f"Error writing to log file: {e}")
	raise


	def _read_existing_keys() -> set[tuple[str, str, str]]:
	log_file = Path(os.getenv('AI_LOG_DIR', '.ai-log')) / 'session.jsonl'
	keys: set[tuple[str, str, str]] = set()
	if not log_file.exists():
	return keys

	try:
	with open(log_file, 'r', encoding='utf-8') as f:
	for line in f:
	try:
	item = json.loads(line)
	except json.JSONDecodeError:
	continue
	keys.add((
	str(item.get('tool', '')),
	str(item.get('session_id', '')),
	str(item.get('prompt', '')),
	))
	except Exception:
	pass
	return keys


	def _extract_text_from_content(content) -> str:
	if isinstance(content, str):
	return content
	if isinstance(content, list):
	parts = []
	for item in content:
	if isinstance(item, dict) and isinstance(item.get('text'), str):
	parts.append(item['text'])
	return '\n'.join(parts)
	return ''


	def _latest_codex_session_file() -> Path \| None:
	root = Path(os.getenv('CODEX_HOME', Path.home() / '.codex')) / 'sessions'
	if not root.exists():
	return None

	candidates = list(root.rglob('rollout-*.jsonl'))
	if not candidates:
	return None
	return max(candidates, key=lambda path: path.stat().st_mtime)


	def _extract_codex_prompt_from_session(path: Path) -> dict \| None:
	cwd = str(Path.cwd()).lower()
	session_id = path.stem
	current_cwd = ''
	last_prompt = ''
	model = ''

	try:
	with open(path, 'r', encoding='utf-8') as f:
	for line in f:
	try:
	item = json.loads(line)
	except json.JSONDecodeError:
	continue

	payload = item.get('payload') or {}
	if item.get('type') == 'turn_context':
	current_cwd = str(payload.get('cwd', '')).lower()
	model = str(payload.get('model') or model)
	continue

	if current_cwd and current_cwd != cwd:
	continue

	if payload.get('type') == 'user_message':
	message = str(payload.get('message', '')).strip()
	if message:
	last_prompt = message
	continue

	if payload.get('type') == 'message' and payload.get('role') == 'user':
	message = _extract_text_from_content(payload.get('content')).strip()
	if message:
	last_prompt = message
	except Exception:
	return None

	if not last_prompt:
	return None
	return {
	'session_id': session_id,
	'model': model,
	'prompt': last_prompt,
	}


	def _log_latest_codex_session() -> bool:
	    """Log the last prompt of the newest Codex session as a fallback entry.

	    Returns:
	        ``False`` when no session file or no prompt could be found;
	        ``True`` when an entry was written or an entry with the same
	        ``('codex', session_id, prompt)`` key is already in the log.
	    """
	    latest = _latest_codex_session_file()
	    if not latest:
	        return False

	    info = _extract_codex_prompt_from_session(latest)
	    if not info:
	        return False

	    session_id = info['session_id']
	    # Same repair and 1000-character cap that regular entries get, so the
	    # duplicate check compares like with like.
	    text = _fix_mojibake_text(info['prompt'])[:1000]
	    if ('codex', session_id, text) in _read_existing_keys():
	        return True

	    git = _get_git_metadata()
	    _write_entry({
	        'ts': datetime.now(VN_TZ).isoformat(),
	        'tool': 'codex',
	        'event': 'codex_session_fallback',
	        'session_id': session_id,
	        'model': info['model'],
	        'repo': git['repo'],
	        'branch': git['branch'],
	        'commit': git['commit'],
	        'student': git['student'],
	        'prompt': text,
	        'response_summary': '',
	    })
	    return True


	def _make_entry(prompt: str, response: str, tool: str = 'manual') -> dict:
	    """Build a log record for a manually supplied prompt/response pair.

	    The record carries a Vietnam-time timestamp, the git metadata of the
	    current repository and empty ``event``/``session_id``/``model`` fields.
	    The prompt is capped at 1000 characters and the response summary at 500,
	    both after mojibake repair.
	    """
	    git = _get_git_metadata()
	    record = {'ts': datetime.now(VN_TZ).isoformat(), 'tool': tool}
	    # Insertion order defines the field order of the written JSON line.
	    record.update(event='', session_id='', model='')
	    record.update(
	        (field, git[field])
	        for field in ('repo', 'branch', 'commit', 'student')
	    )
	    record['prompt'] = _fix_mojibake_text(prompt)[:1000]
	    record['response_summary'] = _fix_mojibake_text(response)[:500]
	    return record


	def main() -> None:
	    """Entry point: log one prompt/response record.

	    Two input modes are supported:

	    * Exactly two positional arguments (the first not starting with
	      ``--``) are taken as prompt and response.
	    * Otherwise a JSON object is read from stdin. An optional
	      ``--tool NAME`` argument names the tool; without it the
	      ``AI_TOOL_NAME`` environment variable (default ``manual``) is used.

	    With empty stdin and ``--tool codex`` the newest Codex session file is
	    logged instead. Empty input, invalid JSON and JSON that is not an
	    object all end the process silently with exit status 0. On success
	    ``{"status": "logged"}`` is printed to stdout.
	    """
	    # 1) If two positional arguments are supplied, treat them as prompt/response.
	    # Skip this branch if the first arg looks like a flag (e.g. --tool).
	    if len(sys.argv) == 3 and not sys.argv[1].startswith('--'):
	        prompt, response = sys.argv[1], sys.argv[2]
	        entry = _make_entry(
	            prompt, response, os.getenv('AI_TOOL_NAME', 'manual'))
	        _write_entry(entry)
	        print(json.dumps({'status': 'logged'}))
	        return

	    # 2) Otherwise read from stdin (piped JSON). If nothing comes in, exit silently.
	    parser_tool = None
	    if '--tool' in sys.argv:
	        idx = sys.argv.index('--tool')
	        if idx + 1 < len(sys.argv):
	            parser_tool = sys.argv[idx + 1]

	    raw = _read_stdin_text().strip()
	    if not raw:
	        if parser_tool == 'codex':
	            _log_latest_codex_session()
	        sys.exit(0)

	    raw = _fix_mojibake_text(raw)
	    try:
	        data = json.loads(raw)
	    except json.JSONDecodeError:
	        sys.exit(0)
	    if not isinstance(data, dict):
	        # A bare list/string/number is not a hook payload; stay silent
	        # rather than crash on data.get below.
	        sys.exit(0)

	    tool = parser_tool or os.getenv('AI_TOOL_NAME', 'manual')

	    # 'response' wins over 'response_summary'; a JSON null counts as absent.
	    response = data.get('response')
	    if response is None:
	        response = data.get('response_summary')
	    prompt = data.get('prompt')
	    # Map null to '' so it is not logged as the literal text 'None'.
	    prompt_text = '' if prompt is None else str(prompt)
	    response_text = '' if response is None else str(response)

	    meta = _get_git_metadata()
	    # Minimal normalisation – keep only fields we care about.
	    entry = {
	        'ts': datetime.now(VN_TZ).isoformat(),
	        'tool': tool,
	        'event': data.get('hook_event_name') or data.get('event') or '',
	        'session_id': (
	            data.get('session_id')
	            or data.get('conversation_id')
	            or data.get('generation_id')
	            or ''
	        ),
	        'model': data.get('model') or '',
	        'repo': meta['repo'],
	        'branch': meta['branch'],
	        'commit': meta['commit'],
	        'student': meta['student'],
	        'prompt': _fix_mojibake_text(prompt_text)[:1000],
	        'response_summary': _fix_mojibake_text(response_text)[:500],
	    }
	    _write_entry(entry)
	    print(json.dumps({'status': 'logged'}))


	# Run the logger only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()