tekitoutarou
/

First-RoKAN-Model

Model card Files Files and versions

First-RoKAN-Model / agent_monitor.py

tekitoutarou's picture

Upload 12 files

f73ae00 verified about 1 month ago

history blame contribute delete

4.42 kB

	import os
	import sys
	import time
	import subprocess
	import json
	import urllib.request

	# ==========================================================
	# Terminal Agent (Gemini API) for BS-RoKAN monitoring
	# VRAM usage: 0GB / CPU load: minimal
	# ==========================================================

	# File holding the API key; GEMINI_API_KEY is the fallback.
	KEY_FILE = "APIKey From Google AI Studio.txt"


	def _load_api_key(path, env_var="GEMINI_API_KEY"):
	    """Return the API key stored in *path*, else the value of *env_var*.

	    The file is decoded as ``utf-8-sig`` so that a byte-order mark written
	    by some Windows editors does not become part of the key.  A missing or
	    empty file falls back to the environment variable; an empty string is
	    returned when neither source provides a key.
	    """
	    try:
	        with open(path, encoding="utf-8-sig") as key_file:
	            key = key_file.read().strip()
	    except FileNotFoundError:
	        key = ""
	    return key or os.environ.get(env_var, "")


	API_KEY = _load_api_key(KEY_FILE)

	MODEL_NAME = "gemini-3.1-flash-lite"

	def analyze_logs_with_llm(log_buffer):
	    """Ask Gemini to judge recent training logs and return an action keyword.

	    Args:
	        log_buffer: Sequence of log-line strings; they are joined with
	            newlines and appended to the prompt.

	    Returns:
	        ``"LOWER_LR"``, ``"RESTART"`` or ``"OK"``.  ``"OK"`` is also returned
	        when no API key is configured, or when the request or the response
	        parsing fails, so a monitoring failure never interrupts training.
	    """
	    if not API_KEY:
	        print("[Agent] API_KEYがないため判定をスキップ(OK)")
	        return "OK"

	    system_instruction = "あなたは音声分離モデルBS-RoKANの学習監視エージェントです。以下の学習ログを見て、学習が順調か評価してください。"
	    prompt = f"{system_instruction} 出力は OK, LOWER_LR, RESTART のいずれか1語のみにしてください。 \n\nログ:\n" + "\n".join(log_buffer)

	    url = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL_NAME}:generateContent"

	    # Gemini API (REST) request body.
	    payload = {
	        "contents": [{
	            "parts": [{"text": prompt}],
	        }],
	        "generationConfig": {
	            "temperature": 0.1,
	            "maxOutputTokens": 10,
	        },
	    }
	    # The key travels in a header rather than the query string so that it
	    # does not end up in URLs recorded by proxies or logs, and needs no
	    # URL escaping.
	    headers = {
	        "Content-Type": "application/json",
	        "x-goog-api-key": API_KEY,
	    }

	    try:
	        req = urllib.request.Request(url, data=json.dumps(payload).encode(), headers=headers)
	        with urllib.request.urlopen(req, timeout=15) as r:
	            response = json.loads(r.read())
	        # Extract the generated text from the Gemini response structure.
	        decision = response["candidates"][0]["content"]["parts"][0]["text"].strip().upper()
	    except Exception as e:
	        # Deliberately broad: network, HTTP, JSON and response-shape errors
	        # are all treated as "no opinion" so training keeps running.
	        print(f"[Agent] Gemini APIエラー: {e}")
	        return "OK"

	    # LOWER_LR takes priority over RESTART if both words appear.
	    for keyword in ("LOWER_LR", "RESTART"):
	        if keyword in decision:
	            return keyword
	    return "OK"

	def _stop_process(process, grace_seconds=10):
	    """Terminate *process* and wait for it, killing it if it does not exit."""
	    process.terminate()
	    try:
	        process.wait(timeout=grace_seconds)
	    except subprocess.TimeoutExpired:
	        process.kill()
	        process.wait()


	def main():
	    """Run the training script under supervision and restart it as needed.

	    The training process output is echoed line by line.  Lines containing
	    "Loss" or "Saved:" are buffered, and each time a "Saved:" line arrives
	    with more than five buffered lines, the last 30 are sent to
	    ``analyze_logs_with_llm``:

	    * ``LOWER_LR``: stop training and restart it with ``--gate_lr 5e-4``.
	      NOTE: the flag is added only once, so a later ``LOWER_LR`` restarts
	      with the same learning rate.
	    * ``RESTART``: stop training, wait 5 seconds and restart.
	    * anything else: keep going and clear the buffer.

	    If the process exits by itself with a non-zero code it is restarted
	    after 10 seconds; if it exits with code 0 the monitor stops instead of
	    training again.  Ctrl+C stops the child and exits with status 0.
	    """
	    print(f"[*] Gemini Terminal Agent 起動成功 (Model: {MODEL_NAME})")
	    print("[*] 学習プロセスを起動中...")

	    # Assumes an RX 9070 XT under WSL2: start with batch size 2.
	    cmd = ["python", "-u", "train_rokan.py", "--batch_size", "2"]

	    while True:
	        print(f"\n[Agent] 訓練開始: {' '.join(cmd)}")
	        log_buffer = []
	        # Distinguishes a restart requested by the agent from a crash.
	        stopped_by_agent = False

	        # The context manager closes the stdout pipe on every restart;
	        # errors="replace" keeps undecodable output from killing the monitor.
	        with subprocess.Popen(
	            cmd,
	            stdout=subprocess.PIPE,
	            stderr=subprocess.STDOUT,
	            text=True,
	            bufsize=1,
	            errors="replace",
	        ) as process:
	            try:
	                for raw_line in process.stdout:
	                    line = raw_line.strip()
	                    if not line:
	                        continue
	                    print(line)

	                    if "Loss" in line or "Saved:" in line:
	                        log_buffer.append(line)

	                    # Diagnose with Gemini at every save (end of epoch).
	                    if "Saved:" not in line or len(log_buffer) <= 5:
	                        continue

	                    decision = analyze_logs_with_llm(log_buffer[-30:])
	                    if decision == "LOWER_LR":
	                        print(f"[Agent] Geminiの判定: {decision} (学習率を下げて再開します)")
	                        _stop_process(process)
	                        if "--gate_lr" not in cmd:
	                            cmd.extend(["--gate_lr", "5e-4"])  # 1e-3 -> 5e-4
	                        stopped_by_agent = True
	                        break
	                    if decision == "RESTART":
	                        print(f"[Agent] Geminiの判定: {decision} (異常検知につき再起動します)")
	                        _stop_process(process)
	                        time.sleep(5)
	                        stopped_by_agent = True
	                        break

	                    print(f"[Agent] Geminiの判定: {decision} (順調です)")
	                    log_buffer = []  # clear the buffer
	            except KeyboardInterrupt:
	                print("\n[Agent] ユーザーによる中断。プロセスを終了します。")
	                _stop_process(process)
	                sys.exit(0)

	            returncode = process.wait()

	        if stopped_by_agent:
	            # Deliberate stop: restart right away without the crash notice.
	            continue
	        if returncode == 0:
	            # Training finished normally; looping would start it over again.
	            print("[Agent] 訓練プロセスが正常終了しました。監視を終了します。")
	            break

	        print(f"[Agent] 訓練プロセスが終了しました (Code: {returncode})。10秒後に再起動を試みます。")
	        time.sleep(10)

	# Start the monitor only when this file is executed as a script.
	if __name__ == "__main__":
	    main()