hfproxydemo / start_system.py
OpenCode Deployer
监控系统开发: 2026-02-01 15:40:53
14f6b4f
#!/usr/bin/env python3
import asyncio
import argparse
import logging
import os
import sys
from integration_orchestrator import RepairOrchestrator
from repair_loop_engine import RepairLoopEngine, LoopConfig
from auto_repair_executor import AutoRepairExecutor
from rollback_manager import RollbackManager
from data_models import SpaceInfo, SpaceStatus
from datetime import datetime
def setup_logging(level: str = "INFO"):
    """Configure logging to emit records to stdout and ``hf_repair_system.log``.

    Args:
        level: Name of a ``logging`` level constant, case-insensitive
            (e.g. ``"debug"`` or ``"INFO"``). The name is upper-cased and
            looked up as an attribute of the ``logging`` module, so an
            unknown name raises ``AttributeError``.
    """
    # Resolve the level first so a bad name fails before the log file is opened.
    numeric_level = getattr(logging, level.upper())
    console_handler = logging.StreamHandler(sys.stdout)
    file_handler = logging.FileHandler('hf_repair_system.log')
    logging.basicConfig(
        level=numeric_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[console_handler, file_handler],
    )
async def start_monitoring(spaces: list, config: LoopConfig):
    """Wire up the repair components and monitor the given Spaces until stopped.

    Builds an ``AutoRepairExecutor``, ``RollbackManager``, ``RepairLoopEngine``
    and ``RepairOrchestrator``, registers every Space with the loop engine,
    starts the orchestrator and then logs its stats every 60 seconds until the
    coroutine is interrupted or cancelled. The orchestrator is always stopped
    on the way out.

    Args:
        spaces: Space IDs in ``username/space-name`` form.
        config: Loop configuration handed to ``RepairLoopEngine``.

    Raises:
        asyncio.CancelledError: Re-raised after logging when the surrounding
            task is cancelled (which is how ``asyncio.run`` delivers Ctrl+C).
    """
    logger = logging.getLogger(__name__)

    # NOTE: no Hugging Face client is constructed here; components receive None.
    hf_client = None
    repair_executor = AutoRepairExecutor(hf_client, repo_path=".")
    rollback_manager = RollbackManager()
    loop_engine = RepairLoopEngine(repair_executor, config)
    orchestrator = RepairOrchestrator(hf_client)
    orchestrator.set_components(repair_executor, loop_engine, rollback_manager)

    for space_id in spaces:
        # Each Space starts out recorded as RUNNING with the current local time.
        space_info = SpaceInfo(
            space_id=space_id,
            name=space_id.split('/')[-1],
            repository_url=f"https://huggingface.co/spaces/{space_id}",
            current_status=SpaceStatus.RUNNING,
            last_updated=datetime.now(),
        )
        loop_engine.add_space(space_info)
        logger.info(f"Added monitoring for Space: {space_id}")

    await orchestrator.start_monitoring()
    logger.info("Repair system monitoring started")
    try:
        while True:
            await asyncio.sleep(60)
            stats = orchestrator.get_orchestrator_stats()
            logger.info(f"System stats: {stats}")
    except asyncio.CancelledError:
        # Under asyncio.run(), Ctrl+C cancels the main task, so the interrupt
        # arrives here as CancelledError rather than KeyboardInterrupt.
        # Re-raise so the runner can finish its own shutdown handling.
        logger.info("Received interrupt signal")
        raise
    except KeyboardInterrupt:
        # Kept for callers that drive this coroutine without asyncio.run().
        logger.info("Received interrupt signal")
    finally:
        await orchestrator.stop_monitoring()
        logger.info("Repair system monitoring stopped")
async def repair_single_space(space_id: str, dry_run: bool = False):
    """Log the start of a repair for one Space; no repair is performed here.

    Args:
        space_id: ID of the Space to repair.
        dry_run: When true, only log that a repair would be analyzed and
            planned. When false, log that the full system setup is required.
    """
    log = logging.getLogger(__name__)

    # The orchestrator is built without a client and is not used any further
    # in this function.
    client = None
    orchestrator = RepairOrchestrator(client)

    log.info(f"Starting repair for Space: {space_id}")
    if not dry_run:
        log.info("Repair functionality requires full system setup")
        return
    log.info("DRY RUN: Would analyze and plan repair, but not execute")
def validate_spaces(spaces: list) -> bool:
    """Check that every Space ID has the form ``username/space-name``.

    An ID is valid when it contains exactly one ``/`` and both the part
    before it and the part after it are non-blank. On the first invalid ID
    an explanation is printed and validation stops.

    Args:
        spaces: Space ID strings to check. An empty list is considered valid.

    Returns:
        ``True`` if all IDs are well-formed, ``False`` otherwise.
    """
    for space_id in spaces:
        parts = space_id.split('/')
        # Exactly two components, neither empty nor whitespace-only; this
        # rejects "user/", "/space" and "/" in addition to "a/b/c" and "abc".
        if len(parts) != 2 or not all(part.strip() for part in parts):
            print(f"Invalid Space ID format: {space_id}")
            print("Expected format: username/space-name")
            return False
    return True
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the repair system."""
    parser = argparse.ArgumentParser(
        description="HuggingFace Spaces 自动修复系统",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "示例:\n"
            " # 监控模式\n"
            " python start_system.py --monitor user/space1 user/space2\n"
            " \n"
            " # 修复单个 Space\n"
            " python start_system.py --repair user/space1\n"
            " \n"
            " # 试运行\n"
            " python start_system.py --repair user/space1 --dry-run"
        ),
    )

    # Exactly one operating mode must be chosen.
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument('--monitor', nargs='+', metavar='SPACE', help='监控指定的 Spaces')
    mode_group.add_argument('--repair', metavar='SPACE', help='修复指定的 Space')

    parser.add_argument('--max-attempts', type=int, default=5, help='最大修复尝试次数')
    parser.add_argument('--timeout', type=int, default=60, help='超时时间(分钟)')
    parser.add_argument('--check-interval', type=int, default=60, help='检查间隔(秒)')
    parser.add_argument('--max-concurrent', type=int, default=3, help='最大并发修复数')
    parser.add_argument('--log-level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default='INFO', help='日志级别')
    parser.add_argument('--dry-run', action='store_true', help='试运行模式')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0.0')
    return parser


def main():
    """Parse the command line and run either monitor mode or single-repair mode.

    Exits with status 1 when ``HF_TOKEN`` is missing (unless this is a repair
    dry run), when a Space ID is malformed, or when an unexpected exception
    escapes the selected mode. Exits with status 0 on ``KeyboardInterrupt``.
    """
    args = _build_parser().parse_args()
    setup_logging(args.log_level)
    logger = logging.getLogger(__name__)

    if args.monitor and args.dry_run:
        # Monitor mode has no dry-run path; say so instead of silently ignoring it.
        logger.warning("--dry-run has no effect in --monitor mode; monitoring will run normally")

    # Only a repair dry run may skip the token: monitor mode always runs for
    # real, so --dry-run must not bypass the check there.
    is_repair_dry_run = args.repair is not None and args.dry_run
    if not os.getenv('HF_TOKEN') and not is_repair_dry_run:
        logger.error("HF_TOKEN environment variable is required")
        sys.exit(1)

    try:
        if args.monitor:
            if not validate_spaces(args.monitor):
                sys.exit(1)
            config = LoopConfig(
                max_iterations=args.max_attempts,
                timeout_minutes=args.timeout,
                check_interval_seconds=args.check_interval,
                max_concurrent_repairs=args.max_concurrent,
            )
            logger.info(f"Starting monitoring for {len(args.monitor)} spaces")
            asyncio.run(start_monitoring(args.monitor, config))
        elif args.repair is not None:
            # `is not None` so that an empty --repair value is reported as an
            # invalid Space ID rather than silently doing nothing.
            if not validate_spaces([args.repair]):
                sys.exit(1)
            asyncio.run(repair_single_space(args.repair, args.dry_run))
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception as e:
        # Top-level boundary: keep the traceback in the log for diagnosis.
        logger.exception(f"Fatal error: {e}")
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()