ipad-vad-training / trigger_gpu_training.py
Claude Code
Add auto-start training on Space rebuild
d14d520
#!/usr/bin/env python3
"""
Trigger GPU training through Gradio interface
Uses HTTP POST to call the Gradio API endpoint
"""
import requests
import json
import time
from datetime import datetime
print("="*70)
print("πŸš€ IPAD VAD GPU Training Trigger via Gradio API")
print("="*70)
print(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print()
# Gradio API endpoint (local)
GRADIO_URL = "http://localhost:7860"
# Step 1: confirm the Gradio app answers at all before attempting the
# long-running training request.
print("[Step 1] Checking Gradio interface...")
try:
    # Only the network call lives in the try body, so nothing unrelated is
    # mistaken for a connection failure.
    response = requests.get(GRADIO_URL, timeout=5)
except requests.RequestException as e:
    print(f"❌ Cannot connect to Gradio: {e}")
    print(" Make sure app.py is running")
    # The bare `exit()` helper is injected by the `site` module and is not
    # guaranteed to exist (e.g. `python -S`); raising SystemExit always works.
    raise SystemExit(1)
else:
    if response.status_code == 200:
        print(f"✅ Gradio interface is running at {GRADIO_URL}")
    else:
        # A non-200 answer is reported but does not abort the run.
        print(f"⚠️ Gradio returned status {response.status_code}")
print()
# Step 2 (informational only): ask the app for its API description and report
# how many named endpoints it advertises. Nothing later in the script uses the
# result, so every failure here is downgraded to a warning.
print("[Step 2] Getting API endpoints...")
try:
    api_response = requests.get(f"{GRADIO_URL}/info", timeout=10)
    if api_response.status_code == 200:
        api_info = api_response.json()
        print("✅ API info retrieved")
        print(f" Named endpoints: {len(api_info.get('named_endpoints', {}))}")
    else:
        print(f"⚠️ Could not get API info: {api_response.status_code}")
except Exception as e:
    # Deliberately broad: a network error, a non-JSON body, or an unexpected
    # payload shape must not stop the training trigger that follows.
    print(f"⚠️ Could not get API info: {e}")
print()
# Step 3: trigger the quick-baseline training run.
#
# Method 1 uses gradio_client; Method 2 (raw HTTP POST) is used only when
# gradio_client is not installed. A failure inside either method is reported
# and the script carries on, so the manual-trigger instructions printed at the
# end of the script are always reached.
import traceback

# Training configuration, defined once so both trigger methods send the same
# values.
device_name = "S01"
epochs = 10
batch_size = 4
lr = 1e-4

print("[Step 3] Attempting to trigger training via gradio_client...")
try:
    # Guard only the import: an ImportError means "library missing", which is
    # the one condition that should select the HTTP fallback.
    from gradio_client import Client
except ImportError:
    Client = None

if Client is not None:
    # Method 1: gradio_client
    try:
        client = Client(GRADIO_URL)
        print("✅ Connected to Gradio client")
        print()

        print("📋 Training Configuration:")
        print(f" Device: {device_name}")
        print(f" Epochs: {epochs}")
        print(f" Batch Size: {batch_size}")
        print(f" Learning Rate: {lr}")
        print()

        print("🚀 Triggering GPU training...")
        print(" This will request ZeroGPU allocation (H200, 80GB)")
        print(" Expected time: ~10-15 minutes")
        print()

        # Call the quick training endpoint; predict() blocks until the
        # endpoint returns, so the elapsed time covers the whole request.
        start_time = time.time()
        result = client.predict(
            device_name=device_name,
            epochs=epochs,
            batch_size=batch_size,
            lr=lr,
            api_name="/train_quick_baseline",
        )
        end_time = time.time()

        print()
        print("=" * 70)
        print(f"✅ Training request completed in {(end_time - start_time) / 60:.1f} minutes!")
        print("=" * 70)
        print()
        print("📊 Result:")
        print(result)
        print()
    except Exception as e:
        # Connection problems, a wrong api_name, or an error raised by the
        # remote function previously escaped as an unhandled traceback and
        # skipped the guidance that follows. No automatic HTTP retry is made
        # here: the request may already have started a training run.
        print(f"❌ gradio_client trigger failed: {e}")
        traceback.print_exc()
else:
    print("⚠️ gradio_client not available, trying HTTP POST...")
    print()

    # Method 2: HTTP POST (fallback)
    print("[Step 3b] Attempting to trigger training via HTTP POST...")
    try:
        endpoint = f"{GRADIO_URL}/api/predict"
        payload = {
            # NOTE(review): fn_index 2 is assumed to be train_quick_baseline;
            # confirm against the event order in app.py.
            "fn_index": 2,
            "data": [
                device_name,
                epochs,
                batch_size,
                lr,
            ],
        }

        print("📋 Sending training request...")
        print(f" Endpoint: {endpoint}")
        print(f" Payload: {json.dumps(payload, indent=2)}")
        print()

        response = requests.post(
            endpoint,
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=3600,  # 1 hour timeout: the call blocks while training runs
        )

        if response.status_code == 200:
            result = response.json()
            print("✅ Training completed!")
            print()
            print("📊 Result:")
            print(json.dumps(result, indent=2))
        else:
            print(f"❌ Training request failed: {response.status_code}")
            print(response.text)
    except Exception as e:
        # Last-resort path: report everything (with traceback) and continue.
        print(f"❌ HTTP POST failed: {e}")
        traceback.print_exc()
# Closing guidance: always printed, so the user has a manual route whether or
# not the automatic trigger above worked.

# Hugging Face Space that hosts the training app, and its Hub page.
SPACE_ID = "MSherbinii/ipad-vad-training"
SPACE_URL = f"https://huggingface.co/spaces/{SPACE_ID}"

# Copy-paste snippet shown to the user. Client() is given the Space id: the
# Hub page URL (huggingface.co/spaces/...) is not the app's own URL, and
# gradio_client cannot load the app from it.
# NOTE(review): the keyword names below (quick_*) differ from the ones this
# script passes to client.predict (device_name, epochs, batch_size, lr);
# confirm which set the Space's API actually exposes.
MANUAL_TRIGGER_SNIPPET = f"""
from gradio_client import Client
client = Client("{SPACE_ID}")
result = client.predict(
quick_device="S01",
quick_epochs=10,
quick_batch=4,
quick_lr=1e-4,
api_name="/train_quick_baseline"
)
print(result)
"""

print()
print("=" * 70)
print("💡 Alternative: Manual Trigger")
print("=" * 70)
print()
print("If automatic trigger doesn't work, manually trigger via web interface:")
print(f"1. Open: {SPACE_URL}")
print("2. Go to '⚡ Quick Test (10 epochs)' tab")
print("3. Click '🚀 Start Quick Training'")
print("4. Wait ~10-15 minutes for completion")
print()
print("Or trigger via Python code:")
print(MANUAL_TRIGGER_SNIPPET)
print()
print("=" * 70)