Spaces:
Runtime error
Runtime error
File size: 4,823 Bytes
55055c7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
import sys
import asyncio
import json
from flask import Blueprint, request, jsonify
from flask_cors import cross_origin
# Add the virtual GPU path to sys.path
vgpu_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'virtual_gpu_setup', 'virtual_gpu')
sys.path.insert(0, vgpu_path)
from vgpu import VirtualGPU
from vram import VRAM
from driver import GPUDriver
from render import Renderer
from ai import AIAccelerator
import numpy as np
# Import the Hugging Face GPT model from the same directory
from .huggingface_gpt_model import HuggingFaceModelManager
# Blueprint exposing the AI-chat API routes (/chat, /gpu-status, /reset-gpu).
ai_chat_bp = Blueprint('ai_chat', __name__)
# Global variables to store GPU components
# All are lazily created by initialize_vgpu() on the first request.
vgpu_instance = None  # VirtualGPU: simulated GPU (SMs/cores); also holds .vram and .driver
ai_accelerator = None  # AIAccelerator: matrix/AI op engine backed by the virtual VRAM
driver = None  # GPUDriver wrapping vgpu_instance
hf_model_manager = None  # HuggingFaceModelManager: pre-trained model running on the vGPU
def initialize_vgpu():
    """Lazily initialize the virtual GPU stack and the Hugging Face model.

    Idempotent: safe to call on every request. The vGPU (VRAM, renderer,
    AI accelerator, driver) is built once; the Hugging Face model manager
    is guarded separately so that a failed model load on a previous call
    is retried instead of being skipped forever.
    """
    global vgpu_instance, ai_accelerator, driver, hf_model_manager
    if vgpu_instance is None:
        print("Initializing Virtual GPU with 500GB VRAM...")
        # Create VRAM (500GB - full virtual GPU capacity)
        vram = VRAM(memory_size_gb=500)
        # Create renderer
        renderer = Renderer(vram)
        # Create AI accelerator
        ai_accelerator = AIAccelerator(vram)
        # Create vGPU with 800 SMs and 50,000 cores
        vgpu_instance = VirtualGPU(num_sms=800, total_cores=50000)
        vgpu_instance.set_modules(vram, renderer, ai_accelerator, None)
        # Create driver
        driver = GPUDriver(vgpu_instance)
        vgpu_instance.driver = driver
        print("Virtual GPU initialized successfully!")
        print(f"VRAM: {vram.get_stats()['total_memory_gb']} GB")
        print(f"Cores: {vgpu_instance.total_cores:,}")
        print(f"SMs: {vgpu_instance.num_sms}")
    # Separate guard: if the vGPU came up but the model load raised (or the
    # model was reset independently), retry the model load here rather than
    # leaving hf_model_manager permanently None.
    if hf_model_manager is None:
        # Initialize the Hugging Face model manager
        print("Loading Hugging Face pre-trained model onto virtual GPU...")
        hf_model_manager = HuggingFaceModelManager(ai_accelerator)
        print("Hugging Face model loaded successfully!")
@ai_chat_bp.route('/chat', methods=['POST'])
@cross_origin()
def chat():
    """Handle chat requests using the Hugging Face pre-trained model."""
    global hf_model_manager
    try:
        # Lazily bring up the virtual GPU and model on the first request.
        initialize_vgpu()
        # Validate the incoming JSON payload.
        payload = request.get_json()
        if not payload or 'message' not in payload:
            return jsonify({'error': 'No message provided'}), 400
        # Run inference for the user's message on the virtual GPU.
        reply = hf_model_manager.chat(payload['message'])
        # Collect telemetry from each simulated component.
        gpu = vgpu_instance.get_stats()
        accel = ai_accelerator.get_stats()
        memory = vgpu_instance.vram.get_stats()
        stats = {
            'clock_cycles': gpu['clock_cycle'],
            'tasks_processed': gpu['total_tasks_processed'],
            'busy_sms': gpu['busy_sms'],
            'total_sms': gpu['total_sms'],
            'ai_operations': accel['operations_performed'],
            'flops_performed': accel['flops_performed'],
            'vram_utilization': memory['utilization_percent'],
            'matrices_in_memory': accel['matrices_in_memory'],
        }
        return jsonify({
            'response': reply,
            'gpu_stats': stats,
            'model_info': hf_model_manager.get_model_info(),
        })
    except Exception as e:
        return jsonify({'error': f'Hugging Face model error: {str(e)}'}), 500
@ai_chat_bp.route('/gpu-status', methods=['GET'])
@cross_origin()
def gpu_status():
    """Get current GPU status."""
    try:
        # Make sure the virtual GPU exists before querying it.
        initialize_vgpu()
        payload = {
            'vgpu': vgpu_instance.get_stats(),
            'ai_accelerator': ai_accelerator.get_stats(),
            'vram': vgpu_instance.vram.get_stats(),
            'status': 'online',
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': f'Failed to get GPU status: {str(e)}'}), 500
@ai_chat_bp.route('/reset-gpu', methods=['POST'])
@cross_origin()
def reset_gpu():
    """Reset the virtual GPU.

    Clears every lazily-initialized global so the next request rebuilds
    the full stack (vGPU, AI accelerator, driver, and the Hugging Face
    model) from scratch via initialize_vgpu().
    """
    # BUG FIX: the original reset a nonexistent global `ai_model` and left
    # `hf_model_manager` (the actual model object) alive across resets.
    global vgpu_instance, ai_accelerator, driver, hf_model_manager
    try:
        vgpu_instance = None
        ai_accelerator = None
        driver = None
        hf_model_manager = None
        return jsonify({'message': 'Virtual GPU reset successfully'})
    except Exception as e:
        return jsonify({'error': f'Failed to reset GPU: {str(e)}'}), 500
|