# NEWORLD/src/routes/ai_chat.py — Factor Studios
import os
import sys
import asyncio
import json
from flask import Blueprint, request, jsonify
from flask_cors import cross_origin
# Add the virtual GPU path to sys.path
vgpu_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'virtual_gpu_setup', 'virtual_gpu')
sys.path.insert(0, vgpu_path)
from vgpu import VirtualGPU
from vram import VRAM
from driver import GPUDriver
from render import Renderer
from ai import AIAccelerator
import numpy as np
# Import the Hugging Face GPT model from the same directory
from .huggingface_gpt_model import HuggingFaceModelManager
# Blueprint for the AI chat endpoints; the application registers it elsewhere.
ai_chat_bp = Blueprint('ai_chat', __name__)
# Global variables to store GPU components
# Lazily populated by initialize_vgpu() on the first request and cleared by
# reset_gpu(); all start as None until initialization runs.
vgpu_instance = None      # VirtualGPU — simulated GPU (SMs/cores)
ai_accelerator = None     # AIAccelerator bound to the virtual VRAM
driver = None             # GPUDriver wrapping vgpu_instance
hf_model_manager = None   # HuggingFaceModelManager that answers chat requests
def initialize_vgpu():
    """Initialize the virtual GPU components and load the chat model.

    Idempotent: builds the VRAM, renderer, AI accelerator, VirtualGPU,
    driver, and the Hugging Face model manager once, storing them in the
    module-level globals. Subsequent calls are no-ops while everything
    is already initialized.

    Raises:
        Exception: propagated from any component constructor (callers
            such as the route handlers catch and report it).
    """
    global vgpu_instance, ai_accelerator, driver, hf_model_manager
    # Guard on BOTH globals: the original checked only vgpu_instance, so a
    # failure while loading the HF model (the last step) left vgpu_instance
    # set and hf_model_manager permanently None — every later request then
    # skipped initialization and crashed in chat(). Re-running here repairs
    # a partially-initialized state.
    if vgpu_instance is None or hf_model_manager is None:
        print("Initializing Virtual GPU with 500GB VRAM...")
        # Create VRAM (500GB - full virtual GPU capacity)
        vram = VRAM(memory_size_gb=500)
        # Create renderer
        renderer = Renderer(vram)
        # Create AI accelerator
        ai_accelerator = AIAccelerator(vram)
        # Create vGPU with 800 SMs and 50,000 cores
        vgpu_instance = VirtualGPU(num_sms=800, total_cores=50000)
        vgpu_instance.set_modules(vram, renderer, ai_accelerator, None)
        # Create driver and attach it back onto the vGPU
        driver = GPUDriver(vgpu_instance)
        vgpu_instance.driver = driver
        print("Virtual GPU initialized successfully!")
        print(f"VRAM: {vram.get_stats()['total_memory_gb']} GB")
        print(f"Cores: {vgpu_instance.total_cores:,}")
        print(f"SMs: {vgpu_instance.num_sms}")
        # Load the Hugging Face model manager last; it is the step most
        # likely to fail (model download/load), hence the dual guard above.
        print("Loading Hugging Face pre-trained model onto virtual GPU...")
        hf_model_manager = HuggingFaceModelManager(ai_accelerator)
        print("Hugging Face model loaded successfully!")
@ai_chat_bp.route('/chat', methods=['POST'])
@cross_origin()
def chat():
    """Answer a chat message with the Hugging Face model on the virtual GPU.

    Expects a JSON body containing a 'message' key. Returns the model's
    response together with a snapshot of GPU/VRAM statistics and model info;
    400 on a missing message, 500 on any model or GPU failure.
    """
    global hf_model_manager
    try:
        # Lazily bring up the virtual GPU and model on first use.
        initialize_vgpu()

        # Validate the request payload.
        payload = request.get_json()
        if not payload or 'message' not in payload:
            return jsonify({'error': 'No message provided'}), 400

        # Run inference on the virtual GPU.
        reply = hf_model_manager.chat(payload['message'])

        # Snapshot the hardware counters after generation.
        gpu = vgpu_instance.get_stats()
        ai = ai_accelerator.get_stats()
        mem = vgpu_instance.vram.get_stats()
        stats = {
            'clock_cycles': gpu['clock_cycle'],
            'tasks_processed': gpu['total_tasks_processed'],
            'busy_sms': gpu['busy_sms'],
            'total_sms': gpu['total_sms'],
            'ai_operations': ai['operations_performed'],
            'flops_performed': ai['flops_performed'],
            'vram_utilization': mem['utilization_percent'],
            'matrices_in_memory': ai['matrices_in_memory'],
        }

        return jsonify({
            'response': reply,
            'gpu_stats': stats,
            'model_info': hf_model_manager.get_model_info(),
        })
    except Exception as e:
        return jsonify({'error': f'Hugging Face model error: {str(e)}'}), 500
@ai_chat_bp.route('/gpu-status', methods=['GET'])
@cross_origin()
def gpu_status():
    """Report the current state of the virtual GPU, AI accelerator, and VRAM."""
    try:
        # Make sure the components exist before querying them.
        initialize_vgpu()
        report = {
            'vgpu': vgpu_instance.get_stats(),
            'ai_accelerator': ai_accelerator.get_stats(),
            'vram': vgpu_instance.vram.get_stats(),
            'status': 'online',
        }
        return jsonify(report)
    except Exception as e:
        return jsonify({'error': f'Failed to get GPU status: {str(e)}'}), 500
@ai_chat_bp.route('/reset-gpu', methods=['POST'])
@cross_origin()
def reset_gpu():
    """Reset the virtual GPU by discarding all global components.

    Clearing the globals lets initialize_vgpu() rebuild everything on the
    next request. Returns a confirmation message, or 500 on failure.
    """
    # BUG FIX: the original declared and cleared a nonexistent `ai_model`
    # global and never touched hf_model_manager, so the loaded Hugging Face
    # model (and its memory) survived a "reset". Clear the real global.
    global vgpu_instance, ai_accelerator, driver, hf_model_manager
    try:
        vgpu_instance = None
        ai_accelerator = None
        driver = None
        hf_model_manager = None
        return jsonify({'message': 'Virtual GPU reset successfully'})
    except Exception as e:
        return jsonify({'error': f'Failed to reset GPU: {str(e)}'}), 500