import os
import sys
import asyncio
import json
from flask import Blueprint, request, jsonify
from flask_cors import cross_origin

# Add the virtual GPU path to sys.path
vgpu_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'virtual_gpu_setup', 'virtual_gpu')
sys.path.insert(0, vgpu_path)

from vgpu import VirtualGPU
from vram import VRAM
from driver import GPUDriver
from render import Renderer
from ai import AIAccelerator
import numpy as np

# Import the Hugging Face GPT model from the same directory
from .huggingface_gpt_model import HuggingFaceModelManager

ai_chat_bp = Blueprint('ai_chat', __name__)

# Global variables to store GPU components
# All four are lazily populated by initialize_vgpu() on the first request
# and remain None until then (and again after a /reset-gpu call).
vgpu_instance = None      # VirtualGPU: the simulated GPU (SMs + cores)
ai_accelerator = None     # AIAccelerator: matrix/AI op engine bound to VRAM
driver = None             # GPUDriver wrapping vgpu_instance
hf_model_manager = None   # HuggingFaceModelManager running on the vGPU

def initialize_vgpu():
    """Lazily build the virtual GPU stack and the Hugging Face model manager.

    Populates the module-level globals (``vgpu_instance``, ``ai_accelerator``,
    ``driver``, ``hf_model_manager``) on first call; once ``vgpu_instance``
    exists, the function returns immediately without re-initializing.
    """
    global vgpu_instance, ai_accelerator, driver, hf_model_manager

    # Guard clause: already initialized — nothing to do.
    if vgpu_instance is not None:
        return

    print("Initializing Virtual GPU with 500GB VRAM...")

    # Memory first: 500GB simulated VRAM backs every other component.
    video_memory = VRAM(memory_size_gb=500)
    frame_renderer = Renderer(video_memory)
    ai_accelerator = AIAccelerator(video_memory)

    # The vGPU itself: 800 SMs, 50,000 cores, wired to the modules above.
    vgpu_instance = VirtualGPU(num_sms=800, total_cores=50000)
    vgpu_instance.set_modules(video_memory, frame_renderer, ai_accelerator, None)

    # Driver sits on top of the vGPU and is also attached back onto it.
    driver = GPUDriver(vgpu_instance)
    vgpu_instance.driver = driver

    print("Virtual GPU initialized successfully!")
    print(f"VRAM: {video_memory.get_stats()['total_memory_gb']} GB")
    print(f"Cores: {vgpu_instance.total_cores:,}")
    print(f"SMs: {vgpu_instance.num_sms}")

    # Finally, load the pre-trained model onto the accelerator.
    print("Loading Hugging Face pre-trained model onto virtual GPU...")
    hf_model_manager = HuggingFaceModelManager(ai_accelerator)
    print("Hugging Face model loaded successfully!")

@ai_chat_bp.route('/chat', methods=['POST'])
@cross_origin()
def chat():
    """Handle chat requests using the Hugging Face pre-trained model.

    Expects a JSON body with a ``message`` key; responds with the model's
    reply plus a snapshot of vGPU/accelerator/VRAM statistics and model info.
    Returns 400 on a missing message and 500 on any model/GPU failure.
    """
    global hf_model_manager

    try:
        # Lazy init: first request pays the GPU/model setup cost.
        initialize_vgpu()

        payload = request.get_json()
        if not payload or 'message' not in payload:
            return jsonify({'error': 'No message provided'}), 400

        # Run the prompt through the model hosted on the virtual GPU.
        reply = hf_model_manager.chat(payload['message'])

        # Snapshot stats after generation so they reflect this request's work.
        gpu_snapshot = vgpu_instance.get_stats()
        accel_snapshot = ai_accelerator.get_stats()
        memory_snapshot = vgpu_instance.vram.get_stats()

        stats = {
            'clock_cycles': gpu_snapshot['clock_cycle'],
            'tasks_processed': gpu_snapshot['total_tasks_processed'],
            'busy_sms': gpu_snapshot['busy_sms'],
            'total_sms': gpu_snapshot['total_sms'],
            'ai_operations': accel_snapshot['operations_performed'],
            'flops_performed': accel_snapshot['flops_performed'],
            'vram_utilization': memory_snapshot['utilization_percent'],
            'matrices_in_memory': accel_snapshot['matrices_in_memory'],
        }

        return jsonify({
            'response': reply,
            'gpu_stats': stats,
            'model_info': hf_model_manager.get_model_info(),
        })

    except Exception as e:
        return jsonify({'error': f'Hugging Face model error: {str(e)}'}), 500

@ai_chat_bp.route('/gpu-status', methods=['GET'])
@cross_origin()
def gpu_status():
    """Get current GPU status.

    Returns the raw stats dicts from the vGPU, the AI accelerator, and VRAM,
    initializing the stack first if needed; 500 with an error message on failure.
    """
    try:
        # Make sure the globals exist before querying them.
        initialize_vgpu()

        status_payload = {
            'vgpu': vgpu_instance.get_stats(),
            'ai_accelerator': ai_accelerator.get_stats(),
            'vram': vgpu_instance.vram.get_stats(),
            'status': 'online',
        }
        return jsonify(status_payload)

    except Exception as e:
        return jsonify({'error': f'Failed to get GPU status: {str(e)}'}), 500

@ai_chat_bp.route('/reset-gpu', methods=['POST'])
@cross_origin()
def reset_gpu():
    """Reset the virtual GPU.

    Clears every module-level GPU component — including the Hugging Face
    model manager — so the next request re-initializes the stack from scratch
    via initialize_vgpu(). Returns a confirmation message, or 500 on failure.
    """
    # BUG FIX: the original declared/cleared a nonexistent `ai_model` global,
    # so `hf_model_manager` (and the model it holds) was never released on reset.
    global vgpu_instance, ai_accelerator, driver, hf_model_manager

    try:
        vgpu_instance = None
        ai_accelerator = None
        driver = None
        hf_model_manager = None

        return jsonify({'message': 'Virtual GPU reset successfully'})

    except Exception as e:
        return jsonify({'error': f'Failed to reset GPU: {str(e)}'}), 500