# NOTE: this file was recovered from a mangled paste; stray "Spaces:" /
# "Runtime error" banner lines from the paste tool were replaced by this note.
# --- Standard library ---
import os
import sys
import asyncio
import json

# --- Third-party ---
import numpy as np
from flask import Blueprint, request, jsonify
from flask_cors import cross_origin

# Make the virtual-GPU package importable; this MUST run before the
# project imports below, which are resolved through this path entry.
vgpu_path = os.path.join(
    os.path.dirname(__file__),
    '..', '..', '..', 'virtual_gpu_setup', 'virtual_gpu',
)
sys.path.insert(0, vgpu_path)

# --- Virtual GPU components (found via the sys.path entry above) ---
from vgpu import VirtualGPU
from vram import VRAM
from driver import GPUDriver
from render import Renderer
from ai import AIAccelerator

# Hugging Face GPT wrapper that lives next to this module.
from .huggingface_gpt_model import HuggingFaceModelManager

ai_chat_bp = Blueprint('ai_chat', __name__)

# Lazily-initialized singletons shared by the handlers below; populated
# on first use by initialize_vgpu().
vgpu_instance = None
ai_accelerator = None
driver = None
hf_model_manager = None
def initialize_vgpu():
    """Build the virtual-GPU stack and load the HF model (runs once).

    Populates the module-level singletons ``vgpu_instance``,
    ``ai_accelerator``, ``driver`` and ``hf_model_manager``. Subsequent
    calls are no-ops while ``vgpu_instance`` is set.
    """
    global vgpu_instance, ai_accelerator, driver, hf_model_manager

    # Guard clause: already initialized, nothing to do.
    if vgpu_instance is not None:
        return

    print("Initializing Virtual GPU with 500GB VRAM...")

    # Memory first — 500 GB of virtual VRAM backs every other component.
    vram = VRAM(memory_size_gb=500)
    renderer = Renderer(vram)
    ai_accelerator = AIAccelerator(vram)

    # 800 streaming multiprocessors, 50,000 cores in total.
    vgpu_instance = VirtualGPU(num_sms=800, total_cores=50000)
    vgpu_instance.set_modules(vram, renderer, ai_accelerator, None)

    # Wire the driver to the device and back.
    driver = GPUDriver(vgpu_instance)
    vgpu_instance.driver = driver

    print("Virtual GPU initialized successfully!")
    print(f"VRAM: {vram.get_stats()['total_memory_gb']} GB")
    print(f"Cores: {vgpu_instance.total_cores:,}")
    print(f"SMs: {vgpu_instance.num_sms}")

    # NOTE(review): original indentation was lost in the paste; the model
    # load is assumed to sit inside the one-time init guard — confirm.
    print("Loading Hugging Face pre-trained model onto virtual GPU...")
    hf_model_manager = HuggingFaceModelManager(ai_accelerator)
    print("Hugging Face model loaded successfully!")
def chat():
    """Serve one chat turn via the Hugging Face model on the virtual GPU.

    Expects a JSON body with a ``message`` key; replies with the model
    response plus GPU statistics, or a 400/500 JSON error payload.
    """
    global hf_model_manager
    try:
        # Make sure the GPU stack and the model exist before serving.
        initialize_vgpu()

        payload = request.get_json()
        if not payload or 'message' not in payload:
            return jsonify({'error': 'No message provided'}), 400

        # Run inference for the user's message.
        reply = hf_model_manager.chat(payload['message'])

        # Snapshot device statistics for the response.
        vgpu_stats = vgpu_instance.get_stats()
        ai_stats = ai_accelerator.get_stats()
        vram_stats = vgpu_instance.vram.get_stats()

        return jsonify({
            'response': reply,
            'gpu_stats': {
                'clock_cycles': vgpu_stats['clock_cycle'],
                'tasks_processed': vgpu_stats['total_tasks_processed'],
                'busy_sms': vgpu_stats['busy_sms'],
                'total_sms': vgpu_stats['total_sms'],
                'ai_operations': ai_stats['operations_performed'],
                'flops_performed': ai_stats['flops_performed'],
                'vram_utilization': vram_stats['utilization_percent'],
                'matrices_in_memory': ai_stats['matrices_in_memory'],
            },
            'model_info': hf_model_manager.get_model_info(),
        })
    except Exception as e:
        return jsonify({'error': f'Hugging Face model error: {str(e)}'}), 500
def gpu_status():
    """Report current virtual-GPU, AI-accelerator and VRAM stats as JSON.

    Returns a 500 JSON error payload if initialization or stat
    collection fails.
    """
    try:
        initialize_vgpu()
        # Collect stats in the same order the device wires them together.
        status_payload = {
            'vgpu': vgpu_instance.get_stats(),
            'ai_accelerator': ai_accelerator.get_stats(),
            'vram': vgpu_instance.vram.get_stats(),
            'status': 'online',
        }
        return jsonify(status_payload)
    except Exception as e:
        return jsonify({'error': f'Failed to get GPU status: {str(e)}'}), 500
def reset_gpu():
    """Drop all virtual-GPU singletons so the next request rebuilds them.

    Returns a JSON success message, or a 500 JSON error payload on
    failure.
    """
    # BUG FIX: the original declared and cleared a nonexistent `ai_model`
    # global while leaving `hf_model_manager` (the global the rest of the
    # module actually uses) pointing at the discarded accelerator.
    global vgpu_instance, ai_accelerator, driver, hf_model_manager
    try:
        # Clear every handle created by initialize_vgpu(); it re-creates
        # them all on the next call because vgpu_instance is None again.
        vgpu_instance = None
        ai_accelerator = None
        driver = None
        hf_model_manager = None
        return jsonify({'message': 'Virtual GPU reset successfully'})
    except Exception as e:
        return jsonify({'error': f'Failed to reset GPU: {str(e)}'}), 500