# NOTE: lines of web-scrape residue (page chrome "Spaces: Building", a byte
# count, repeated commit hashes, and a line-number gutter) were removed here;
# they were not part of the original script.
#!/usr/bin/env python3
"""Debug GPU issues in HuggingFace Spaces environment."""
import os
import sys

print("=== Debugging GPU in HuggingFace Spaces ===")

# These GPU-related variables must be exported before torch/spacy are
# imported, which is why they sit between the stdlib and ML imports.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['SPACY_PREFER_GPU'] = '1'

# Heavy ML libraries are imported only after the environment is configured.
import torch
import spacy

# --- Section 1: report the runtime environment ---
print("\n1. Environment Check:")
print(f" Platform: {sys.platform}")
print(f" Python: {sys.version}")
print(f" Working dir: {os.getcwd()}")
for var_name in ("CUDA_VISIBLE_DEVICES", "SPACY_PREFER_GPU", "SPACES"):
    # 'Not set' distinguishes a missing variable from an empty one.
    print(f" {var_name}: {os.environ.get(var_name, 'Not set')}")
# --- Section 2: PyTorch's view of the GPU ---
# Fixes: restored the stripped indentation (the whole CUDA branch must run
# only when a GPU is visible) and repaired the mojibake'd "✓" glyphs.
print("\n2. PyTorch GPU Status:")
print(f" PyTorch version: {torch.__version__}")
print(f" CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f" CUDA version: {torch.version.cuda}")
    print(f" GPU count: {torch.cuda.device_count()}")
    print(f" Current device: {torch.cuda.current_device()}")
    print(f" GPU 0: {torch.cuda.get_device_name(0)}")
    # Force eager CUDA context creation so later failures surface here,
    # not inside spaCy.
    torch.cuda.init()
    print(" ✓ CUDA initialized")
    # Pin device 0 as the default for subsequent allocations.
    torch.cuda.set_device(0)
    print(" ✓ Set default CUDA device to 0")
# --- Section 3: ask spaCy to use the GPU ---
# Fixes: restored the stripped indentation (the require_gpu attempt belongs
# inside the CUDA-available branch and its try/except) and repaired the
# mojibake'd ✓/✗ glyphs.
print("\n3. SpaCy GPU Configuration:")
print(f" SpaCy version: {spacy.__version__}")

# prefer_gpu() is best-effort: returns True if the GPU was activated,
# False if spaCy fell back to CPU.
print("\n Attempting spacy.prefer_gpu()...")
gpu_id = spacy.prefer_gpu(gpu_id=0)
print(f" Result: {gpu_id}")

if torch.cuda.is_available():
    # require_gpu() raises when no GPU backend is usable, so it is wrapped
    # to keep this diagnostic script running either way.
    print("\n Attempting spacy.require_gpu()...")
    try:
        spacy.require_gpu(gpu_id=0)
        print(" ✓ spacy.require_gpu() succeeded")
    except Exception as e:
        print(f" ✗ spacy.require_gpu() failed: {e}")
# --- Section 4: load a small model and report each component's device ---
# Fixes: restored the stripped indentation, narrowed the bare `except:` to
# `except Exception:` (a bare except also swallows KeyboardInterrupt), and
# repaired the mojibake'd ✓/✗ glyphs.
print("\n4. Test Model Loading:")
try:
    # Try a small model first; if this fails, the transformer test is moot.
    print(" Loading en_core_web_md...")
    nlp_md = spacy.load("en_core_web_md")

    # Report where each pipeline component's weights live, when detectable.
    print(" Checking MD model components:")
    for name, component in nlp_md.pipeline:
        device = "Unknown"
        if hasattr(component, 'model'):
            if hasattr(component.model, 'device'):
                device = str(component.model.device)
            elif hasattr(component.model, 'parameters'):
                # Thinc models may expose parameters() without a device
                # attribute; probe the first parameter instead.
                try:
                    param = next(component.model.parameters())
                    device = str(param.device)
                except Exception:
                    pass  # best-effort probe; leave device as "Unknown"
        print(f" {name}: {device}")

    # Smoke-test a real document through the pipeline.
    doc = nlp_md("Test sentence")
    print(f" ✓ MD model processed {len(doc)} tokens")
except Exception as e:
    print(f" ✗ MD model failed: {e}")
# --- Section 5: load the transformer model and verify it runs on GPU ---
# Fixes: restored the stripped indentation; removed a `config` dict that the
# original built (transformer window/stride settings) but never passed to
# spacy.load(), so it was dead code; narrowed bare excepts; dropped a
# placeholder-free f-string prefix; timed with perf_counter (monotonic,
# meant for intervals); repaired the mojibake'd ✓/✗ glyphs.
print("\n5. Test Transformer Model with GPU:")
try:
    import time  # local import kept from the original; hoisted to the top of the block

    # Re-assert the GPU selection immediately before the heavy load.
    if torch.cuda.is_available():
        torch.cuda.set_device(0)
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    print(" Loading en_core_web_trf with GPU config...")
    nlp_trf = spacy.load("en_core_web_trf")

    # Explicitly move any torch-backed component models to cuda:0, since
    # spaCy may have loaded them on CPU.
    print(" Forcing transformer components to GPU...")
    for name, component in nlp_trf.pipeline:
        if hasattr(component, 'model') and hasattr(component.model, 'to'):
            try:
                component.model.to('cuda:0')
                print(f" ✓ Moved {name} to GPU")
            except Exception as e:
                print(f" ✗ Failed to move {name}: {e}")

    # Re-inspect each component to confirm where its weights actually live.
    print("\n Verifying GPU usage:")
    for name, component in nlp_trf.pipeline:
        on_gpu = False
        device_info = "Unknown"
        if hasattr(component, 'model'):
            # First probe: walk parameters looking for any CUDA tensor.
            if hasattr(component.model, 'parameters'):
                try:
                    for param in component.model.parameters():
                        if param.is_cuda:
                            on_gpu = True
                            device_info = str(param.device)
                            break
                except Exception:
                    pass  # non-torch model; fall through to the device attribute
            # Second probe: a direct device attribute overrides the first.
            if hasattr(component.model, 'device'):
                device_info = str(component.model.device)
                on_gpu = 'cuda' in device_info
        status = "✓ GPU" if on_gpu else "✗ CPU"
        print(f" {name}: {status} ({device_info})")

    # Time one pass through the full pipeline.
    print("\n Testing transformer processing...")
    text = "The quick brown fox jumps over the lazy dog. " * 5
    start = time.perf_counter()
    doc = nlp_trf(text)
    end = time.perf_counter()
    print(f" ✓ Processed {len(doc)} tokens in {end-start:.2f}s")

    # Report CUDA memory in GiB to confirm the model actually allocated VRAM.
    if torch.cuda.is_available():
        mem_allocated = torch.cuda.memory_allocated(0) / 1024**3
        mem_reserved = torch.cuda.memory_reserved(0) / 1024**3
        print("\n GPU Memory:")
        print(f" Allocated: {mem_allocated:.2f} GB")
        print(f" Reserved: {mem_reserved:.2f} GB")
except Exception as e:
    print(f" ✗ Transformer model failed: {e}")
    import traceback
    traceback.print_exc()
# --- Final summary ---
# Fixes: restored the stripped if/else indentation and repaired the
# mojibake'd glyphs ("β" everywhere). Success lines get ✓, the failure line
# gets ✗; the advisory lines use "→" (original glyph unrecoverable — the
# mangling used the same character for every status).
print("\n=== Summary ===")
if torch.cuda.is_available():
    print("✓ CUDA is available")
    print("✓ PyTorch can see GPU")
    print("→ Check if SpaCy models are using GPU above")
else:
    print("✗ No GPU detected in this environment")
    print("→ This script should be run in HuggingFace Spaces with GPU")