Spaces:
Building
Building
File size: 11,577 Bytes
89e2764 ce8e901 89e2764 305a33d 89e2764 305a33d 89e2764 eab1374 89e2764 eab1374 89e2764 ce8e901 eab1374 ce8e901 eab1374 ce8e901 eab1374 ce8e901 eab1374 ce8e901 eab1374 ce8e901 eab1374 89e2764 4d2898f 3f10400 4d2898f 3f10400 4d2898f 3f10400 4d2898f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 |
"""Debug utilities for troubleshooting Huggingface Spaces issues."""
import streamlit as st
import os
import sys
from web_app.utils import MemoryFileHandler
def show_environment_info():
    """Display system, process, permission, and env-var info for debugging.

    Renders a two-column Streamlit layout: the left column shows Python /
    platform / process identity details, the right column probes a fixed
    list of directories for writability. Below the columns, a selection of
    deployment-relevant environment variables is printed.
    """
    st.write("### Environment Information")

    col1, col2 = st.columns(2)

    with col1:
        st.write("**System Info:**")
        st.write(f"- Python: {sys.version}")
        st.write(f"- Platform: {sys.platform}")
        st.write(f"- Working Dir: {os.getcwd()}")
        st.write(f"- User: {os.environ.get('USER', 'N/A')}")
        st.write(f"- Home: {os.environ.get('HOME', 'N/A')}")

        # Process identity — useful when the container runs as a non-root user.
        # NOTE(review): os.getuid/os.getgid are POSIX-only; fine on Spaces (Linux).
        st.write("**Process Info:**")
        st.write(f"- UID: {os.getuid()}")
        st.write(f"- GID: {os.getgid()}")
        st.write(f"- PID: {os.getpid()}")

    with col2:
        st.write("**Directory Permissions:**")
        dirs_to_check = ['/tmp', '.', './web_app', os.environ.get('HOME', '/')]
        for dir_path in dirs_to_check:
            if os.path.exists(dir_path):
                try:
                    # Probe writability by actually creating and removing a
                    # file — os.access() can be misleading inside containers.
                    test_file = os.path.join(dir_path, '.write_test')
                    with open(test_file, 'w') as f:
                        f.write('test')
                    os.remove(test_file)
                    st.write(f"- {dir_path}: ✅ Writable")
                except OSError:  # was a bare `except:`; only I/O errors are expected here
                    st.write(f"- {dir_path}: ❌ Not writable")
            else:
                st.write(f"- {dir_path}: ⚠️ Not found")

    st.write("**Environment Variables:**")
    important_vars = ['STREAMLIT_SERVER_PORT', 'STREAMLIT_SERVER_ADDRESS',
                      'UV_CACHE_DIR', 'TMPDIR', 'TEMP', 'TMP', 'SPACES',
                      'SPACE_ID', 'SPACE_HOST']
    for var in important_vars:
        value = os.environ.get(var, 'Not set')
        st.write(f"- {var}: {value}")
def test_file_operations():
    """Run in-memory file-operation smoke tests and render the results.

    Each check appends a ``(name, status, error)`` tuple to ``tests``;
    results are then shown in a two-column layout with the error message
    (if any) printed beneath the test name.
    """
    st.write("### File Operation Tests")

    tests = []

    # Test 1: text buffer round-trip (StringIO).
    try:
        from io import StringIO
        StringIO("test content").read()
        tests.append(("StringIO operations", "✅ Success", None))
    except Exception as e:
        tests.append(("StringIO operations", "❌ Failed", str(e)))

    # Test 2: binary buffer round-trip (BytesIO).
    try:
        from io import BytesIO
        BytesIO(b"test content").read()
        tests.append(("BytesIO operations", "✅ Success", None))
    except Exception as e:
        tests.append(("BytesIO operations", "❌ Failed", str(e)))

    # Test 3: Streamlit session-state set / get / delete.
    try:
        st.session_state.test_key = "test_value"
        _ = st.session_state.test_key
        del st.session_state.test_key
        tests.append(("Session state operations", "✅ Success", None))
    except Exception as e:
        tests.append(("Session state operations", "❌ Failed", str(e)))

    # Display results.
    for test_name, status, error in tests:
        col1, col2 = st.columns([3, 1])
        with col1:
            st.write(f"**{test_name}**")
            if error:
                st.write(f"  Error: {error}")
        with col2:
            st.write(status)
def debug_file_upload():
    """Offer a test uploader and report which read strategies succeed.

    Exercises the uploaded file object's direct ``read()`` / ``getvalue()``
    methods, then the project's ``MemoryFileHandler`` in binary and text
    modes, reporting success or failure for each approach. Intended to
    pinpoint whether upload failures come from Streamlit or from the
    project's own file handling.
    """
    st.write("### File Upload Debug")

    uploaded_file = st.file_uploader("Test file upload", type=['txt', 'csv'])

    if uploaded_file:
        st.write("**File Info:**")
        st.write(f"- Name: {uploaded_file.name}")
        st.write(f"- Type: {uploaded_file.type}")
        st.write(f"- Size: {uploaded_file.size} bytes")

        try:
            # Direct read methods on the UploadedFile object.
            st.write("**Direct Read Methods:**")
            try:
                uploaded_file.seek(0)  # rewind in case a prior read consumed the stream
                content = uploaded_file.read()
                st.write(f"- Read method: ✅ Success ({len(content)} bytes)")
            except Exception as e:
                st.write(f"- Read method: ❌ Failed - {str(e)}")

            try:
                uploaded_file.seek(0)
                value = uploaded_file.getvalue()
                st.write(f"- GetValue method: ✅ Success ({len(value)} bytes)")
            except Exception as e:
                st.write(f"- GetValue method: ❌ Failed - {str(e)}")

            # Project helper that processes uploads without touching disk.
            st.write("\n**Memory-based Approach:**")
            try:
                uploaded_file.seek(0)
                content = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=False)
                if content:
                    st.write(f"- Process file (binary): ✅ Success ({len(content)} bytes)")
                    uploaded_file.seek(0)
                    text_content = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=True)
                    if text_content:
                        st.write(f"- Process file (text): ✅ Success ({len(text_content)} chars)")
                    else:
                        st.write("- Process file (text): ❌ Failed")
                else:
                    st.write("- Process file (binary): ❌ Failed")
            except Exception as e:
                st.error(f"Error with memory-based approach: {e}")
                import traceback
                st.code(traceback.format_exc())
        except Exception as e:
            st.error(f"Error processing file: {e}")
            import traceback
            st.code(traceback.format_exc())
def show_gpu_status():
    """Display GPU/CUDA status information for debugging.

    Shows PyTorch/CUDA availability with per-GPU memory, the spaCy GPU
    configuration, the GPU status of the model currently held in session
    state (if any), and an expander with performance tips.
    """
    st.write("### GPU Status Information")

    st.write("**PyTorch/CUDA Status:**")
    try:
        import torch

        col1, col2 = st.columns(2)

        with col1:
            st.write(f"- PyTorch version: {torch.__version__}")
            if torch.cuda.is_available():
                st.write("- CUDA available: ✅ Yes")  # was an f-string with no placeholder
                st.write(f"- CUDA version: {torch.version.cuda}")
                st.write(f"- Number of GPUs: {torch.cuda.device_count()}")

                # Per-device memory breakdown, converted from bytes to GiB.
                for i in range(torch.cuda.device_count()):
                    st.write(f"\n**GPU {i}: {torch.cuda.get_device_name(i)}**")
                    memory_allocated = torch.cuda.memory_allocated(i) / 1024**3
                    memory_reserved = torch.cuda.memory_reserved(i) / 1024**3
                    memory_total = torch.cuda.get_device_properties(i).total_memory / 1024**3
                    st.write(f"  - Total memory: {memory_total:.2f} GB")
                    st.write(f"  - Allocated: {memory_allocated:.2f} GB")
                    st.write(f"  - Reserved: {memory_reserved:.2f} GB")
                    st.write(f"  - Free: {memory_total - memory_reserved:.2f} GB")
            else:
                st.write("- CUDA available: ❌ No")
                st.write("- Running on: CPU only")

        with col2:
            _show_spacy_gpu_config()
    except ImportError:
        st.warning("PyTorch not installed - GPU support unavailable")
        st.write("To enable GPU support, install PyTorch with CUDA support")
    except Exception as e:
        st.error(f"Error checking GPU status: {str(e)}")

    _show_active_model_status()

    with st.expander("💡 GPU Performance Tips", expanded=False):
        st.write("""
        **Optimization Tips:**
        - Transformer models benefit most from GPU acceleration
        - Batch processing is automatically optimized when GPU is enabled
        - Mixed precision is enabled for transformer models on GPU
        - GPU memory is managed automatically with fallback to CPU if needed

        **Common Issues:**
        - If GPU is not detected, ensure CUDA-compatible PyTorch is installed
        - Memory errors: Try smaller batch sizes or use CPU for very large texts
        - Performance: GPU shows most benefit with batch processing
        """)


def _show_spacy_gpu_config():
    """Report spaCy GPU preference and transformer-package availability."""
    st.write("**SpaCy GPU Configuration:**")
    try:
        import spacy

        # spacy.prefer_gpu() returns False when no GPU is usable.
        gpu_id = spacy.prefer_gpu()
        if gpu_id is not False:
            st.write(f"- SpaCy GPU: ✅ Enabled (device {gpu_id})")
        else:
            st.write("- SpaCy GPU: ❌ Disabled")

        # Presence checks only — the imported modules are not otherwise used.
        transformer_status = []
        try:
            import spacy_transformers  # noqa: F401
            transformer_status.append("spacy-transformers: ✅ Installed")
        except ImportError:
            transformer_status.append("spacy-transformers: ❌ Not installed")
        try:
            import spacy_curated_transformers  # noqa: F401
            transformer_status.append("spacy-curated-transformers: ✅ Installed")
        except ImportError:
            transformer_status.append("spacy-curated-transformers: ❌ Not installed")

        for status in transformer_status:
            st.write(f"- {status}")
    except Exception as e:
        st.write(f"- SpaCy GPU check failed: {str(e)}")


def _show_active_model_status():
    """Show the GPU configuration of the model held in session state, if any."""
    st.write("\n**Active Model GPU Status:**")
    try:
        # The model object may live under either 'analyzer' or 'parser'.
        analyzer = None
        if hasattr(st.session_state, 'analyzer') and st.session_state.analyzer:
            analyzer = st.session_state.analyzer
        elif hasattr(st.session_state, 'parser') and st.session_state.parser:
            analyzer = st.session_state.parser

        if analyzer:
            model_info = analyzer.get_model_info()
            col1, col2 = st.columns(2)
            with col1:
                st.write("**Current Model:**")
                st.write(f"- Model: {model_info.get('name', 'N/A')}")
                st.write(f"- Language: {model_info.get('language', 'N/A')}")
                st.write(f"- Size: {model_info.get('model_size', 'N/A')}")
            with col2:
                st.write("**Device Configuration:**")
                st.write(f"- Device: {model_info.get('device', 'N/A')}")
                gpu_enabled = model_info.get('gpu_enabled', False)
                st.write(f"- GPU Enabled: {'✅ Yes' if gpu_enabled else '❌ No'}")
                st.write(f"- SpaCy version: {model_info.get('version', 'N/A')}")

            # Optimization summary only applies to transformer models on GPU.
            if model_info.get('model_size') == 'trf' and gpu_enabled:
                st.write("\n**GPU Optimizations:**")
                st.write("- Mixed precision: ✅ Enabled")
                st.write("- Batch size: Optimized for GPU")
                st.write("- Memory efficiency: Enhanced")
        else:
            st.info("No model currently loaded. Load a model to see its GPU configuration.")
    except Exception as e:
        st.write(f"Could not retrieve active model info: {str(e)}")
|