# simple-text-analyzer / web_app / debug_utils.py
# Commit 3f10400 ("trigger GPU") by egumasa
"""Debug utilities for troubleshooting Huggingface Spaces issues."""
import streamlit as st
import os
import sys
from web_app.utils import MemoryFileHandler
def show_environment_info():
    """Display system, process, permission, and env-var info for debugging.

    Renders directly into the Streamlit page in two columns; returns nothing.
    Intended to diagnose sandbox/permission issues on Huggingface Spaces.
    """
    st.write("### Environment Information")

    col1, col2 = st.columns(2)

    with col1:
        st.write("**System Info:**")
        st.write(f"- Python: {sys.version}")
        st.write(f"- Platform: {sys.platform}")
        st.write(f"- Working Dir: {os.getcwd()}")
        st.write(f"- User: {os.environ.get('USER', 'N/A')}")
        st.write(f"- Home: {os.environ.get('HOME', 'N/A')}")

        # Process identity helps diagnose container permission issues.
        # getuid/getgid are POSIX-only, so guard for non-POSIX platforms.
        st.write("**Process Info:**")
        if hasattr(os, "getuid"):
            st.write(f"- UID: {os.getuid()}")
            st.write(f"- GID: {os.getgid()}")
        else:
            st.write("- UID/GID: N/A on this platform")
        st.write(f"- PID: {os.getpid()}")

    with col2:
        st.write("**Directory Permissions:**")
        dirs_to_check = ['/tmp', '.', './web_app', os.environ.get('HOME', '/')]
        for dir_path in dirs_to_check:
            if not os.path.exists(dir_path):
                st.write(f"- {dir_path}: ⚠️ Not found")
                continue
            try:
                # Probe writability by creating and removing a marker file.
                test_file = os.path.join(dir_path, '.write_test')
                with open(test_file, 'w') as f:
                    f.write('test')
                os.remove(test_file)
                st.write(f"- {dir_path}: βœ… Writable")
            except OSError:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # programming errors are not silently swallowed.
                st.write(f"- {dir_path}: ❌ Not writable")

    st.write("**Environment Variables:**")
    important_vars = ['STREAMLIT_SERVER_PORT', 'STREAMLIT_SERVER_ADDRESS',
                      'UV_CACHE_DIR', 'TMPDIR', 'TEMP', 'TMP', 'SPACES',
                      'SPACE_ID', 'SPACE_HOST']
    for var in important_vars:
        value = os.environ.get(var, 'Not set')
        st.write(f"- {var}: {value}")
def test_file_operations():
    """Exercise in-memory buffer and session-state operations, reporting pass/fail.

    Runs three self-contained probes (StringIO, BytesIO, Streamlit session
    state) and renders one result row per probe.
    """
    st.write("### File Operation Tests")

    def _probe_string_io():
        # Round-trip a small text buffer through io.StringIO.
        from io import StringIO
        StringIO("test content").read()

    def _probe_bytes_io():
        # Round-trip a small binary buffer through io.BytesIO.
        from io import BytesIO
        BytesIO(b"test content").read()

    def _probe_session_state():
        # Set, read back, and delete a throwaway session-state key.
        st.session_state.test_key = "test_value"
        _ = st.session_state.test_key
        del st.session_state.test_key

    probes = [
        ("StringIO operations", _probe_string_io),
        ("BytesIO operations", _probe_bytes_io),
        ("Session state operations", _probe_session_state),
    ]

    outcomes = []
    for label, probe in probes:
        try:
            probe()
            outcomes.append((label, "βœ… Success", None))
        except Exception as exc:
            outcomes.append((label, "❌ Failed", str(exc)))

    # Render: probe name (plus error detail, if any) on the left,
    # status badge on the right.
    for label, status, error in outcomes:
        left, right = st.columns([3, 1])
        with left:
            st.write(f"**{label}**")
            if error:
                st.write(f" Error: {error}")
        with right:
            st.write(status)
def debug_file_upload():
    """Interactive widget for diagnosing file-upload handling.

    Presents an uploader, then probes the uploaded buffer via direct read
    methods and via the project's MemoryFileHandler, reporting each outcome.
    """
    st.write("### File Upload Debug")

    uploaded_file = st.file_uploader("Test file upload", type=['txt', 'csv'])
    if not uploaded_file:
        # Nothing uploaded yet; render only the uploader.
        return

    st.write("**File Info:**")
    st.write(f"- Name: {uploaded_file.name}")
    st.write(f"- Type: {uploaded_file.type}")
    st.write(f"- Size: {uploaded_file.size} bytes")

    try:
        st.write("**Direct Read Methods:**")

        # Probe .read() after rewinding to the start of the buffer.
        try:
            uploaded_file.seek(0)
            data = uploaded_file.read()
            st.write(f"- Read method: βœ… Success ({len(data)} bytes)")
        except Exception as exc:
            st.write(f"- Read method: ❌ Failed - {str(exc)}")

        # Probe .getvalue(), which some buffer types expose.
        try:
            uploaded_file.seek(0)
            raw = uploaded_file.getvalue()
            st.write(f"- GetValue method: βœ… Success ({len(raw)} bytes)")
        except Exception as exc:
            st.write(f"- GetValue method: ❌ Failed - {str(exc)}")

        st.write("\n**Memory-based Approach:**")
        try:
            uploaded_file.seek(0)
            binary = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=False)
            if not binary:
                st.write("- Process file (binary): ❌ Failed")
            else:
                st.write(f"- Process file (binary): βœ… Success ({len(binary)} bytes)")
                # Binary read worked; re-run in text mode after rewinding.
                uploaded_file.seek(0)
                text = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=True)
                if text:
                    st.write(f"- Process file (text): βœ… Success ({len(text)} chars)")
                else:
                    st.write("- Process file (text): ❌ Failed")
        except Exception as exc:
            st.error(f"Error with memory-based approach: {exc}")
            import traceback
            st.code(traceback.format_exc())
    except Exception as exc:
        st.error(f"Error processing file: {exc}")
        import traceback
        st.code(traceback.format_exc())
def _render_torch_cuda(torch):
    """Render PyTorch/CUDA availability and per-device memory usage.

    Args:
        torch: the already-imported ``torch`` module (passed in so the
            caller controls the ImportError handling).
    """
    st.write(f"- PyTorch version: {torch.__version__}")
    if torch.cuda.is_available():
        st.write(f"- CUDA available: βœ… Yes")
        st.write(f"- CUDA version: {torch.version.cuda}")
        st.write(f"- Number of GPUs: {torch.cuda.device_count()}")
        # Per-device memory breakdown, converted from bytes to GB.
        for i in range(torch.cuda.device_count()):
            st.write(f"\n**GPU {i}: {torch.cuda.get_device_name(i)}**")
            memory_allocated = torch.cuda.memory_allocated(i) / 1024**3  # GB
            memory_reserved = torch.cuda.memory_reserved(i) / 1024**3  # GB
            memory_total = torch.cuda.get_device_properties(i).total_memory / 1024**3  # GB
            st.write(f" - Total memory: {memory_total:.2f} GB")
            st.write(f" - Allocated: {memory_allocated:.2f} GB")
            st.write(f" - Reserved: {memory_reserved:.2f} GB")
            st.write(f" - Free: {memory_total - memory_reserved:.2f} GB")
    else:
        st.write("- CUDA available: ❌ No")
        st.write("- Running on: CPU only")


def _render_spacy_gpu():
    """Render spaCy GPU preference and transformer-package availability."""
    st.write("**SpaCy GPU Configuration:**")
    try:
        import spacy

        # spacy.prefer_gpu() reports whether GPU allocation succeeded.
        gpu_id = spacy.prefer_gpu()
        if gpu_id is not False:
            st.write(f"- SpaCy GPU: βœ… Enabled (device {gpu_id})")
        else:
            st.write("- SpaCy GPU: ❌ Disabled")

        # Probe both transformer back-end packages independently.
        transformer_status = []
        try:
            import spacy_transformers
            transformer_status.append("spacy-transformers: βœ… Installed")
        except ImportError:
            transformer_status.append("spacy-transformers: ❌ Not installed")
        try:
            import spacy_curated_transformers
            transformer_status.append("spacy-curated-transformers: βœ… Installed")
        except ImportError:
            transformer_status.append("spacy-curated-transformers: ❌ Not installed")

        for status in transformer_status:
            st.write(f"- {status}")
    except Exception as e:
        st.write(f"- SpaCy GPU check failed: {str(e)}")


def _render_active_model():
    """Render model/device info for the analyzer (or parser) in session state.

    Raises:
        Any exception from ``get_model_info()`` propagates to the caller.
    """
    # Either key may hold the loaded pipeline, depending on app flow.
    analyzer = None
    if hasattr(st.session_state, 'analyzer') and st.session_state.analyzer:
        analyzer = st.session_state.analyzer
    elif hasattr(st.session_state, 'parser') and st.session_state.parser:
        analyzer = st.session_state.parser

    if not analyzer:
        st.info("No model currently loaded. Load a model to see its GPU configuration.")
        return

    model_info = analyzer.get_model_info()
    col1, col2 = st.columns(2)
    with col1:
        st.write("**Current Model:**")
        st.write(f"- Model: {model_info.get('name', 'N/A')}")
        st.write(f"- Language: {model_info.get('language', 'N/A')}")
        st.write(f"- Size: {model_info.get('model_size', 'N/A')}")
    with col2:
        st.write("**Device Configuration:**")
        st.write(f"- Device: {model_info.get('device', 'N/A')}")
        gpu_enabled = model_info.get('gpu_enabled', False)
        st.write(f"- GPU Enabled: {'βœ… Yes' if gpu_enabled else '❌ No'}")
        st.write(f"- SpaCy version: {model_info.get('version', 'N/A')}")

    # Optimization notes only apply to transformer models running on GPU.
    if model_info.get('model_size') == 'trf' and gpu_enabled:
        st.write("\n**GPU Optimizations:**")
        st.write("- Mixed precision: βœ… Enabled")
        st.write("- Batch size: Optimized for GPU")
        st.write("- Memory efficiency: Enhanced")


def show_gpu_status():
    """Display GPU/CUDA status information for debugging.

    Composed of three sections: PyTorch/CUDA device status, spaCy GPU
    configuration, and the currently-loaded model's device settings,
    followed by a tips expander. Renders into the page; returns nothing.
    """
    st.write("### GPU Status Information")

    st.write("**PyTorch/CUDA Status:**")
    try:
        import torch

        col1, col2 = st.columns(2)
        with col1:
            _render_torch_cuda(torch)
        with col2:
            _render_spacy_gpu()
    except ImportError:
        st.warning("PyTorch not installed - GPU support unavailable")
        st.write("To enable GPU support, install PyTorch with CUDA support")
    except Exception as e:
        st.error(f"Error checking GPU status: {str(e)}")

    st.write("\n**Active Model GPU Status:**")
    try:
        _render_active_model()
    except Exception as e:
        st.write(f"Could not retrieve active model info: {str(e)}")

    with st.expander("πŸ’‘ GPU Performance Tips", expanded=False):
        st.write("""
**Optimization Tips:**
- Transformer models benefit most from GPU acceleration
- Batch processing is automatically optimized when GPU is enabled
- Mixed precision is enabled for transformer models on GPU
- GPU memory is managed automatically with fallback to CPU if needed
**Common Issues:**
- If GPU is not detected, ensure CUDA-compatible PyTorch is installed
- Memory errors: Try smaller batch sizes or use CPU for very large texts
- Performance: GPU shows most benefit with batch processing
""")