"""Debug utilities for troubleshooting Huggingface Spaces issues.""" import streamlit as st import os import sys from web_app.utils import MemoryFileHandler def show_environment_info(): """Display environment information for debugging.""" st.write("### Environment Information") col1, col2 = st.columns(2) with col1: st.write("**System Info:**") st.write(f"- Python: {sys.version}") st.write(f"- Platform: {sys.platform}") st.write(f"- Working Dir: {os.getcwd()}") st.write(f"- User: {os.environ.get('USER', 'N/A')}") st.write(f"- Home: {os.environ.get('HOME', 'N/A')}") # Add process info st.write("**Process Info:**") st.write(f"- UID: {os.getuid()}") st.write(f"- GID: {os.getgid()}") st.write(f"- PID: {os.getpid()}") with col2: st.write("**Directory Permissions:**") dirs_to_check = ['/tmp', '.', './web_app', os.environ.get('HOME', '/')] for dir_path in dirs_to_check: if os.path.exists(dir_path): try: # Check if we can write to the directory test_file = os.path.join(dir_path, '.write_test') with open(test_file, 'w') as f: f.write('test') os.remove(test_file) st.write(f"- {dir_path}: ✅ Writable") except: st.write(f"- {dir_path}: ❌ Not writable") else: st.write(f"- {dir_path}: ⚠️ Not found") st.write("**Environment Variables:**") important_vars = ['STREAMLIT_SERVER_PORT', 'STREAMLIT_SERVER_ADDRESS', 'UV_CACHE_DIR', 'TMPDIR', 'TEMP', 'TMP', 'SPACES', 'SPACE_ID', 'SPACE_HOST'] for var in important_vars: value = os.environ.get(var, 'Not set') st.write(f"- {var}: {value}") def test_file_operations(): """Test various file operations to identify issues.""" st.write("### File Operation Tests") tests = [] # Test 1: StringIO try: from io import StringIO sio = StringIO("test content") content = sio.read() tests.append(("StringIO operations", "✅ Success", None)) except Exception as e: tests.append(("StringIO operations", "❌ Failed", str(e))) # Test 2: BytesIO try: from io import BytesIO bio = BytesIO(b"test content") content = bio.read() tests.append(("BytesIO operations", "✅ Success", None)) except Exception as e: tests.append(("BytesIO operations", "❌ Failed", str(e))) # Test 3: Session state try: st.session_state.test_key = "test_value" _ = st.session_state.test_key del st.session_state.test_key tests.append(("Session state operations", "✅ Success", None)) except Exception as e: tests.append(("Session state operations", "❌ Failed", str(e))) # Display results for test_name, status, error in tests: col1, col2 = st.columns([3, 1]) with col1: st.write(f"**{test_name}**") if error: st.write(f" Error: {error}") with col2: st.write(status) def debug_file_upload(): """Debug file upload functionality.""" st.write("### File Upload Debug") uploaded_file = st.file_uploader("Test file upload", type=['txt', 'csv']) if uploaded_file: st.write("**File Info:**") st.write(f"- Name: {uploaded_file.name}") st.write(f"- Type: {uploaded_file.type}") st.write(f"- Size: {uploaded_file.size} bytes") try: # Test direct read methods first st.write("**Direct Read Methods:**") try: uploaded_file.seek(0) content = uploaded_file.read() st.write(f"- Read method: ✅ Success ({len(content)} bytes)") except Exception as e: st.write(f"- Read method: ❌ Failed - {str(e)}") # Try getvalue if available try: uploaded_file.seek(0) value = uploaded_file.getvalue() st.write(f"- GetValue method: ✅ Success ({len(value)} bytes)") except Exception as e: st.write(f"- GetValue method: ❌ Failed - {str(e)}") # Test memory-based approach st.write("\n**Memory-based Approach:**") try: uploaded_file.seek(0) content = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=False) if content: st.write(f"- Process file (binary): ✅ Success ({len(content)} bytes)") # Try text mode uploaded_file.seek(0) text_content = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=True) if text_content: st.write(f"- Process file (text): ✅ Success ({len(text_content)} chars)") else: st.write("- Process file (text): ❌ Failed") else: st.write("- Process file (binary): ❌ Failed") except Exception as e: st.error(f"Error with memory-based approach: {e}") import traceback st.code(traceback.format_exc()) except Exception as e: st.error(f"Error processing file: {e}") import traceback st.code(traceback.format_exc()) def show_gpu_status(): """Display GPU/CUDA status information for debugging.""" st.write("### GPU Status Information") # Check PyTorch/CUDA availability st.write("**PyTorch/CUDA Status:**") try: import torch col1, col2 = st.columns(2) with col1: st.write(f"- PyTorch version: {torch.__version__}") if torch.cuda.is_available(): st.write(f"- CUDA available: ✅ Yes") st.write(f"- CUDA version: {torch.version.cuda}") st.write(f"- Number of GPUs: {torch.cuda.device_count()}") # Show GPU details for i in range(torch.cuda.device_count()): st.write(f"\n**GPU {i}: {torch.cuda.get_device_name(i)}**") memory_allocated = torch.cuda.memory_allocated(i) / 1024**3 # GB memory_reserved = torch.cuda.memory_reserved(i) / 1024**3 # GB memory_total = torch.cuda.get_device_properties(i).total_memory / 1024**3 # GB st.write(f" - Total memory: {memory_total:.2f} GB") st.write(f" - Allocated: {memory_allocated:.2f} GB") st.write(f" - Reserved: {memory_reserved:.2f} GB") st.write(f" - Free: {memory_total - memory_reserved:.2f} GB") else: st.write("- CUDA available: ❌ No") st.write("- Running on: CPU only") with col2: # Check spaCy GPU configuration st.write("**SpaCy GPU Configuration:**") try: import spacy # Test GPU preference gpu_id = spacy.prefer_gpu() if gpu_id is not False: st.write(f"- SpaCy GPU: ✅ Enabled (device {gpu_id})") else: st.write("- SpaCy GPU: ❌ Disabled") # Check transformer packages transformer_status = [] # Check spacy-transformers try: import spacy_transformers transformer_status.append("spacy-transformers: ✅ Installed") except ImportError: transformer_status.append("spacy-transformers: ❌ Not installed") # Check spacy-curated-transformers try: import spacy_curated_transformers transformer_status.append("spacy-curated-transformers: ✅ Installed") except ImportError: transformer_status.append("spacy-curated-transformers: ❌ Not installed") for status in transformer_status: st.write(f"- {status}") except Exception as e: st.write(f"- SpaCy GPU check failed: {str(e)}") except ImportError: st.warning("PyTorch not installed - GPU support unavailable") st.write("To enable GPU support, install PyTorch with CUDA support") except Exception as e: st.error(f"Error checking GPU status: {str(e)}") # Active model GPU status st.write("\n**Active Model GPU Status:**") try: # Try to get analyzer from session state analyzer = None if hasattr(st.session_state, 'analyzer') and st.session_state.analyzer: analyzer = st.session_state.analyzer elif hasattr(st.session_state, 'parser') and st.session_state.parser: analyzer = st.session_state.parser if analyzer: model_info = analyzer.get_model_info() col1, col2 = st.columns(2) with col1: st.write("**Current Model:**") st.write(f"- Model: {model_info.get('name', 'N/A')}") st.write(f"- Language: {model_info.get('language', 'N/A')}") st.write(f"- Size: {model_info.get('model_size', 'N/A')}") with col2: st.write("**Device Configuration:**") st.write(f"- Device: {model_info.get('device', 'N/A')}") gpu_enabled = model_info.get('gpu_enabled', False) st.write(f"- GPU Enabled: {'✅ Yes' if gpu_enabled else '❌ No'}") st.write(f"- SpaCy version: {model_info.get('version', 'N/A')}") # Show optimization status for transformer models if model_info.get('model_size') == 'trf' and gpu_enabled: st.write("\n**GPU Optimizations:**") st.write("- Mixed precision: ✅ Enabled") st.write("- Batch size: Optimized for GPU") st.write("- Memory efficiency: Enhanced") else: st.info("No model currently loaded. Load a model to see its GPU configuration.") except Exception as e: st.write(f"Could not retrieve active model info: {str(e)}") # Performance tips with st.expander("💡 GPU Performance Tips", expanded=False): st.write(""" **Optimization Tips:** - Transformer models benefit most from GPU acceleration - Batch processing is automatically optimized when GPU is enabled - Mixed precision is enabled for transformer models on GPU - GPU memory is managed automatically with fallback to CPU if needed **Common Issues:** - If GPU is not detected, ensure CUDA-compatible PyTorch is installed - Memory errors: Try smaller batch sizes or use CPU for very large texts - Performance: GPU shows most benefit with batch processing """)