# Hugging Face Spaces build-status residue, kept as comments so the file parses:
# Spaces: Building
| """Debug utilities for troubleshooting Huggingface Spaces issues.""" | |
| import streamlit as st | |
| import os | |
| import sys | |
| from web_app.utils import MemoryFileHandler | |
def show_environment_info():
    """Display environment information for debugging.

    Renders system, process, directory-writability, and environment-variable
    details in a two-column Streamlit layout, to help diagnose Huggingface
    Spaces permission/configuration problems.
    """
    st.write("### Environment Information")
    col1, col2 = st.columns(2)
    with col1:
        st.write("**System Info:**")
        st.write(f"- Python: {sys.version}")
        st.write(f"- Platform: {sys.platform}")
        st.write(f"- Working Dir: {os.getcwd()}")
        st.write(f"- User: {os.environ.get('USER', 'N/A')}")
        st.write(f"- Home: {os.environ.get('HOME', 'N/A')}")
        # Add process info
        st.write("**Process Info:**")
        # getuid/getgid exist only on POSIX; guard so the page still renders
        # when run on Windows during local development.
        if hasattr(os, "getuid"):
            st.write(f"- UID: {os.getuid()}")
            st.write(f"- GID: {os.getgid()}")
        else:
            st.write("- UID/GID: N/A (non-POSIX platform)")
        st.write(f"- PID: {os.getpid()}")
    with col2:
        st.write("**Directory Permissions:**")
        dirs_to_check = ['/tmp', '.', './web_app', os.environ.get('HOME', '/')]
        for dir_path in dirs_to_check:
            if os.path.exists(dir_path):
                try:
                    # Probe writability by creating and removing a marker file.
                    test_file = os.path.join(dir_path, '.write_test')
                    with open(test_file, 'w') as f:
                        f.write('test')
                    os.remove(test_file)
                    st.write(f"- {dir_path}: ✅ Writable")
                except OSError:
                    # Narrowed from a bare `except:` — only filesystem errors
                    # (PermissionError, read-only FS, ...) mean "not writable";
                    # a bare except would also swallow KeyboardInterrupt/SystemExit.
                    st.write(f"- {dir_path}: ❌ Not writable")
            else:
                st.write(f"- {dir_path}: ⚠️ Not found")
    st.write("**Environment Variables:**")
    important_vars = ['STREAMLIT_SERVER_PORT', 'STREAMLIT_SERVER_ADDRESS',
                      'UV_CACHE_DIR', 'TMPDIR', 'TEMP', 'TMP', 'SPACES',
                      'SPACE_ID', 'SPACE_HOST']
    for var in important_vars:
        value = os.environ.get(var, 'Not set')
        st.write(f"- {var}: {value}")
def test_file_operations():
    """Test various file operations to identify issues."""
    st.write("### File Operation Tests")
    outcomes = []

    def _probe(label, action):
        # Run one check, recording (label, status, error-message-or-None).
        try:
            action()
            outcomes.append((label, "✅ Success", None))
        except Exception as exc:
            outcomes.append((label, "❌ Failed", str(exc)))

    def _check_stringio():
        # Test 1: StringIO round-trip.
        from io import StringIO
        StringIO("test content").read()

    def _check_bytesio():
        # Test 2: BytesIO round-trip.
        from io import BytesIO
        BytesIO(b"test content").read()

    def _check_session_state():
        # Test 3: set, read back, and delete a session-state key.
        st.session_state.test_key = "test_value"
        _ = st.session_state.test_key
        del st.session_state.test_key

    _probe("StringIO operations", _check_stringio)
    _probe("BytesIO operations", _check_bytesio)
    _probe("Session state operations", _check_session_state)

    # Display results
    for name, status, error in outcomes:
        left, right = st.columns([3, 1])
        with left:
            st.write(f"**{name}**")
            if error:
                st.write(f" Error: {error}")
        with right:
            st.write(status)
def debug_file_upload():
    """Debug file upload functionality.

    Renders a Streamlit file-uploader and, once a file is provided, reports
    its metadata and then exercises three ways of reading it so failures can
    be pinpointed: direct .read(), .getvalue(), and the project's
    MemoryFileHandler in binary and text modes.
    """
    st.write("### File Upload Debug")
    uploaded_file = st.file_uploader("Test file upload", type=['txt', 'csv'])
    if uploaded_file:
        st.write("**File Info:**")
        st.write(f"- Name: {uploaded_file.name}")
        st.write(f"- Type: {uploaded_file.type}")
        st.write(f"- Size: {uploaded_file.size} bytes")
        try:
            # Test direct read methods first
            st.write("**Direct Read Methods:**")
            try:
                # Rewind before each probe: earlier reads move the buffer position.
                uploaded_file.seek(0)
                content = uploaded_file.read()
                st.write(f"- Read method: ✅ Success ({len(content)} bytes)")
            except Exception as e:
                st.write(f"- Read method: ❌ Failed - {str(e)}")
            # Try getvalue if available
            try:
                uploaded_file.seek(0)
                value = uploaded_file.getvalue()
                st.write(f"- GetValue method: ✅ Success ({len(value)} bytes)")
            except Exception as e:
                st.write(f"- GetValue method: ❌ Failed - {str(e)}")
            # Test memory-based approach
            st.write("\n**Memory-based Approach:**")
            try:
                uploaded_file.seek(0)
                # NOTE(review): MemoryFileHandler comes from web_app.utils; this
                # code assumes it returns a falsy value (not raising) on failure —
                # TODO confirm against its implementation.
                content = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=False)
                if content:
                    st.write(f"- Process file (binary): ✅ Success ({len(content)} bytes)")
                    # Try text mode
                    uploaded_file.seek(0)
                    text_content = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=True)
                    if text_content:
                        st.write(f"- Process file (text): ✅ Success ({len(text_content)} chars)")
                    else:
                        st.write("- Process file (text): ❌ Failed")
                else:
                    st.write("- Process file (binary): ❌ Failed")
            except Exception as e:
                st.error(f"Error with memory-based approach: {e}")
                import traceback
                st.code(traceback.format_exc())
        except Exception as e:
            # Outer guard: metadata/read probes above should not crash the page.
            st.error(f"Error processing file: {e}")
            import traceback
            st.code(traceback.format_exc())
def show_gpu_status():
    """Display GPU/CUDA status information for debugging.

    Shows PyTorch/CUDA availability and per-GPU memory, spaCy GPU
    configuration and transformer-package availability, the GPU status of
    any model held in session state, and a tips expander. All third-party
    imports (torch, spacy) are optional and handled gracefully.
    """
    st.write("### GPU Status Information")
    # Check PyTorch/CUDA availability
    st.write("**PyTorch/CUDA Status:**")
    try:
        import torch
        col1, col2 = st.columns(2)
        with col1:
            st.write(f"- PyTorch version: {torch.__version__}")
            if torch.cuda.is_available():
                st.write(f"- CUDA available: ✅ Yes")
                st.write(f"- CUDA version: {torch.version.cuda}")
                st.write(f"- Number of GPUs: {torch.cuda.device_count()}")
                # Show GPU details
                for i in range(torch.cuda.device_count()):
                    st.write(f"\n**GPU {i}: {torch.cuda.get_device_name(i)}**")
                    memory_allocated = torch.cuda.memory_allocated(i) / 1024**3  # GB
                    memory_reserved = torch.cuda.memory_reserved(i) / 1024**3  # GB
                    memory_total = torch.cuda.get_device_properties(i).total_memory / 1024**3  # GB
                    st.write(f"  - Total memory: {memory_total:.2f} GB")
                    st.write(f"  - Allocated: {memory_allocated:.2f} GB")
                    st.write(f"  - Reserved: {memory_reserved:.2f} GB")
                    # "Free" here means total minus reserved-by-PyTorch, not OS-level free.
                    st.write(f"  - Free: {memory_total - memory_reserved:.2f} GB")
            else:
                st.write("- CUDA available: ❌ No")
                st.write("- Running on: CPU only")
        with col2:
            # Check spaCy GPU configuration
            st.write("**SpaCy GPU Configuration:**")
            try:
                import spacy
                # Test GPU preference
                # NOTE(review): spacy.prefer_gpu() returns a bool, so on success
                # "device {gpu_id}" likely renders "device True" — confirm intent.
                gpu_id = spacy.prefer_gpu()
                if gpu_id is not False:
                    st.write(f"- SpaCy GPU: ✅ Enabled (device {gpu_id})")
                else:
                    st.write("- SpaCy GPU: ❌ Disabled")
                # Check transformer packages
                transformer_status = []
                # Check spacy-transformers
                try:
                    import spacy_transformers
                    transformer_status.append("spacy-transformers: ✅ Installed")
                except ImportError:
                    transformer_status.append("spacy-transformers: ❌ Not installed")
                # Check spacy-curated-transformers
                try:
                    import spacy_curated_transformers
                    transformer_status.append("spacy-curated-transformers: ✅ Installed")
                except ImportError:
                    transformer_status.append("spacy-curated-transformers: ❌ Not installed")
                for status in transformer_status:
                    st.write(f"- {status}")
            except Exception as e:
                st.write(f"- SpaCy GPU check failed: {str(e)}")
    except ImportError:
        st.warning("PyTorch not installed - GPU support unavailable")
        st.write("To enable GPU support, install PyTorch with CUDA support")
    except Exception as e:
        st.error(f"Error checking GPU status: {str(e)}")
    # Active model GPU status
    st.write("\n**Active Model GPU Status:**")
    try:
        # Try to get analyzer from session state; falls back to 'parser' key.
        analyzer = None
        if hasattr(st.session_state, 'analyzer') and st.session_state.analyzer:
            analyzer = st.session_state.analyzer
        elif hasattr(st.session_state, 'parser') and st.session_state.parser:
            analyzer = st.session_state.parser
        if analyzer:
            # NOTE(review): assumes the session-state object exposes
            # get_model_info() returning a dict — defined elsewhere in the project.
            model_info = analyzer.get_model_info()
            col1, col2 = st.columns(2)
            with col1:
                st.write("**Current Model:**")
                st.write(f"- Model: {model_info.get('name', 'N/A')}")
                st.write(f"- Language: {model_info.get('language', 'N/A')}")
                st.write(f"- Size: {model_info.get('model_size', 'N/A')}")
            with col2:
                st.write("**Device Configuration:**")
                st.write(f"- Device: {model_info.get('device', 'N/A')}")
                gpu_enabled = model_info.get('gpu_enabled', False)
                st.write(f"- GPU Enabled: {'✅ Yes' if gpu_enabled else '❌ No'}")
                st.write(f"- SpaCy version: {model_info.get('version', 'N/A')}")
            # Show optimization status for transformer models
            if model_info.get('model_size') == 'trf' and gpu_enabled:
                st.write("\n**GPU Optimizations:**")
                st.write("- Mixed precision: ✅ Enabled")
                st.write("- Batch size: Optimized for GPU")
                st.write("- Memory efficiency: Enhanced")
        else:
            st.info("No model currently loaded. Load a model to see its GPU configuration.")
    except Exception as e:
        st.write(f"Could not retrieve active model info: {str(e)}")
    # Performance tips
    with st.expander("💡 GPU Performance Tips", expanded=False):
        st.write("""
        **Optimization Tips:**
        - Transformer models benefit most from GPU acceleration
        - Batch processing is automatically optimized when GPU is enabled
        - Mixed precision is enabled for transformer models on GPU
        - GPU memory is managed automatically with fallback to CPU if needed
        **Common Issues:**
        - If GPU is not detected, ensure CUDA-compatible PyTorch is installed
        - Memory errors: Try smaller batch sizes or use CPU for very large texts
        - Performance: GPU shows most benefit with batch processing
        """)