# Hugging Face Space: egumasa/simple-text-analyzer (status: Building; file size: 11,577 bytes)

"""Debug utilities for troubleshooting Huggingface Spaces issues."""

import streamlit as st
import os
import sys
from web_app.utils import MemoryFileHandler

def show_environment_info():
    """Display environment information for debugging.

    Renders three groups of diagnostics through Streamlit:

    * interpreter, platform, working directory and process identifiers,
    * a write-permission probe for a fixed list of directories,
    * the values of selected environment variables.

    The function returns nothing; all output goes to the Streamlit page.
    """
    # Local import keeps this function self-contained; tempfile is stdlib.
    import tempfile

    st.write("### Environment Information")

    col1, col2 = st.columns(2)

    with col1:
        st.write("**System Info:**")
        st.write(f"- Python: {sys.version}")
        st.write(f"- Platform: {sys.platform}")
        st.write(f"- Working Dir: {os.getcwd()}")
        st.write(f"- User: {os.environ.get('USER', 'N/A')}")
        st.write(f"- Home: {os.environ.get('HOME', 'N/A')}")

        # Add process info
        st.write("**Process Info:**")
        # os.getuid/os.getgid are only available on Unix; report "N/A"
        # elsewhere instead of failing with AttributeError.
        uid = os.getuid() if hasattr(os, "getuid") else "N/A"
        gid = os.getgid() if hasattr(os, "getgid") else "N/A"
        st.write(f"- UID: {uid}")
        st.write(f"- GID: {gid}")
        st.write(f"- PID: {os.getpid()}")

    with col2:
        st.write("**Directory Permissions:**")
        dirs_to_check = ['/tmp', '.', './web_app', os.environ.get('HOME', '/')]
        for dir_path in dirs_to_check:
            if not os.path.exists(dir_path):
                st.write(f"- {dir_path}: ⚠️ Not found")
                continue

            try:
                # A uniquely named temporary file cannot clobber an existing
                # file in the directory and is removed automatically, even
                # when the write itself fails.
                with tempfile.TemporaryFile(mode='w', dir=dir_path) as probe:
                    probe.write('test')
            except OSError:
                # Covers permission errors, read-only filesystems and the
                # case where dir_path exists but is not a directory.
                st.write(f"- {dir_path}: ❌ Not writable")
            else:
                st.write(f"- {dir_path}: ✅ Writable")

    st.write("**Environment Variables:**")
    important_vars = ['STREAMLIT_SERVER_PORT', 'STREAMLIT_SERVER_ADDRESS',
                      'UV_CACHE_DIR', 'TMPDIR', 'TEMP', 'TMP', 'SPACES',
                      'SPACE_ID', 'SPACE_HOST']
    for var in important_vars:
        value = os.environ.get(var, 'Not set')
        st.write(f"- {var}: {value}")

def test_file_operations():
    """Run a few in-memory I/O smoke checks and show the outcome of each.

    Three probes are executed in order: reading from a ``StringIO``, reading
    from a ``BytesIO``, and a set/read/delete round trip on
    ``st.session_state``. Each probe that raises is reported as failed along
    with the exception text; otherwise it is reported as successful.
    """

    def _probe_string_io():
        from io import StringIO
        StringIO("test content").read()

    def _probe_bytes_io():
        from io import BytesIO
        BytesIO(b"test content").read()

    def _probe_session_state():
        st.session_state.test_key = "test_value"
        _ = st.session_state.test_key
        del st.session_state.test_key

    st.write("### File Operation Tests")

    probes = (
        ("StringIO operations", _probe_string_io),
        ("BytesIO operations", _probe_bytes_io),
        ("Session state operations", _probe_session_state),
    )

    # Collect (label, verdict, error text or None) for every probe first,
    # then render, so the page layout does not depend on which probe failed.
    outcomes = []
    for label, probe in probes:
        try:
            probe()
        except Exception as exc:
            outcomes.append((label, "❌ Failed", str(exc)))
        else:
            outcomes.append((label, "✅ Success", None))

    # One row per probe: name (and error, if any) on the left, verdict on
    # the right.
    for label, verdict, detail in outcomes:
        left, right = st.columns([3, 1])
        with left:
            st.write(f"**{label}**")
            if detail:
                st.write(f"  Error: {detail}")
        with right:
            st.write(verdict)

def debug_file_upload():
    """Render an upload widget and report how the uploaded file can be read.

    Once a file is uploaded, its name, type and size are shown, followed by
    the result of reading it via ``read()``, via ``getvalue()``, and via
    ``MemoryFileHandler.process_uploaded_file`` in binary and then text mode.
    Failures are reported on the page rather than raised.
    """
    st.write("### File Upload Debug")

    uploaded_file = st.file_uploader("Test file upload", type=['txt', 'csv'])

    # Nothing to inspect until the user has provided a file.
    if not uploaded_file:
        return

    import traceback

    st.write("**File Info:**")
    st.write(f"- Name: {uploaded_file.name}")
    st.write(f"- Type: {uploaded_file.type}")
    st.write(f"- Size: {uploaded_file.size} bytes")

    try:
        # Direct reads: each attempt rewinds first so the checks are
        # independent of one another.
        st.write("**Direct Read Methods:**")
        try:
            uploaded_file.seek(0)
            raw_read = uploaded_file.read()
            st.write(f"- Read method: ✅ Success ({len(raw_read)} bytes)")
        except Exception as read_err:
            st.write(f"- Read method: ❌ Failed - {str(read_err)}")

        try:
            uploaded_file.seek(0)
            raw_value = uploaded_file.getvalue()
            st.write(f"- GetValue method: ✅ Success ({len(raw_value)} bytes)")
        except Exception as value_err:
            st.write(f"- GetValue method: ❌ Failed - {str(value_err)}")

        # In-memory handler: binary mode first; text mode is attempted only
        # when binary mode produced a truthy result.
        st.write("\n**Memory-based Approach:**")
        try:
            uploaded_file.seek(0)
            binary_payload = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=False)
            if not binary_payload:
                st.write("- Process file (binary): ❌ Failed")
            else:
                st.write(f"- Process file (binary): ✅ Success ({len(binary_payload)} bytes)")

                uploaded_file.seek(0)
                text_payload = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=True)
                if not text_payload:
                    st.write("- Process file (text): ❌ Failed")
                else:
                    st.write(f"- Process file (text): ✅ Success ({len(text_payload)} chars)")

        except Exception as handler_err:
            st.error(f"Error with memory-based approach: {handler_err}")
            st.code(traceback.format_exc())

    except Exception as outer_err:
        # Last-resort handler for anything the inner handlers did not catch.
        st.error(f"Error processing file: {outer_err}")
        st.code(traceback.format_exc())

def show_gpu_status():
    """Display GPU/CUDA status information for debugging.

    The page is built from four sections:

    1. PyTorch/CUDA: library version, CUDA availability and per-device
       memory figures. If PyTorch cannot be imported, a warning is shown
       and the spaCy section below is skipped as well, because both live
       in the same ``try`` block.
    2. spaCy: whether ``spacy.prefer_gpu()`` reports an active GPU and
       whether the transformer add-on packages can be imported.
    3. Active model: details from ``get_model_info()`` of the object stored
       in ``st.session_state.analyzer`` (or ``.parser`` as a fallback).
    4. A collapsible list of static performance tips.

    Note: per the spaCy documentation, ``prefer_gpu()`` activates the GPU
    when one is available, so this status check is not purely read-only.

    The function returns nothing; all output goes to the Streamlit page.
    """
    st.write("### GPU Status Information")

    # Check PyTorch/CUDA availability
    st.write("**PyTorch/CUDA Status:**")
    try:
        import torch

        col1, col2 = st.columns(2)

        with col1:
            st.write(f"- PyTorch version: {torch.__version__}")

            if torch.cuda.is_available():
                st.write("- CUDA available: ✅ Yes")
                st.write(f"- CUDA version: {torch.version.cuda}")
                st.write(f"- Number of GPUs: {torch.cuda.device_count()}")

                # Show GPU details
                for i in range(torch.cuda.device_count()):
                    st.write(f"\n**GPU {i}: {torch.cuda.get_device_name(i)}**")
                    memory_allocated = torch.cuda.memory_allocated(i) / 1024**3  # GB
                    memory_reserved = torch.cuda.memory_reserved(i) / 1024**3  # GB
                    memory_total = torch.cuda.get_device_properties(i).total_memory / 1024**3  # GB
                    st.write(f"  - Total memory: {memory_total:.2f} GB")
                    st.write(f"  - Allocated: {memory_allocated:.2f} GB")
                    st.write(f"  - Reserved: {memory_reserved:.2f} GB")
                    # "Free" here is total minus what PyTorch has reserved.
                    st.write(f"  - Free: {memory_total - memory_reserved:.2f} GB")
            else:
                st.write("- CUDA available: ❌ No")
                st.write("- Running on: CPU only")

        with col2:
            # Check spaCy GPU configuration
            st.write("**SpaCy GPU Configuration:**")
            try:
                import spacy

                # prefer_gpu() returns a bool (whether a GPU was activated),
                # not a device id, so it must not be printed as a device
                # number.
                gpu_active = spacy.prefer_gpu()
                if gpu_active:
                    st.write("- SpaCy GPU: ✅ Enabled")
                else:
                    st.write("- SpaCy GPU: ❌ Disabled")

                # Check transformer packages
                transformer_status = []

                # Check spacy-transformers
                try:
                    import spacy_transformers  # noqa: F401 - import is the check
                    transformer_status.append("spacy-transformers: ✅ Installed")
                except ImportError:
                    transformer_status.append("spacy-transformers: ❌ Not installed")

                # Check spacy-curated-transformers
                try:
                    import spacy_curated_transformers  # noqa: F401 - import is the check
                    transformer_status.append("spacy-curated-transformers: ✅ Installed")
                except ImportError:
                    transformer_status.append("spacy-curated-transformers: ❌ Not installed")

                for status in transformer_status:
                    st.write(f"- {status}")

            except Exception as e:
                # Includes a missing spaCy install (ImportError) as well as
                # failures while probing the GPU.
                st.write(f"- SpaCy GPU check failed: {str(e)}")

    except ImportError:
        st.warning("PyTorch not installed - GPU support unavailable")
        st.write("To enable GPU support, install PyTorch with CUDA support")
    except Exception as e:
        st.error(f"Error checking GPU status: {str(e)}")

    # Active model GPU status
    st.write("\n**Active Model GPU Status:**")
    try:
        # Try to get analyzer from session state; fall back to the parser.
        analyzer = None
        if hasattr(st.session_state, 'analyzer') and st.session_state.analyzer:
            analyzer = st.session_state.analyzer
        elif hasattr(st.session_state, 'parser') and st.session_state.parser:
            analyzer = st.session_state.parser

        if analyzer:
            # Read below with .get(), so missing keys show as 'N/A'.
            model_info = analyzer.get_model_info()
            col1, col2 = st.columns(2)

            with col1:
                st.write("**Current Model:**")
                st.write(f"- Model: {model_info.get('name', 'N/A')}")
                st.write(f"- Language: {model_info.get('language', 'N/A')}")
                st.write(f"- Size: {model_info.get('model_size', 'N/A')}")

            with col2:
                st.write("**Device Configuration:**")
                st.write(f"- Device: {model_info.get('device', 'N/A')}")
                gpu_enabled = model_info.get('gpu_enabled', False)
                st.write(f"- GPU Enabled: {'✅ Yes' if gpu_enabled else '❌ No'}")
                st.write(f"- SpaCy version: {model_info.get('version', 'N/A')}")

            # Show optimization status for transformer models.
            # NOTE(review): these lines are static text shown whenever the
            # model reports size 'trf' with GPU enabled; nothing here
            # measures the optimizations themselves.
            if model_info.get('model_size') == 'trf' and gpu_enabled:
                st.write("\n**GPU Optimizations:**")
                st.write("- Mixed precision: ✅ Enabled")
                st.write("- Batch size: Optimized for GPU")
                st.write("- Memory efficiency: Enhanced")
        else:
            st.info("No model currently loaded. Load a model to see its GPU configuration.")

    except Exception as e:
        st.write(f"Could not retrieve active model info: {str(e)}")

    # Performance tips
    with st.expander("💡 GPU Performance Tips", expanded=False):
        st.write("""
        **Optimization Tips:**
        - Transformer models benefit most from GPU acceleration
        - Batch processing is automatically optimized when GPU is enabled
        - Mixed precision is enabled for transformer models on GPU
        - GPU memory is managed automatically with fallback to CPU if needed
        
        **Common Issues:**
        - If GPU is not detected, ensure CUDA-compatible PyTorch is installed
        - Memory errors: Try smaller batch sizes or use CPU for very large texts
        - Performance: GPU shows most benefit with batch processing
        """)