# simple-text-analyzer / web_app / debug_utils.py
# Commit 3f10400 ("trigger GPU") by egumasa
"""Debug utilities for troubleshooting Huggingface Spaces issues."""
import streamlit as st
import os
import sys
from web_app.utils import MemoryFileHandler
def show_environment_info():
    """Display system, process, permission, and env-var info for debugging.

    Renders directly into the Streamlit page in two columns; returns nothing.
    Intended to diagnose sandbox/permission issues on Huggingface Spaces.
    """
    st.write("### Environment Information")

    col1, col2 = st.columns(2)

    with col1:
        st.write("**System Info:**")
        st.write(f"- Python: {sys.version}")
        st.write(f"- Platform: {sys.platform}")
        st.write(f"- Working Dir: {os.getcwd()}")
        st.write(f"- User: {os.environ.get('USER', 'N/A')}")
        st.write(f"- Home: {os.environ.get('HOME', 'N/A')}")

        # Process identity helps diagnose container permission issues.
        # getuid/getgid are POSIX-only, so guard for non-POSIX platforms.
        st.write("**Process Info:**")
        if hasattr(os, "getuid"):
            st.write(f"- UID: {os.getuid()}")
            st.write(f"- GID: {os.getgid()}")
        else:
            st.write("- UID/GID: N/A on this platform")
        st.write(f"- PID: {os.getpid()}")

    with col2:
        st.write("**Directory Permissions:**")
        dirs_to_check = ['/tmp', '.', './web_app', os.environ.get('HOME', '/')]
        for dir_path in dirs_to_check:
            if not os.path.exists(dir_path):
                st.write(f"- {dir_path}: ⚠️ Not found")
                continue
            try:
                # Probe writability by creating and removing a marker file.
                test_file = os.path.join(dir_path, '.write_test')
                with open(test_file, 'w') as f:
                    f.write('test')
                os.remove(test_file)
                st.write(f"- {dir_path}: βœ… Writable")
            except OSError:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # programming errors are not silently swallowed.
                st.write(f"- {dir_path}: ❌ Not writable")

    st.write("**Environment Variables:**")
    important_vars = ['STREAMLIT_SERVER_PORT', 'STREAMLIT_SERVER_ADDRESS',
                      'UV_CACHE_DIR', 'TMPDIR', 'TEMP', 'TMP', 'SPACES',
                      'SPACE_ID', 'SPACE_HOST']
    for var in important_vars:
        value = os.environ.get(var, 'Not set')
        st.write(f"- {var}: {value}")
def test_file_operations():
    """Exercise in-memory buffer and session-state operations, reporting pass/fail.

    Runs three self-contained probes (StringIO, BytesIO, Streamlit session
    state) and renders one result row per probe.
    """
    st.write("### File Operation Tests")

    def _probe_string_io():
        # Round-trip a small text buffer through io.StringIO.
        from io import StringIO
        StringIO("test content").read()

    def _probe_bytes_io():
        # Round-trip a small binary buffer through io.BytesIO.
        from io import BytesIO
        BytesIO(b"test content").read()

    def _probe_session_state():
        # Set, read back, and delete a throwaway session-state key.
        st.session_state.test_key = "test_value"
        _ = st.session_state.test_key
        del st.session_state.test_key

    probes = [
        ("StringIO operations", _probe_string_io),
        ("BytesIO operations", _probe_bytes_io),
        ("Session state operations", _probe_session_state),
    ]

    outcomes = []
    for label, probe in probes:
        try:
            probe()
            outcomes.append((label, "βœ… Success", None))
        except Exception as exc:
            outcomes.append((label, "❌ Failed", str(exc)))

    # Render: probe name (plus error detail, if any) on the left,
    # status badge on the right.
    for label, status, error in outcomes:
        left, right = st.columns([3, 1])
        with left:
            st.write(f"**{label}**")
            if error:
                st.write(f" Error: {error}")
        with right:
            st.write(status)
def debug_file_upload():
    """Interactive widget for diagnosing file-upload handling.

    Presents an uploader, then probes the uploaded buffer via direct read
    methods and via the project's MemoryFileHandler, reporting each outcome.
    """
    st.write("### File Upload Debug")

    uploaded_file = st.file_uploader("Test file upload", type=['txt', 'csv'])
    if not uploaded_file:
        # Nothing uploaded yet; render only the uploader.
        return

    st.write("**File Info:**")
    st.write(f"- Name: {uploaded_file.name}")
    st.write(f"- Type: {uploaded_file.type}")
    st.write(f"- Size: {uploaded_file.size} bytes")

    try:
        st.write("**Direct Read Methods:**")

        # Probe .read() after rewinding to the start of the buffer.
        try:
            uploaded_file.seek(0)
            data = uploaded_file.read()
            st.write(f"- Read method: βœ… Success ({len(data)} bytes)")
        except Exception as exc:
            st.write(f"- Read method: ❌ Failed - {str(exc)}")

        # Probe .getvalue(), which some buffer types expose.
        try:
            uploaded_file.seek(0)
            raw = uploaded_file.getvalue()
            st.write(f"- GetValue method: βœ… Success ({len(raw)} bytes)")
        except Exception as exc:
            st.write(f"- GetValue method: ❌ Failed - {str(exc)}")

        st.write("\n**Memory-based Approach:**")
        try:
            uploaded_file.seek(0)
            binary = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=False)
            if not binary:
                st.write("- Process file (binary): ❌ Failed")
            else:
                st.write(f"- Process file (binary): βœ… Success ({len(binary)} bytes)")
                # Binary read worked; re-run in text mode after rewinding.
                uploaded_file.seek(0)
                text = MemoryFileHandler.process_uploaded_file(uploaded_file, as_text=True)
                if text:
                    st.write(f"- Process file (text): βœ… Success ({len(text)} chars)")
                else:
                    st.write("- Process file (text): ❌ Failed")
        except Exception as exc:
            st.error(f"Error with memory-based approach: {exc}")
            import traceback
            st.code(traceback.format_exc())
    except Exception as exc:
        st.error(f"Error processing file: {exc}")
        import traceback
        st.code(traceback.format_exc())
def _render_torch_cuda(torch):
    """Render PyTorch/CUDA availability and per-device memory usage.

    Args:
        torch: the already-imported ``torch`` module (passed in so the
            caller controls the ImportError handling).
    """
    st.write(f"- PyTorch version: {torch.__version__}")
    if torch.cuda.is_available():
        st.write(f"- CUDA available: βœ… Yes")
        st.write(f"- CUDA version: {torch.version.cuda}")
        st.write(f"- Number of GPUs: {torch.cuda.device_count()}")
        # Per-device memory breakdown, converted from bytes to GB.
        for i in range(torch.cuda.device_count()):
            st.write(f"\n**GPU {i}: {torch.cuda.get_device_name(i)}**")
            memory_allocated = torch.cuda.memory_allocated(i) / 1024**3  # GB
            memory_reserved = torch.cuda.memory_reserved(i) / 1024**3  # GB
            memory_total = torch.cuda.get_device_properties(i).total_memory / 1024**3  # GB
            st.write(f" - Total memory: {memory_total:.2f} GB")
            st.write(f" - Allocated: {memory_allocated:.2f} GB")
            st.write(f" - Reserved: {memory_reserved:.2f} GB")
            st.write(f" - Free: {memory_total - memory_reserved:.2f} GB")
    else:
        st.write("- CUDA available: ❌ No")
        st.write("- Running on: CPU only")


def _render_spacy_gpu():
    """Render spaCy GPU preference and transformer-package availability."""
    st.write("**SpaCy GPU Configuration:**")
    try:
        import spacy

        # spacy.prefer_gpu() reports whether GPU allocation succeeded.
        gpu_id = spacy.prefer_gpu()
        if gpu_id is not False:
            st.write(f"- SpaCy GPU: βœ… Enabled (device {gpu_id})")
        else:
            st.write("- SpaCy GPU: ❌ Disabled")

        # Probe both transformer back-end packages independently.
        transformer_status = []
        try:
            import spacy_transformers
            transformer_status.append("spacy-transformers: βœ… Installed")
        except ImportError:
            transformer_status.append("spacy-transformers: ❌ Not installed")
        try:
            import spacy_curated_transformers
            transformer_status.append("spacy-curated-transformers: βœ… Installed")
        except ImportError:
            transformer_status.append("spacy-curated-transformers: ❌ Not installed")

        for status in transformer_status:
            st.write(f"- {status}")
    except Exception as e:
        st.write(f"- SpaCy GPU check failed: {str(e)}")


def _render_active_model():
    """Render model/device info for the analyzer (or parser) in session state.

    Raises:
        Any exception from ``get_model_info()`` propagates to the caller.
    """
    # Either key may hold the loaded pipeline, depending on app flow.
    analyzer = None
    if hasattr(st.session_state, 'analyzer') and st.session_state.analyzer:
        analyzer = st.session_state.analyzer
    elif hasattr(st.session_state, 'parser') and st.session_state.parser:
        analyzer = st.session_state.parser

    if not analyzer:
        st.info("No model currently loaded. Load a model to see its GPU configuration.")
        return

    model_info = analyzer.get_model_info()
    col1, col2 = st.columns(2)
    with col1:
        st.write("**Current Model:**")
        st.write(f"- Model: {model_info.get('name', 'N/A')}")
        st.write(f"- Language: {model_info.get('language', 'N/A')}")
        st.write(f"- Size: {model_info.get('model_size', 'N/A')}")
    with col2:
        st.write("**Device Configuration:**")
        st.write(f"- Device: {model_info.get('device', 'N/A')}")
        gpu_enabled = model_info.get('gpu_enabled', False)
        st.write(f"- GPU Enabled: {'βœ… Yes' if gpu_enabled else '❌ No'}")
        st.write(f"- SpaCy version: {model_info.get('version', 'N/A')}")

    # Optimization notes only apply to transformer models running on GPU.
    if model_info.get('model_size') == 'trf' and gpu_enabled:
        st.write("\n**GPU Optimizations:**")
        st.write("- Mixed precision: βœ… Enabled")
        st.write("- Batch size: Optimized for GPU")
        st.write("- Memory efficiency: Enhanced")


def show_gpu_status():
    """Display GPU/CUDA status information for debugging.

    Composed of three sections: PyTorch/CUDA device status, spaCy GPU
    configuration, and the currently-loaded model's device settings,
    followed by a tips expander. Renders into the page; returns nothing.
    """
    st.write("### GPU Status Information")

    st.write("**PyTorch/CUDA Status:**")
    try:
        import torch

        col1, col2 = st.columns(2)
        with col1:
            _render_torch_cuda(torch)
        with col2:
            _render_spacy_gpu()
    except ImportError:
        st.warning("PyTorch not installed - GPU support unavailable")
        st.write("To enable GPU support, install PyTorch with CUDA support")
    except Exception as e:
        st.error(f"Error checking GPU status: {str(e)}")

    st.write("\n**Active Model GPU Status:**")
    try:
        _render_active_model()
    except Exception as e:
        st.write(f"Could not retrieve active model info: {str(e)}")

    with st.expander("πŸ’‘ GPU Performance Tips", expanded=False):
        st.write("""
**Optimization Tips:**
- Transformer models benefit most from GPU acceleration
- Batch processing is automatically optimized when GPU is enabled
- Mixed precision is enabled for transformer models on GPU
- GPU memory is managed automatically with fallback to CPU if needed
**Common Issues:**
- If GPU is not detected, ensure CUDA-compatible PyTorch is installed
- Memory errors: Try smaller batch sizes or use CPU for very large texts
- Performance: GPU shows most benefit with batch processing
""")