# app.py — uploaded via the Hugging Face web UI (upload metadata removed; it broke Python syntax)
#!/usr/bin/env python3
"""
NZ Legislation Loophole Analysis - Hugging Face Spaces App
Root-level app.py for Hugging Face Spaces deployment.
Adapted for Spaces memory constraints and session-based caching.
"""
import streamlit as st
import sys
import os
import warnings
from pathlib import Path
# Add current directory to Python path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')
# Import core modules with error handling for Spaces: a missing package
# surfaces as a readable Streamlit error instead of a raw traceback.
try:
    from streamlit_app.core.cache_manager import CacheManager, get_cache_manager
    from streamlit_app.core.text_processor import TextProcessor
    from streamlit_app.core.llm_analyzer import LLMAnalyzer
    from streamlit_app.core.dataset_builder import DatasetBuilder
    from streamlit_app.utils.config import ConfigManager
    from streamlit_app.utils.performance import PerformanceMonitor
    from streamlit_app.utils.ui_helpers import UIHelpers
except ImportError as e:
    # Fail fast with a user-visible message; st.stop() halts script execution.
    st.error(f"❌ Import Error: {e}")
    st.error("Please ensure all required packages are installed.")
    st.stop()
# Configure page settings for Spaces — st.set_page_config must be the first
# Streamlit call in the script.
_MENU_ITEMS = {
    'Get Help': 'https://huggingface.co/spaces/your-space',
    'Report a bug': 'https://github.com/your-repo/issues',
    'About': '''
## NZ Legislation Loophole Analyzer
AI-powered analysis of New Zealand legislation to identify
potential loopholes, ambiguities, and unintended consequences.
**Version:** 1.0.0 (Spaces Edition)
**Platform:** Hugging Face Spaces
**Built with:** Streamlit & Llama.cpp
''',
}

st.set_page_config(
    page_title="NZ Legislation Loophole Analyzer",
    page_icon="βš–οΈ",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items=_MENU_ITEMS,
)
# Spaces-specific configuration: conservative defaults sized for the
# shared-memory environment of a Hugging Face Space.
SPACES_CONFIG = {
    'max_memory_mb': 512,        # Conservative memory limit for Spaces
    'cache_enabled': True,
    'persistent_cache': False,   # Session-based only; no disk persistence on Spaces
    'model_path': 'qwen3.gguf',  # Default model
    'context_length': 4096,      # Smaller context for memory
    'max_tokens': 2048,          # Smaller responses
}
def initialize_spaces_session():
    """Populate st.session_state with Spaces-optimized defaults (idempotent).

    Safe to call on every rerun: each key is created only when missing, so
    existing session objects (cache, config, results) are never clobbered.
    """
    ss = st.session_state

    if 'cache_manager' not in ss:
        # Session-scoped cache only: Spaces offers no persistent storage.
        ss.cache_manager = CacheManager(
            max_memory_mb=SPACES_CONFIG['max_memory_mb'],
            persistent=False,
            ttl_hours=1,  # short TTL keeps the memory footprint small
        )

    if 'config_manager' not in ss:
        ss.config_manager = ConfigManager()
        # Overlay Spaces-friendly defaults on top of the stock configuration.
        ss.config_manager.update_config({
            'model': {
                'path': SPACES_CONFIG['model_path'],
                'context_length': SPACES_CONFIG['context_length'],
                'max_tokens': SPACES_CONFIG['max_tokens'],
                'temperature': 0.3,
                'top_p': 0.85,
            },
            'cache': {
                'enabled': SPACES_CONFIG['cache_enabled'],
                'max_size_mb': SPACES_CONFIG['max_memory_mb'],
                'persistent': False,
                'ttl_hours': 1,
            },
            'processing': {
                'chunk_size': 2048,   # smaller chunks for memory
                'chunk_overlap': 128,
                'batch_size': 4,      # smaller batch size
                'clean_text': True,
            },
        })

    if 'performance_monitor' not in ss:
        ss.performance_monitor = PerformanceMonitor(max_history=100)
    if 'text_processor' not in ss:
        ss.text_processor = TextProcessor()

    # Plain-value defaults that need no constructor arguments.
    plain_defaults = {
        'current_analysis': None,
        'analysis_results': [],
        'processing_status': {
            'is_running': False,
            'progress': 0,
            'current_task': '',
            'total_chunks': 0,
            'processed_chunks': 0,
        },
        'model_loaded': False,
        'llm_analyzer': None,
    }
    for key, default in plain_defaults.items():
        if key not in ss:
            ss[key] = default
def show_spaces_optimized_home_page():
    """Render the landing page: feature overview plus the live configuration."""
    st.title("🏠 NZ Legislation Loophole Analyzer")
    st.markdown("### AI-Powered Legal Analysis (Spaces Edition)")

    # Environment caveats specific to Hugging Face Spaces.
    with st.expander("⚠️ Spaces Environment Notes", expanded=False):
        st.info("""
**Running on Hugging Face Spaces**
- Memory optimized for cloud deployment
- Session-based caching (resets between visits)
- Use smaller models for best performance
- Analysis results persist during your session
""")

    left, right = st.columns([2, 1])

    with left:
        st.markdown("""
This AI-powered tool analyzes New Zealand legislation to identify:
πŸ” **Potential Loopholes** - Legal ambiguities that could be exploited
πŸ“‹ **Unintended Consequences** - Hidden implications in legislative language
βš–οΈ **Ambiguities** - Vague or unclear legal provisions
🎯 **Circumvention Strategies** - Ways legislation might be bypassed
**Key Features:**
- **Smart Caching**: Avoid re-processing identical content during your session
- **Memory Optimized**: Designed for Spaces memory constraints
- **Real-time Progress**: Live processing status and performance metrics
- **Export Options**: Download results in multiple formats
""")
        st.markdown("### Quick Start")
        st.markdown("""
1. **Upload** your NZ legislation files (JSON lines or raw text)
2. **Configure** analysis parameters (use smaller models for Spaces)
3. **Process** the legislation with AI-powered analysis
4. **Review** results with interactive visualizations
5. **Export** findings before your session ends
""")

    with right:
        st.markdown("### Current Configuration")
        cfg = st.session_state.config_manager.get_config()

        st.subheader("πŸ€– Model Settings")
        for label, value in (
            ("Model", cfg['model']['path']),
            ("Context Length", cfg['model']['context_length']),
            ("Max Tokens", cfg['model']['max_tokens']),
        ):
            st.info(f"**{label}:** {value}")

        st.subheader("βš™οΈ Processing")
        for label, value in (
            ("Chunk Size", cfg['processing']['chunk_size']),
            ("Overlap", cfg['processing']['chunk_overlap']),
            ("Batch Size", cfg['processing']['batch_size']),
        ):
            st.info(f"**{label}:** {value}")

        st.subheader("🧠 Cache")
        cache_info = st.session_state.cache_manager.get_stats()
        st.info(f"**Status:** {'Active' if cache_info['enabled'] else 'Disabled'}")
        st.info(f"**Max Memory:** {SPACES_CONFIG['max_memory_mb']}MB")
        st.info(f"**Hit Rate:** {cache_info['hit_rate']:.1f}%")

        # Flag memory pressure once usage crosses 80% of the soft limit.
        mem_mb = st.session_state.performance_monitor.get_stats()['memory_usage_mb']
        if mem_mb > SPACES_CONFIG['max_memory_mb'] * 0.8:
            st.warning(f"⚠️ High Memory Usage: {mem_mb:.1f}MB")
        else:
            st.success(f"βœ… Memory Usage: {mem_mb:.1f}MB")

    if st.button("πŸš€ Start Analysis", type="primary", use_container_width=True):
        st.switch_page("pages/1_upload.py")
def show_spaces_optimized_upload_page():
    """Render the upload/processing page.

    Left column: file selection with size warnings and a content preview.
    Right column: model, text-processing, and analysis configuration widgets.
    Bottom row: start / stop / view-results controls.
    """
    st.title("πŸ“€ Upload & Process Legislation")

    with st.expander("πŸ’‘ Spaces Optimization Tips", expanded=False):
        st.info("""
**For Best Performance on Spaces:**
- Use smaller models (0.8B-1.5B parameters)
- Process files individually for large documents
- Keep chunk sizes under 2048 characters
- Monitor memory usage in the sidebar
""")

    st.subheader("πŸ“ Upload Legislation Files")
    col1, col2 = st.columns([1, 1])

    with col1:
        uploaded_files = st.file_uploader(
            "Select NZ legislation files",
            accept_multiple_files=True,
            type=['json', 'txt', 'jsonl'],
            help="Upload JSON lines format (.jsonl), JSON arrays (.json), or raw text (.txt) files",
            key="spaces_file_uploader"
        )
        if uploaded_files:
            st.success(f"πŸ“„ {len(uploaded_files)} file(s) selected")
            for file in uploaded_files:
                with st.expander(f"πŸ“‹ {file.name}"):
                    size_mb = file.size / (1024 * 1024)
                    # FIX: the original passed the bare format spec ".1f" to
                    # st.warning/st.info instead of a formatted message.
                    if size_mb > 10:  # warn on large files
                        st.warning(f"**Size:** {size_mb:.1f}MB (large file - processing may be slow)")
                    else:
                        st.info(f"**Size:** {size_mb:.1f}MB")
                    st.write(f"**Type:** {file.type}")
                    # Preview the first few hundred characters of text-like files.
                    if file.type in ['text/plain', 'application/json']:
                        # errors='replace' keeps the preview from crashing on
                        # non-UTF-8 bytes in an uploaded file.
                        content = file.read().decode('utf-8', errors='replace')
                        preview_length = min(300, len(content))
                        st.text_area(
                            "Preview",
                            content[:preview_length] + "..." if len(content) > preview_length else content,
                            height=100,
                            disabled=True,
                        )
                        file.seek(0)  # reset so later processing reads from the start

    with col2:
        st.subheader("βš™οΈ Processing Configuration")
        config = st.session_state.config_manager.get_config()

        with st.expander("πŸ€– Model Configuration", expanded=True):
            st.info("πŸ’‘ Use smaller models (0.8B-1.5B) for best Spaces performance")
            model_path = st.text_input(
                "Model Path",
                value=config['model']['path'],
                help="Path to your GGUF model file (use small models for Spaces)"
            )
            context_length = st.slider(
                "Context Length",
                min_value=1024,
                max_value=8192,  # reduced max for Spaces
                value=min(config['model']['context_length'], 4096),
                step=512,
                help="Maximum context length for the model"
            )
            max_tokens = st.slider(
                "Max Response Tokens",
                min_value=256,
                max_value=4096,
                value=min(config['model']['max_tokens'], 2048),
                step=128,
                help="Maximum tokens in model response"
            )

        with st.expander("πŸ“ Text Processing", expanded=True):
            chunk_size = st.slider(
                "Chunk Size",
                min_value=512,
                max_value=4096,  # reduced for Spaces memory
                value=min(config['processing']['chunk_size'], 2048),
                step=256,
                help="Size of text chunks for processing"
            )
            chunk_overlap = st.slider(
                "Chunk Overlap",
                min_value=32,
                max_value=512,
                value=config['processing']['chunk_overlap'],
                step=32,
                help="Overlap between chunks for context preservation"
            )

        with st.expander("πŸ” Analysis Settings", expanded=True):
            analysis_depth = st.select_slider(
                "Analysis Depth",
                options=["Basic", "Standard", "Detailed"],  # no "Comprehensive": memory
                value=config['analysis']['depth'],
                help="Level of detail in legal analysis (use Standard for Spaces)"
            )
            include_recommendations = st.checkbox(
                "Include Recommendations",
                value=config['analysis']['include_recommendations'],
                help="Generate specific recommendations for addressing identified issues"
            )

    # Start / stop / results controls.
    col1, col2, col3 = st.columns([1, 1, 1])
    with col1:
        if st.button("πŸ”„ Start Processing", type="primary", use_container_width=True):
            if not uploaded_files:
                st.error("Please upload at least one legislation file")
            else:
                start_spaces_processing(uploaded_files, {
                    'model': {
                        'path': model_path,
                        'context_length': context_length,
                        'max_tokens': max_tokens
                    },
                    'processing': {
                        'chunk_size': chunk_size,
                        'chunk_overlap': chunk_overlap
                    },
                    'analysis': {
                        'depth': analysis_depth,
                        'include_recommendations': include_recommendations
                    }
                })
    with col2:
        if st.button("⏹️ Stop Processing", use_container_width=True):
            stop_processing()
    with col3:
        if st.button("πŸ“Š View Results", use_container_width=True):
            st.switch_page("pages/2_analysis.py")
def start_spaces_processing(files, config):
    """Start processing optimized for Spaces.

    files: uploaded file objects from st.file_uploader.
    config: partial config dict (model/processing/analysis keys) merged into
        the session ConfigManager before processing begins.
    """
    # Refuse to start when memory is already near the Spaces soft limit.
    perf_stats = st.session_state.performance_monitor.get_stats()
    if perf_stats['memory_usage_mb'] > SPACES_CONFIG['max_memory_mb'] * 0.9:
        st.warning("⚠️ High memory usage detected. Consider clearing cache first.")
        # NOTE(review): a button created here only exists for this script run;
        # after the click-triggered rerun this branch may not re-render, so
        # verify the clear-cache flow actually fires as intended.
        if st.button("Clear Cache and Continue"):
            st.session_state.cache_manager.clear_cache()
            st.rerun()
        return
    # Mark the run as started; the processing loop reads this status dict.
    st.session_state.processing_status = {
        'is_running': True,
        'progress': 0,
        'current_task': 'Initializing...',
        'total_chunks': 0,
        'processed_chunks': 0
    }
    # Persist the page's widget values into the session configuration.
    st.session_state.config_manager.update_config(config)
    # Inform the user, then rerun so the UI picks up the new status.
    st.info("πŸ’‘ Processing on Spaces - this may take longer than local execution")
    st.rerun()
def stop_processing():
    """Flag the active processing run as cancelled by the user."""
    status = st.session_state.processing_status
    status['is_running'] = False
    status['current_task'] = 'Stopped by user'
def show_spaces_optimized_results_page():
    """Render session-scoped analysis results: summary metrics, per-analysis
    detail expanders, and export buttons."""
    st.title("πŸ“Š Analysis Results")

    with st.expander("πŸ’Ύ Session-Based Storage", expanded=False):
        st.warning("""
**Important:** Results are stored in your session only.
- Download results before closing your browser
- Cache resets between visits
- Consider using smaller models for faster processing
""")

    if not st.session_state.analysis_results:
        st.info("No analysis results available. Please upload and process legislation files first.")
        return

    st.subheader("πŸ“ˆ Results Overview")
    col1, col2, col3, col4 = st.columns(4)
    results = st.session_state.analysis_results
    total_results = len(results)
    total_loopholes = sum(len(r.get('loopholes', [])) for r in results)
    # max(..., 1) guards division by zero (unreachable here after the early return).
    avg_confidence = sum(r.get('confidence', 0) for r in results) / max(total_results, 1)

    with col1:
        st.metric("Total Analyses", total_results)
    with col2:
        st.metric("Loopholes Found", total_loopholes)
    with col3:
        # FIX: original passed the bare format spec ".2f" instead of the value.
        st.metric("Avg Confidence", f"{avg_confidence:.2f}")
    with col4:
        cache_stats = st.session_state.cache_manager.get_stats()
        st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    st.subheader("πŸ” Detailed Results")
    for i, result in enumerate(results):
        with st.expander(f"πŸ“‹ Analysis {i+1}: {result.get('title', 'Unknown Title')}", expanded=i == 0):
            left, right = st.columns([2, 1])
            with left:
                st.markdown("**Summary:**")
                st.write(result.get('summary', 'No summary available'))
                st.markdown("**Key Findings:**")
                for finding in result.get('loopholes', []):
                    st.markdown(f"- {finding}")
            with right:
                # FIX: same bare-format-spec bug as above.
                st.metric("Confidence", f"{result.get('confidence', 0):.2f}")
                # NOTE(review): assumes results carry a 'processing_time' key
                # in seconds - confirm against the analyzer output schema.
                st.metric("Processing Time", f"{result.get('processing_time', 0):.2f}s")
                st.metric("Chunks Processed", result.get('chunks_processed', 0))

    st.subheader("πŸ’Ύ Export Results")
    col1, col2, col3 = st.columns(3)
    with col1:
        if st.button("πŸ“„ Export as JSON", use_container_width=True):
            export_results('json')
    with col2:
        if st.button("πŸ“Š Export as CSV", use_container_width=True):
            export_results('csv')
    with col3:
        if st.button("πŸ“‹ Export as Excel", use_container_width=True):
            export_results('excel')
def export_results(format_type):
    """Serialize the session's analysis results and offer them as a download.

    format_type: 'json' or 'csv' produce a st.download_button with the
    serialized results; any other value (e.g. 'excel') falls back to an
    informational message, since Excel export needs an extra dependency.

    Replaces the previous stub that only printed a fake success message.
    """
    import csv
    import io
    import json

    results = st.session_state.get('analysis_results', [])
    if not results:
        st.warning("No analysis results to export.")
        return

    if format_type == 'json':
        # default=str keeps non-JSON-native values (e.g. datetimes) exportable.
        payload = json.dumps(results, indent=2, ensure_ascii=False, default=str)
        st.download_button(
            "⬇️ Download JSON",
            data=payload,
            file_name="analysis_results.json",
            mime="application/json",
        )
    elif format_type == 'csv':
        buffer = io.StringIO()
        writer = csv.writer(buffer)
        writer.writerow(["title", "summary", "confidence", "loopholes", "chunks_processed"])
        for result in results:
            writer.writerow([
                result.get('title', ''),
                result.get('summary', ''),
                result.get('confidence', 0),
                "; ".join(str(item) for item in result.get('loopholes', [])),
                result.get('chunks_processed', 0),
            ])
        st.download_button(
            "⬇️ Download CSV",
            data=buffer.getvalue(),
            file_name="analysis_results.csv",
            mime="text/csv",
        )
    else:
        st.info(f"{format_type.upper()} export is not yet supported - use JSON or CSV.")
def show_spaces_optimized_settings_page():
    """Render the settings page: model, processing, cache, and performance
    tabs, plus save/reset controls that persist widget values."""
    st.title("βš™οΈ Settings & Configuration")

    with st.expander("🌐 Spaces Environment", expanded=False):
        st.info("""
**Spaces-Specific Settings:**
- Memory limit: 512MB cache (conservative)
- Session-based storage only
- No persistent data between visits
- Optimized for cloud performance
""")

    tabs = st.tabs(["πŸ€– Model Settings", "πŸ“ Processing", "🧠 Cache", "πŸ“Š Performance"])
    # Fetch once; every tab reads from the same snapshot.
    config = st.session_state.config_manager.get_config()

    with tabs[0]:
        st.subheader("πŸ€– Model Configuration")
        st.info("πŸ’‘ For Spaces: Use smaller models (0.8B-1.5B parameters) for best performance")
        model_path = st.text_input(
            "Model Path",
            value=config['model']['path'],
            help="Path to your GGUF model file (smaller models recommended)"
        )
        context_length = st.slider(
            "Context Length",
            min_value=1024,
            max_value=8192,
            value=config['model']['context_length'],
            step=512,
            help="Maximum context length (smaller = faster processing)"
        )
        max_tokens = st.slider(
            "Max Response Tokens",
            min_value=256,
            max_value=4096,
            value=config['model']['max_tokens'],
            step=128,
            help="Maximum tokens in response (smaller = faster)"
        )
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=config['model']['temperature'],
            step=0.1,
            help="Controls randomness (lower = more consistent)"
        )

    with tabs[1]:
        st.subheader("πŸ“ Text Processing")
        chunk_size = st.slider(
            "Chunk Size",
            min_value=512,
            max_value=4096,
            value=config['processing']['chunk_size'],
            step=256,
            help="Text chunk size (smaller = more memory efficient)"
        )
        chunk_overlap = st.slider(
            "Chunk Overlap",
            min_value=32,
            max_value=512,
            value=config['processing']['chunk_overlap'],
            step=32,
            help="Overlap between chunks for context"
        )
        batch_size = st.slider(
            "Batch Size",
            min_value=1,
            max_value=8,  # reduced for Spaces
            value=config['processing']['batch_size'],
            step=1,
            help="Number of chunks to process at once (lower = less memory)"
        )

    with tabs[2]:
        st.subheader("🧠 Cache Configuration")
        enable_cache = st.checkbox(
            "Enable Caching",
            value=config['cache']['enabled'],
            help="Use cache to avoid re-processing (recommended)"
        )
        st.info(f"πŸ’‘ Max cache size: {SPACES_CONFIG['max_memory_mb']}MB (fixed for Spaces)")
        cache_ttl = st.slider(
            "Cache TTL (hours)",
            min_value=0.5,
            max_value=2.0,
            value=config['cache']['ttl_hours'],
            step=0.5,
            help="How long to keep cached results (shorter = less memory)"
        )

    with tabs[3]:
        st.subheader("πŸ“Š Performance Monitoring")
        perf_stats = st.session_state.performance_monitor.get_stats()
        cache_stats = st.session_state.cache_manager.get_stats()
        col1, col2, col3 = st.columns(3)
        with col1:
            # FIX: original passed the bare format spec ".1f" (with "MB" as a
            # bogus delta argument) instead of a formatted value.
            st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
        with col2:
            st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")
        with col3:
            st.metric("Active Threads", perf_stats.get('active_threads', 0))

        recommendations = st.session_state.performance_monitor.get_recommendations()
        if recommendations:
            st.subheader("πŸ’‘ Recommendations")
            for rec in recommendations:
                # Severity is inferred from keywords in the recommendation text.
                if "High" in rec or "Low" in rec:
                    st.warning(rec)
                else:
                    st.info(rec)

    # Save / reset controls (outside the tabs so they always render).
    col1, col2 = st.columns([1, 1])
    with col1:
        if st.button("πŸ’Ύ Save Settings", type="primary", use_container_width=True):
            st.session_state.config_manager.update_config({
                'model': {
                    'path': model_path,
                    'context_length': context_length,
                    'max_tokens': max_tokens,
                    'temperature': temperature
                },
                'processing': {
                    'chunk_size': chunk_size,
                    'chunk_overlap': chunk_overlap,
                    'batch_size': batch_size
                },
                'cache': {
                    'enabled': enable_cache,
                    'ttl_hours': cache_ttl
                }
            })
            st.success("Settings saved successfully!")
    with col2:
        if st.button("πŸ”„ Reset to Defaults", use_container_width=True):
            st.session_state.config_manager.reset_to_defaults()
            st.success("Settings reset to defaults!")
            st.rerun()
def show_spaces_optimized_performance_page():
    """Render the performance dashboard: real-time metrics, memory pressure
    banner, cache statistics, recommendations, and cache management."""
    st.title("πŸ“ˆ Performance Dashboard")

    with st.expander("🌐 Spaces Performance Notes", expanded=False):
        st.info("""
**Spaces Environment:**
- Memory limit: ~2-8GB shared
- Cache: Session-based only
- Performance: Optimized for cloud
- Monitoring: Real-time metrics
""")

    st.subheader("πŸ“Š Real-time Metrics")
    col1, col2, col3, col4 = st.columns(4)
    perf_stats = st.session_state.performance_monitor.get_stats()
    cache_stats = st.session_state.cache_manager.get_stats()
    with col1:
        # FIX: the original passed the bare format spec ".1f" to st.metric
        # instead of a formatted value (same for the metrics below).
        st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
    with col2:
        st.metric("Memory %", f"{perf_stats.get('memory_percent', 0):.1f}%")
    with col3:
        # NOTE(review): assumes the monitor exposes 'cpu_percent' - confirm
        # the key name against PerformanceMonitor.get_stats().
        st.metric("CPU Usage", f"{perf_stats.get('cpu_percent', 0):.1f}%")
    with col4:
        st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    # Memory pressure banner for the constrained Spaces environment.
    memory_percent = perf_stats.get('memory_percent', 0)
    if memory_percent > 80:
        st.error("⚠️ High memory usage - consider clearing cache")
    elif memory_percent > 60:
        st.warning("⚠️ Moderate memory usage")
    else:
        st.success("βœ… Memory usage within limits")

    st.subheader("🧠 Cache Performance")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Requests", cache_stats['hits'] + cache_stats['misses'])
    with col2:
        st.metric("Cache Hits", cache_stats['hits'])
    with col3:
        st.metric("Cache Misses", cache_stats['misses'])
    with col4:
        st.metric("Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    st.subheader("πŸ’‘ Performance Recommendations")
    recommendations = st.session_state.performance_monitor.get_recommendations()
    if recommendations:
        for rec in recommendations:
            # Severity is inferred from keywords in the recommendation text.
            if "High" in rec or "Low" in rec:
                st.error(rec)
            elif "Moderate" in rec or "Consider" in rec:
                st.warning(rec)
            else:
                st.info(rec)
    else:
        st.success("βœ… Performance is optimal!")

    st.subheader("🧠 Cache Management")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("πŸ”„ Clear Cache", type="secondary", use_container_width=True):
            st.session_state.cache_manager.clear_cache()
            st.success("Cache cleared successfully!")
            st.rerun()
    with col2:
        if st.button("πŸ“Š Reset Statistics", use_container_width=True):
            st.session_state.performance_monitor.reset_stats()
            st.success("Statistics reset!")
            st.rerun()
def main():
    """Application entry point: initialize session state, render the sidebar
    (navigation, cache/performance status), dispatch to the selected page,
    and emit the footer."""
    initialize_spaces_session()

    with st.sidebar:
        st.title("βš–οΈ NZ Legislation Analyzer")
        st.markdown("---")
        st.markdown("**Spaces Edition**")
        st.markdown("---")

        # Display label -> internal page key.
        pages = {
            "🏠 Home": "home",
            "πŸ“€ Upload & Process": "upload",
            "πŸ“Š Analysis Results": "results",
            "βš™οΈ Settings": "settings",
            "πŸ“ˆ Performance": "performance"
        }
        selected_page = st.selectbox(
            "Navigate to:",
            list(pages.keys()),
            key="nav_select"
        )
        st.markdown("---")

        with st.expander("🧠 Cache Status", expanded=True):
            cache_stats = st.session_state.cache_manager.get_stats()
            st.metric("Cache Hits", cache_stats['hits'])
            st.metric("Cache Misses", cache_stats['misses'])
            # FIX: original passed the bare format spec ".1f" instead of the value.
            st.metric("Hit Rate", f"{cache_stats['hit_rate']:.1f}%")
            st.metric("Cached Chunks", cache_stats['entries'])
            if st.button("Clear Cache", type="secondary"):
                st.session_state.cache_manager.clear_cache()
                st.rerun()

        with st.expander("πŸ“Š Performance", expanded=True):
            perf_stats = st.session_state.performance_monitor.get_stats()
            # FIX: same bare-format-spec bug as above.
            st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
            # NOTE(review): assumes the monitor exposes 'cpu_percent' - confirm key name.
            st.metric("CPU Usage", f"{perf_stats.get('cpu_percent', 0):.1f}%")

        if st.session_state.processing_status['is_running']:
            with st.expander("πŸ”„ Processing Status", expanded=True):
                status = st.session_state.processing_status
                st.progress(status['progress'])
                st.text(status['current_task'])
                st.text(f"Chunk {status['processed_chunks']}/{status['total_chunks']}")

    # Dispatch the main content area to the selected page renderer.
    page_renderers = {
        "home": show_spaces_optimized_home_page,
        "upload": show_spaces_optimized_upload_page,
        "results": show_spaces_optimized_results_page,
        "settings": show_spaces_optimized_settings_page,
        "performance": show_spaces_optimized_performance_page,
    }
    page_renderers[pages[selected_page]]()

    # Footer with Spaces branding.
    st.markdown("---")
    st.markdown(
        """
<div style='text-align: center; color: #666; font-size: 12px;'>
NZ Legislation Loophole Analyzer v1.0.0 (Spaces Edition) |
Built with Streamlit & Llama.cpp | Hosted on πŸ€— Hugging Face Spaces
</div>
""",
        unsafe_allow_html=True
    )
# Standard script entry point (also works under `streamlit run app.py`).
if __name__ == "__main__":
    main()