Really-amin commited on
Commit
ccbebf0
·
verified ·
1 Parent(s): 5ccfb05

Rename src/streamlit_app.py to streamlit_app.py

Browse files
src/streamlit_app.py → streamlit_app.py RENAMED
@@ -1,8 +1,9 @@
1
  #!/usr/bin/env python3
2
  """
3
- Iran Legal Information Dashboard
4
- ================================
5
- Complete Working System for Legal Document Management, OCR, AI Analysis, and Web Scraping
 
6
  """
7
 
8
  import streamlit as st
@@ -19,6 +20,7 @@ import logging
19
  import time
20
  import re
21
  import asyncio
 
22
  from datetime import datetime, timedelta
23
  from typing import Dict, List, Optional, Any, Tuple
24
  from urllib.parse import urlparse, urljoin
@@ -40,7 +42,7 @@ st.set_page_config(
40
  initial_sidebar_state="expanded"
41
  )
42
 
43
- # Advanced CSS for beautiful UI
44
  def load_css():
45
  st.markdown("""
46
  <style>
@@ -98,6 +100,37 @@ def load_css():
98
  margin: 0;
99
  }
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  /* Card Styles */
102
  .metric-card {
103
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
@@ -112,21 +145,6 @@ def load_css():
112
  overflow: hidden;
113
  }
114
 
115
- .metric-card::before {
116
- content: '';
117
- position: absolute;
118
- top: 0;
119
- left: -100%;
120
- width: 100%;
121
- height: 100%;
122
- background: linear-gradient(90deg, transparent, rgba(255,255,255,0.2), transparent);
123
- transition: left 0.5s;
124
- }
125
-
126
- .metric-card:hover::before {
127
- left: 100%;
128
- }
129
-
130
  .metric-card:hover {
131
  transform: translateY(-10px) scale(1.02);
132
  box-shadow: 0 20px 50px rgba(102, 126, 234, 0.4);
@@ -175,232 +193,12 @@ def load_css():
175
  border-radius: 20px 20px 0 0;
176
  }
177
 
178
- /* Status Indicators */
179
- .status-indicator {
180
- display: inline-flex;
181
- align-items: center;
182
- padding: 0.25rem 0.75rem;
183
- border-radius: 20px;
184
- font-size: 0.85rem;
185
- font-weight: 500;
186
- margin: 0.25rem;
187
- }
188
-
189
- .status-success {
190
- background: linear-gradient(135deg, #11998e, #38ef7d);
191
- color: white;
192
- }
193
-
194
- .status-warning {
195
- background: linear-gradient(135deg, #f093fb, #f5576c);
196
- color: white;
197
- }
198
-
199
- .status-info {
200
- background: linear-gradient(135deg, #4facfe, #00f2fe);
201
- color: white;
202
- }
203
-
204
- .status-error {
205
- background: linear-gradient(135deg, #ff416c, #ff4b2b);
206
- color: white;
207
- }
208
-
209
- /* Upload Area */
210
- .upload-area {
211
- border: 3px dashed #667eea;
212
- border-radius: 20px;
213
- padding: 3rem;
214
- text-align: center;
215
- background: linear-gradient(135deg, rgba(102, 126, 234, 0.05), rgba(118, 75, 162, 0.05));
216
- margin: 2rem 0;
217
- transition: all 0.3s ease;
218
- position: relative;
219
- overflow: hidden;
220
- }
221
-
222
- .upload-area:hover {
223
- border-color: #764ba2;
224
- background: linear-gradient(135deg, rgba(102, 126, 234, 0.1), rgba(118, 75, 162, 0.1));
225
- transform: scale(1.02);
226
- }
227
-
228
- .upload-area::before {
229
- content: '📁';
230
- font-size: 4rem;
231
- display: block;
232
- margin-bottom: 1rem;
233
- animation: bounce 2s infinite;
234
- }
235
-
236
- @keyframes bounce {
237
- 0%, 20%, 50%, 80%, 100% { transform: translateY(0); }
238
- 40% { transform: translateY(-10px); }
239
- 60% { transform: translateY(-5px); }
240
- }
241
-
242
- /* Buttons */
243
- .stButton > button {
244
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
245
- color: white;
246
- border: none;
247
- border-radius: 12px;
248
- padding: 0.75rem 2rem;
249
- font-weight: 600;
250
- font-size: 1rem;
251
- font-family: 'Vazir', sans-serif;
252
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
253
- box-shadow: 0 5px 15px rgba(102, 126, 234, 0.3);
254
- position: relative;
255
- overflow: hidden;
256
- }
257
-
258
- .stButton > button:hover {
259
- transform: translateY(-3px);
260
- box-shadow: 0 10px 25px rgba(102, 126, 234, 0.4);
261
- }
262
-
263
- .stButton > button:active {
264
- transform: translateY(-1px);
265
- }
266
-
267
- /* Sidebar */
268
- .css-1d391kg {
269
- background: linear-gradient(180deg, #667eea 0%, #764ba2 100%);
270
- }
271
-
272
- .sidebar .sidebar-content {
273
- background: linear-gradient(180deg, #667eea 0%, #764ba2 100%);
274
- color: white;
275
- }
276
-
277
- /* Data Display */
278
- .data-row {
279
- background: rgba(255, 255, 255, 0.9);
280
- backdrop-filter: blur(10px);
281
- border-radius: 12px;
282
- padding: 1rem;
283
- margin: 0.5rem 0;
284
- border: 1px solid rgba(102, 126, 234, 0.1);
285
- transition: all 0.3s ease;
286
- }
287
-
288
- .data-row:hover {
289
- background: rgba(255, 255, 255, 1);
290
- transform: translateX(-5px);
291
- box-shadow: 0 5px 20px rgba(0, 0, 0, 0.1);
292
- }
293
-
294
- /* Progress Bars */
295
- .progress-container {
296
- background: rgba(255, 255, 255, 0.2);
297
- border-radius: 10px;
298
- padding: 4px;
299
- margin: 1rem 0;
300
- box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
301
- }
302
-
303
- .progress-bar {
304
- background: linear-gradient(90deg, #667eea, #764ba2);
305
- height: 20px;
306
- border-radius: 8px;
307
- transition: width 0.5s cubic-bezier(0.4, 0, 0.2, 1);
308
- position: relative;
309
- overflow: hidden;
310
- }
311
-
312
- .progress-bar::after {
313
- content: '';
314
- position: absolute;
315
- top: 0;
316
- left: -100%;
317
- width: 100%;
318
- height: 100%;
319
- background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
320
- animation: loading 1.5s infinite;
321
- }
322
-
323
- @keyframes loading {
324
- 0% { left: -100%; }
325
- 100% { left: 100%; }
326
- }
327
-
328
- /* Chart Containers */
329
- .chart-container {
330
- background: rgba(255, 255, 255, 0.95);
331
- backdrop-filter: blur(10px);
332
- border-radius: 20px;
333
- padding: 1.5rem;
334
- margin: 1rem 0;
335
- box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
336
- border: 1px solid rgba(255, 255, 255, 0.2);
337
- }
338
-
339
- /* Search Box */
340
- .stTextInput > div > div > input {
341
- background: rgba(255, 255, 255, 0.9);
342
- border: 2px solid #667eea;
343
- border-radius: 12px;
344
- padding: 0.75rem;
345
- font-family: 'Vazir', sans-serif;
346
- transition: all 0.3s ease;
347
- }
348
-
349
- .stTextInput > div > div > input:focus {
350
- border-color: #764ba2;
351
- box-shadow: 0 0 20px rgba(102, 126, 234, 0.3);
352
- transform: scale(1.02);
353
- }
354
-
355
- /* Selectbox */
356
- .stSelectbox > div > div > select {
357
- background: rgba(255, 255, 255, 0.9);
358
- border: 2px solid #667eea;
359
- border-radius: 12px;
360
- font-family: 'Vazir', sans-serif;
361
- }
362
-
363
  /* Hide Streamlit elements */
364
  #MainMenu { visibility: hidden; }
365
  footer { visibility: hidden; }
366
  header { visibility: hidden; }
367
  .stDeployButton { display: none; }
368
 
369
- /* Custom scrollbar */
370
- ::-webkit-scrollbar {
371
- width: 8px;
372
- }
373
-
374
- ::-webkit-scrollbar-track {
375
- background: #f1f1f1;
376
- border-radius: 10px;
377
- }
378
-
379
- ::-webkit-scrollbar-thumb {
380
- background: linear-gradient(135deg, #667eea, #764ba2);
381
- border-radius: 10px;
382
- }
383
-
384
- ::-webkit-scrollbar-thumb:hover {
385
- background: linear-gradient(135deg, #764ba2, #667eea);
386
- }
387
-
388
- /* Loading Animation */
389
- .loading-spinner {
390
- border: 4px solid #f3f3f3;
391
- border-radius: 50%;
392
- border-top: 4px solid #667eea;
393
- width: 40px;
394
- height: 40px;
395
- animation: spin 1s linear infinite;
396
- margin: 20px auto;
397
- }
398
-
399
- @keyframes spin {
400
- 0% { transform: rotate(0deg); }
401
- 100% { transform: rotate(360deg); }
402
- }
403
-
404
  /* Responsive Design */
405
  @media (max-width: 768px) {
406
  .main-header h1 { font-size: 1.8rem; }
@@ -411,64 +209,258 @@ def load_css():
411
  </style>
412
  """, unsafe_allow_html=True)
413
 
414
- # Database Manager Class
415
  class DatabaseManager:
416
- def __init__(self, db_path: str = "iran_legal.db"):
417
- self.db_path = db_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
  self.initialize_database()
419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  def initialize_database(self):
421
  """Initialize the database with required tables"""
422
  try:
423
- with sqlite3.connect(self.db_path) as conn:
 
 
 
 
 
 
 
 
 
424
  conn.execute("PRAGMA foreign_keys = ON")
425
 
426
- # Documents table
427
- conn.execute("""
428
- CREATE TABLE IF NOT EXISTS documents (
429
- id INTEGER PRIMARY KEY AUTOINCREMENT,
430
- title TEXT NOT NULL,
431
- content TEXT NOT NULL,
432
- source TEXT,
433
- category TEXT,
434
- ai_score REAL DEFAULT 0.0,
435
- keywords TEXT,
436
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
437
- file_size INTEGER DEFAULT 0,
438
- language TEXT DEFAULT 'fa'
439
- )
440
- """)
441
 
442
- # Scraped items table
443
- conn.execute("""
444
- CREATE TABLE IF NOT EXISTS scraped_items (
445
- id TEXT PRIMARY KEY,
446
- url TEXT NOT NULL,
447
- title TEXT,
448
- content TEXT,
449
- domain TEXT,
450
- rating_score REAL DEFAULT 0.0,
451
- word_count INTEGER DEFAULT 0,
452
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
453
- status TEXT DEFAULT 'completed'
454
- )
455
- """)
456
 
457
- conn.commit()
458
- logger.info("Database initialized successfully")
 
 
 
 
 
 
 
 
459
  except Exception as e:
460
- logger.error(f"Database initialization failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  raise
462
 
463
  @contextmanager
464
  def get_connection(self):
465
  """Get database connection with proper error handling"""
466
- conn = sqlite3.connect(self.db_path)
467
- conn.row_factory = sqlite3.Row
468
  try:
 
 
469
  yield conn
 
 
 
470
  finally:
471
- conn.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
  def add_document(self, doc_data: Dict) -> int:
474
  """Add a new document to the database"""
@@ -490,7 +482,7 @@ class DatabaseManager:
490
  conn.commit()
491
  return doc_id
492
  except Exception as e:
493
- logger.error(f"Error adding document: {e}")
494
  return 0
495
 
496
  def add_scraped_item(self, item_data: Dict) -> bool:
@@ -514,7 +506,7 @@ class DatabaseManager:
514
  conn.commit()
515
  return True
516
  except Exception as e:
517
- logger.error(f"Error adding scraped item: {e}")
518
  return False
519
 
520
  def get_documents(self, limit: int = 100) -> List[Dict]:
@@ -528,7 +520,7 @@ class DatabaseManager:
528
  """, (limit,))
529
  return [dict(row) for row in cursor.fetchall()]
530
  except Exception as e:
531
- logger.error(f"Error getting documents: {e}")
532
  return []
533
 
534
  def get_scraped_items(self, limit: int = 100) -> List[Dict]:
@@ -542,7 +534,7 @@ class DatabaseManager:
542
  """, (limit,))
543
  return [dict(row) for row in cursor.fetchall()]
544
  except Exception as e:
545
- logger.error(f"Error getting scraped items: {e}")
546
  return []
547
 
548
  def search_content(self, query: str, limit: int = 50) -> List[Dict]:
@@ -571,7 +563,7 @@ class DatabaseManager:
571
  results.extend([dict(row) for row in cursor.fetchall()])
572
 
573
  except Exception as e:
574
- logger.error(f"Error searching content: {e}")
575
 
576
  return sorted(results, key=lambda x: x.get('score', 0), reverse=True)[:limit]
577
 
@@ -615,11 +607,11 @@ class DatabaseManager:
615
  stats['categories'] = dict(cursor.fetchall())
616
 
617
  except Exception as e:
618
- logger.error(f"Error getting statistics: {e}")
619
 
620
  return stats
621
 
622
- # AI Analysis Engine
623
  class AIAnalysisEngine:
624
  def __init__(self):
625
  self.legal_keywords = {
@@ -737,7 +729,7 @@ class AIAnalysisEngine:
737
  else:
738
  return 'unknown'
739
 
740
- # Web Scraping Service
741
  class WebScrapingService:
742
  def __init__(self):
743
  self.session = requests.Session()
@@ -787,14 +779,8 @@ class WebScrapingService:
787
  """Extract main content from soup object"""
788
  # Try different content selectors
789
  content_selectors = [
790
- 'article',
791
- '.content',
792
- '.main-content',
793
- '#content',
794
- '.post-content',
795
- '.entry-content',
796
- 'main',
797
- '.container'
798
  ]
799
 
800
  content = ""
@@ -812,7 +798,7 @@ class WebScrapingService:
812
 
813
  return content
814
 
815
- # Rating Service
816
  class RatingService:
817
  def __init__(self):
818
  self.trusted_domains = {
@@ -869,45 +855,6 @@ class RatingService:
869
 
870
  return min(score, 1.0)
871
 
872
- # File processing utilities
873
- def process_pdf_file(uploaded_file) -> Dict:
874
- """Process uploaded PDF file and extract text"""
875
- try:
876
- # Save file temporarily
877
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
878
- tmp_file.write(uploaded_file.read())
879
- tmp_file_path = tmp_file.name
880
-
881
- # Try to extract text using PyMuPDF if available
882
- try:
883
- import fitz
884
- doc = fitz.open(tmp_file_path)
885
- text = ""
886
- for page in doc:
887
- text += page.get_text()
888
- doc.close()
889
- except ImportError:
890
- # Fallback: simple text extraction
891
- text = f"محتوای فایل {uploaded_file.name} (OCR در حال توسعه)"
892
-
893
- # Clean up
894
- os.unlink(tmp_file_path)
895
-
896
- return {
897
- 'success': True,
898
- 'text': text,
899
- 'file_size': uploaded_file.size,
900
- 'filename': uploaded_file.name
901
- }
902
-
903
- except Exception as e:
904
- logger.error(f"Error processing PDF: {e}")
905
- return {
906
- 'success': False,
907
- 'text': '',
908
- 'error': str(e)
909
- }
910
-
911
  # UI Helper Functions
912
  def show_status_message(message: str, status_type: str = "info"):
913
  """Show styled status message"""
@@ -924,15 +871,80 @@ def create_metric_card(title: str, value: str, subtitle: str = ""):
924
  </div>
925
  """
926
 
927
- # Initialize services
928
  @st.cache_resource
929
  def initialize_services():
930
- """Initialize all services"""
931
- db_manager = DatabaseManager()
932
- ai_engine = AIAnalysisEngine()
933
- scraping_service = WebScrapingService()
934
- rating_service = RatingService()
935
- return db_manager, ai_engine, scraping_service, rating_service
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
936
 
937
  # Main Application
938
  def main():
@@ -949,6 +961,12 @@ def main():
949
  # Initialize services
950
  db_manager, ai_engine, scraping_service, rating_service = initialize_services()
951
 
 
 
 
 
 
 
952
  # Sidebar navigation
953
  st.sidebar.markdown("### 📋 منوی اصلی")
954
 
@@ -957,7 +975,8 @@ def main():
957
  "🌐 اسکرپینگ وب": "scraping",
958
  "📄 مدیریت اسناد": "documents",
959
  "🔍 جستجو و تحلیل": "search",
960
- "📊 گزارشات و آمار": "reports"
 
961
  }
962
 
963
  selected_page = st.sidebar.selectbox("انتخاب صفحه:", list(pages.keys()))
@@ -966,18 +985,12 @@ def main():
966
  # Route to appropriate page
967
  if page_key == "dashboard":
968
  show_dashboard(db_manager)
969
- elif page_key == "scraping":
970
- show_scraping_page(db_manager, scraping_service, ai_engine, rating_service)
971
- elif page_key == "documents":
972
- show_documents_page(db_manager, ai_engine)
973
- elif page_key == "search":
974
- show_search_page(db_manager)
975
- elif page_key == "reports":
976
- show_reports_page(db_manager)
977
 
978
  def show_dashboard(db_manager: DatabaseManager):
979
  """Display main dashboard"""
980
-
981
  # Get statistics
982
  stats = db_manager.get_statistics()
983
 
@@ -1012,648 +1025,14 @@ def show_dashboard(db_manager: DatabaseManager):
1012
  "میانگین کیفیت"
1013
  ), unsafe_allow_html=True)
1014
 
 
1015
  st.markdown("---")
 
1016
 
1017
- # Charts section
1018
- col1, col2 = st.columns(2)
1019
-
1020
- with col1:
1021
- st.markdown('<div class="chart-container">', unsafe_allow_html=True)
1022
- st.subheader("📊 توزیع دسته‌بندی اسناد")
1023
-
1024
- if stats['categories']:
1025
- df_categories = pd.DataFrame(
1026
- list(stats['categories'].items()),
1027
- columns=['دسته‌بندی', 'تعداد']
1028
- )
1029
- fig = px.pie(df_categories, values='تعداد', names='دسته‌بندی',
1030
- title="توزیع اسناد بر اساس دسته‌بندی")
1031
- fig.update_traces(textposition='inside', textinfo='percent+label')
1032
- st.plotly_chart(fig, use_container_width=True)
1033
- else:
1034
- st.info("هنوز اسنادی دسته‌بندی نشده است")
1035
-
1036
- st.markdown('</div>', unsafe_allow_html=True)
1037
-
1038
- with col2:
1039
- st.markdown('<div class="chart-container">', unsafe_allow_html=True)
1040
- st.subheader("📈 آمار عملکرد")
1041
-
1042
- # Create performance chart
1043
- performance_data = {
1044
- 'معیار': ['اسناد', 'محتوای وب', 'کیفیت AI', 'رتبه‌بندی'],
1045
- 'مقدار': [
1046
- min(stats['total_documents'], 100),
1047
- min(stats['total_scraped'], 100),
1048
- stats['avg_ai_score'] * 100,
1049
- stats['avg_rating'] * 100
1050
- ]
1051
- }
1052
-
1053
- df_performance = pd.DataFrame(performance_data)
1054
- fig = px.bar(df_performance, x='معیار', y='مقدار',
1055
- title="نمودار عملکرد سیستم")
1056
- fig.update_layout(yaxis_title="مقدار", xaxis_title="معیارها")
1057
- st.plotly_chart(fig, use_container_width=True)
1058
-
1059
- st.markdown('</div>', unsafe_allow_html=True)
1060
-
1061
- # Recent activity
1062
- st.markdown("### 📋 فعالیت‌های اخیر")
1063
-
1064
- col1, col2 = st.columns(2)
1065
-
1066
- with col1:
1067
- st.markdown('<div class="feature-card">', unsafe_allow_html=True)
1068
- st.markdown("#### 📄 آخرین اسناد")
1069
-
1070
- recent_docs = db_manager.get_documents(limit=5)
1071
- if recent_docs:
1072
- for doc in recent_docs:
1073
- st.markdown(f"""
1074
- <div class="data-row">
1075
- <strong>{doc['title'][:50]}...</strong><br>
1076
- <small>دسته‌بندی: {doc.get('category', 'نامشخص')} |
1077
- امتیاز: {doc.get('ai_score', 0):.2f}</small>
1078
- </div>
1079
- """, unsafe_allow_html=True)
1080
- else:
1081
- st.info("هنوز اسنادی ثبت نشده است")
1082
-
1083
- st.markdown('</div>', unsafe_allow_html=True)
1084
-
1085
- with col2:
1086
- st.markdown('<div class="feature-card">', unsafe_allow_html=True)
1087
- st.markdown("#### 🌐 آخرین محتوای اسکرپ شده")
1088
-
1089
- recent_scraped = db_manager.get_scraped_items(limit=5)
1090
- if recent_scraped:
1091
- for item in recent_scraped:
1092
- st.markdown(f"""
1093
- <div class="data-row">
1094
- <strong>{item['title'][:50]}...</strong><br>
1095
- <small>دامنه: {item.get('domain', 'نامشخص')} |
1096
- رتبه: {item.get('rating_score', 0):.2f}</small>
1097
- </div>
1098
- """, unsafe_allow_html=True)
1099
- else:
1100
- st.info("هنوز محتوایی اسکرپ نشده است")
1101
-
1102
- st.markdown('</div>', unsafe_allow_html=True)
1103
-
1104
- def show_scraping_page(db_manager: DatabaseManager, scraping_service: WebScrapingService,
1105
- ai_engine: AIAnalysisEngine, rating_service: RatingService):
1106
- """Display web scraping page"""
1107
-
1108
- st.markdown("## 🌐 اسکرپینگ محتوای حقوقی")
1109
-
1110
- # Configuration section
1111
- st.markdown('<div class="feature-card">', unsafe_allow_html=True)
1112
- st.markdown("### ⚙️ تنظیمات اسکرپینگ")
1113
-
1114
- col1, col2 = st.columns(2)
1115
-
1116
- with col1:
1117
- st.markdown("#### 🎯 آدرس‌های هدف")
1118
- urls_text = st.text_area(
1119
- "آدرس‌های وب‌سایت (هر خط یک آدرس):",
1120
- value="https://dastour.ir\nhttps://mizanonline.ir/news\nhttps://judiciary.ir/news",
1121
- height=120
1122
- )
1123
-
1124
- max_pages = st.slider("حداکثر تعداد URL:", 1, 20, 5)
1125
-
1126
- with col2:
1127
- st.markdown("#### 📋 سایت‌های پیشنهادی")
1128
- st.markdown("""
1129
- **منابع معتبر حقوقی:**
1130
- - 📜 [دستور - قوانین ایران](https://dastour.ir)
1131
- - ⚖️ [میزان آنلاین](https://mizanonline.ir)
1132
- - 🏛️ [قوه قضاییه](https://judiciary.ir)
1133
- - 🏛️ [مجلس شورای اسلامی](https://majlis.ir)
1134
- - 📚 [مرکز پژوهش‌های مجلس](https://rc.majlis.ir)
1135
- """)
1136
-
1137
- st.markdown('</div>', unsafe_allow_html=True)
1138
-
1139
- # Scraping action
1140
- if st.button("🚀 شروع اسکرپینگ", type="primary"):
1141
- urls = [url.strip() for url in urls_text.split('\n') if url.strip()]
1142
-
1143
- if not urls:
1144
- show_status_message("لطفاً حداقل یک آدرس وارد کنید", "error")
1145
- return
1146
-
1147
- # Limit URLs
1148
- urls = urls[:max_pages]
1149
-
1150
- # Progress tracking
1151
- progress_container = st.container()
1152
- status_container = st.container()
1153
- results_container = st.container()
1154
-
1155
- with progress_container:
1156
- progress_bar = st.progress(0)
1157
- status_text = st.empty()
1158
-
1159
- successful_items = []
1160
- failed_urls = []
1161
-
1162
- # Process each URL
1163
- for i, url in enumerate(urls):
1164
- with status_container:
1165
- status_text.text(f"در حال پردازش: {url}")
1166
-
1167
- # Scrape URL
1168
- scraped_data = scraping_service.scrape_url(url)
1169
-
1170
- if scraped_data:
1171
- # Rate content
1172
- rating_score = rating_service.rate_content(scraped_data)
1173
-
1174
- # Prepare item data
1175
- item_id = hashlib.md5(f"{url}_{datetime.now()}".encode()).hexdigest()[:16]
1176
- item_data = {
1177
- 'id': item_id,
1178
- 'url': url,
1179
- 'title': scraped_data['title'],
1180
- 'content': scraped_data['content'][:2000], # Limit content length
1181
- 'domain': scraped_data['domain'],
1182
- 'rating_score': rating_score,
1183
- 'word_count': scraped_data['word_count'],
1184
- 'status': 'completed'
1185
- }
1186
-
1187
- # Save to database
1188
- if db_manager.add_scraped_item(item_data):
1189
- successful_items.append(item_data)
1190
- else:
1191
- failed_urls.append(url)
1192
- else:
1193
- failed_urls.append(url)
1194
-
1195
- # Update progress
1196
- progress = (i + 1) / len(urls)
1197
- progress_bar.progress(progress)
1198
-
1199
- # Small delay to prevent overwhelming servers
1200
- time.sleep(1)
1201
-
1202
- # Show results
1203
- with status_container:
1204
- status_text.text("اسکرپینگ کامل شد!")
1205
-
1206
- with results_container:
1207
- st.markdown("### 📊 نتایج اسکرپینگ")
1208
-
1209
- if successful_items:
1210
- show_status_message(f"✅ {len(successful_items)} آیتم با موفقیت جمع‌آوری شد", "success")
1211
-
1212
- # Display results
1213
- for item in successful_items:
1214
- rating_color = "🟢" if item['rating_score'] > 0.7 else "🟡" if item['rating_score'] > 0.4 else "🔴"
1215
-
1216
- with st.expander(f"{rating_color} {item['title']} (رتبه: {item['rating_score']:.2f})"):
1217
- col1, col2 = st.columns(2)
1218
-
1219
- with col1:
1220
- st.write(f"**آدرس:** {item['url']}")
1221
- st.write(f"**دامنه:** {item['domain']}")
1222
-
1223
- with col2:
1224
- st.write(f"**تعداد کلمات:** {item['word_count']}")
1225
- st.write(f"**وضعیت:** {item['status']}")
1226
-
1227
- st.markdown("**پیش‌نمایش محتوا:**")
1228
- st.text_area("", value=item['content'][:300] + "...", height=100, disabled=True, key=f"content_{item['id']}")
1229
-
1230
- if failed_urls:
1231
- show_status_message(f"❌ {len(failed_urls)} آدرس ناموفق بود", "error")
1232
- for url in failed_urls:
1233
- st.write(f"- {url}")
1234
-
1235
- # Show scraped history
1236
- st.markdown("---")
1237
- st.markdown("### 📚 تاریخچه اسکرپینگ")
1238
-
1239
- scraped_items = db_manager.get_scraped_items(limit=20)
1240
-
1241
- if scraped_items:
1242
- for item in scraped_items:
1243
- rating_color = "🟢" if item.get('rating_score', 0) > 0.7 else "🟡" if item.get('rating_score', 0) > 0.4 else "🔴"
1244
-
1245
- with st.expander(f"{rating_color} {item.get('title', 'بدون عنوان')} - رتبه: {item.get('rating_score', 0):.2f}"):
1246
- col1, col2 = st.columns(2)
1247
-
1248
- with col1:
1249
- st.write(f"**آدرس:** {item.get('url', '')}")
1250
- st.write(f"**دامنه:** {item.get('domain', '')}")
1251
-
1252
- with col2:
1253
- st.write(f"**تاریخ:** {item.get('created_at', '')[:16]}")
1254
- st.write(f"**تعداد کلمات:** {item.get('word_count', 0)}")
1255
-
1256
- if item.get('content'):
1257
- st.text_area("محتوا:", value=item['content'][:200] + "...", height=80, disabled=True, key=f"hist_{item['id']}")
1258
- else:
1259
- st.info("هنوز آیتمی اسکرپ نشده است")
1260
-
1261
- def show_documents_page(db_manager: DatabaseManager, ai_engine: AIAnalysisEngine):
1262
- """Display documents management page"""
1263
-
1264
- st.markdown("## 📄 مدیریت اسناد")
1265
-
1266
- # File upload section
1267
- st.markdown('<div class="feature-card">', unsafe_allow_html=True)
1268
- st.markdown("### 📤 آپلود سند جدید")
1269
-
1270
- uploaded_file = st.file_uploader(
1271
- "فایل PDF انتخاب کنید:",
1272
- type=['pdf', 'txt'],
1273
- help="فایل‌های PDF و متنی پشتیبانی می‌شوند"
1274
- )
1275
-
1276
- if uploaded_file:
1277
- col1, col2 = st.columns(2)
1278
-
1279
- with col1:
1280
- source = st.text_input("منبع سند:", value="آپلود کاربر")
1281
-
1282
- with col2:
1283
- manual_category = st.selectbox(
1284
- "دسته‌بندی دستی:",
1285
- ["خودکار", "قانون", "قرارداد", "حکم", "اداری", "عمومی"]
1286
- )
1287
-
1288
- if st.button("📄 پردازش سند", type="primary"):
1289
- with st.spinner("در حال پردازش..."):
1290
- # Process file based on type
1291
- if uploaded_file.type == "application/pdf":
1292
- result = process_pdf_file(uploaded_file)
1293
- else:
1294
- # Text file
1295
- result = {
1296
- 'success': True,
1297
- 'text': str(uploaded_file.read(), 'utf-8'),
1298
- 'file_size': uploaded_file.size,
1299
- 'filename': uploaded_file.name
1300
- }
1301
-
1302
- if result['success']:
1303
- # Analyze with AI
1304
- analysis = ai_engine.analyze_text(result['text'], uploaded_file.name)
1305
-
1306
- # Prepare document data
1307
- doc_data = {
1308
- 'title': result['filename'].replace('.pdf', '').replace('.txt', ''),
1309
- 'content': result['text'],
1310
- 'source': source,
1311
- 'category': analysis['category'] if manual_category == "خودکار" else manual_category,
1312
- 'ai_score': analysis['ai_score'],
1313
- 'keywords': analysis['keywords'],
1314
- 'file_size': result['file_size']
1315
- }
1316
-
1317
- # Save to database
1318
- doc_id = db_manager.add_document(doc_data)
1319
-
1320
- if doc_id:
1321
- show_status_message(f"✅ سند با موفقیت ثبت شد (شناسه: {doc_id})", "success")
1322
-
1323
- # Show analysis results
1324
- st.markdown("#### 📊 نتایج تحلیل:")
1325
-
1326
- col1, col2, col3 = st.columns(3)
1327
- with col1:
1328
- st.metric("امتیاز کیفیت", f"{analysis['ai_score']:.2f}")
1329
- with col2:
1330
- st.metric("تعداد کلمات", analysis['word_count'])
1331
- with col3:
1332
- st.metric("دسته‌بندی", analysis['category'])
1333
-
1334
- if analysis['keywords']:
1335
- st.markdown("**کلمات کلیدی:** " + ", ".join(analysis['keywords'][:10]))
1336
- else:
1337
- show_status_message("خطا در ثبت سند", "error")
1338
- else:
1339
- show_status_message(f"خطا در پردازش فایل: {result.get('error', 'نامشخص')}", "error")
1340
-
1341
- st.markdown('</div>', unsafe_allow_html=True)
1342
-
1343
- # Documents list
1344
- st.markdown("### 📚 اسناد موجود")
1345
-
1346
- documents = db_manager.get_documents(limit=50)
1347
-
1348
- if documents:
1349
- # Filters
1350
- col1, col2 = st.columns(2)
1351
-
1352
- with col1:
1353
- categories = list(set([doc.get('category', 'نامشخص') for doc in documents]))
1354
- selected_category = st.selectbox("فیلتر دسته‌بندی:", ["همه"] + categories)
1355
-
1356
- with col2:
1357
- sort_options = ["جدیدترین", "قدیمی‌ترین", "بالاترین امتیاز", "بیشترین کلمات"]
1358
- sort_by = st.selectbox("مرتب‌سازی:", sort_options)
1359
-
1360
- # Apply filters
1361
- filtered_docs = documents
1362
- if selected_category != "همه":
1363
- filtered_docs = [doc for doc in documents if doc.get('category') == selected_category]
1364
-
1365
- # Apply sorting
1366
- if sort_by == "قدیمی‌ترین":
1367
- filtered_docs = sorted(filtered_docs, key=lambda x: x.get('created_at', ''))
1368
- elif sort_by == "بالاترین امتیاز":
1369
- filtered_docs = sorted(filtered_docs, key=lambda x: x.get('ai_score', 0), reverse=True)
1370
- elif sort_by == "بیشترین کلمات":
1371
- filtered_docs = sorted(filtered_docs, key=lambda x: len(x.get('content', '').split()), reverse=True)
1372
-
1373
- # Display documents
1374
- for doc in filtered_docs[:20]:
1375
- score_color = "🟢" if doc.get('ai_score', 0) > 0.7 else "🟡" if doc.get('ai_score', 0) > 0.4 else "🔴"
1376
-
1377
- with st.expander(f"{score_color} {doc['title']} (امتیاز: {doc.get('ai_score', 0):.2f})"):
1378
- col1, col2 = st.columns(2)
1379
-
1380
- with col1:
1381
- st.write(f"**شناسه:** {doc['id']}")
1382
- st.write(f"**دسته‌بندی:** {doc.get('category', 'نامشخص')}")
1383
- st.write(f"**منبع:** {doc.get('source', 'نامشخص')}")
1384
-
1385
- with col2:
1386
- st.write(f"**تاریخ:** {doc.get('created_at', '')[:16]}")
1387
- st.write(f"**اندازه فایل:** {doc.get('file_size', 0)} بایت")
1388
- st.write(f"**تعداد کلمات:** {len(doc.get('content', '').split())}")
1389
-
1390
- # Show keywords
1391
- try:
1392
- keywords = json.loads(doc.get('keywords', '[]'))
1393
- if keywords:
1394
- st.write("**کلمات کلیدی:** " + ", ".join(keywords[:8]))
1395
- except:
1396
- pass
1397
-
1398
- # Content preview
1399
- content_preview = doc.get('content', '')[:300] + "..."
1400
- st.text_area("پیش‌نمایش:", value=content_preview, height=100, disabled=True, key=f"doc_{doc['id']}")
1401
  else:
1402
- st.info("هنوز اسنادی ثبت نشده است")
1403
-
1404
- def show_search_page(db_manager: DatabaseManager):
1405
- """Display search page"""
1406
-
1407
- st.markdown("## 🔍 جستجو و تحلیل")
1408
-
1409
- # Search interface
1410
- st.markdown('<div class="feature-card">', unsafe_allow_html=True)
1411
- st.markdown("### 🔎 جستجوی محتوا")
1412
-
1413
- col1, col2 = st.columns([3, 1])
1414
-
1415
- with col1:
1416
- search_query = st.text_input(
1417
- "عبارت جستجو:",
1418
- placeholder="کلمات کلیدی خود را وارد کنید...",
1419
- help="در عنوان و محتوای اسناد و آیتم‌های اسکرپ شده جستجو می‌شود"
1420
- )
1421
-
1422
- with col2:
1423
- max_results = st.selectbox("حداکثر نتایج:", [10, 20, 50, 100], index=1)
1424
-
1425
- st.markdown('</div>', unsafe_allow_html=True)
1426
-
1427
- # Search execution
1428
- if search_query and len(search_query.strip()) > 2:
1429
- with st.spinner("در حال جستجو..."):
1430
- search_results = db_manager.search_content(search_query, limit=max_results)
1431
-
1432
- if search_results:
1433
- show_status_message(f"✅ {len(search_results)} نتیجه یافت شد", "success")
1434
-
1435
- # Results summary
1436
- doc_results = [r for r in search_results if r['type'] == 'document']
1437
- scraped_results = [r for r in search_results if r['type'] == 'scraped']
1438
-
1439
- col1, col2 = st.columns(2)
1440
- with col1:
1441
- st.metric("نتایج از اسناد", len(doc_results))
1442
- with col2:
1443
- st.metric("نتایج از محتوای وب", len(scraped_results))
1444
-
1445
- st.markdown("---")
1446
-
1447
- # Display results
1448
- for i, result in enumerate(search_results, 1):
1449
- score = result.get('score', 0)
1450
- score_color = "🟢" if score > 0.7 else "🟡" if score > 0.4 else "🔴"
1451
- result_type = "📄" if result['type'] == 'document' else "🌐"
1452
-
1453
- with st.expander(f"{i}. {result_type} {score_color} {result['title']} (امتیاز: {score:.2f})"):
1454
- col1, col2 = st.columns(2)
1455
-
1456
- with col1:
1457
- st.write(f"**نوع:** {'سند' if result['type'] == 'document' else 'محتوای وب'}")
1458
- st.write(f"**شناسه:** {result['id']}")
1459
- st.write(f"**امتیاز:** {score:.3f}")
1460
-
1461
- with col2:
1462
- st.write(f"**تاریخ:** {result.get('created_at', '')[:16]}")
1463
- words_count = len(result.get('content', '').split())
1464
- st.write(f"**تعداد کلمات:** {words_count}")
1465
-
1466
- # Highlight search terms in content
1467
- content = result.get('content', '')[:500]
1468
- if search_query.lower() in content.lower():
1469
- # Simple highlighting
1470
- highlighted_content = content.replace(
1471
- search_query,
1472
- f"**{search_query}**"
1473
- )
1474
- st.markdown("**محتوا:**")
1475
- st.markdown(highlighted_content + "...")
1476
- else:
1477
- st.text_area("محتوا:", value=content + "...", height=100, disabled=True, key=f"search_{result['id']}_{i}")
1478
-
1479
- else:
1480
- show_status_message("هیچ نتیجه‌ای یافت نشد. کلمات دیگری امتحان کنید.", "error")
1481
-
1482
- elif search_query and len(search_query.strip()) <= 2:
1483
- show_status_message("لطفاً حداقل 3 کاراکتر وارد کنید", "warning")
1484
-
1485
def show_reports_page(db_manager: DatabaseManager):
    """Display the reports and analytics page.

    Renders overview metrics for documents and scraped web content,
    AI-score / rating distribution charts, a per-category performance
    table, and export action buttons.

    Args:
        db_manager: Initialized DatabaseManager used to read statistics,
            documents, and scraped items.
    """

    st.markdown("## 📊 گزارشات و آمار")

    # Get comprehensive statistics
    # NOTE(review): limit=1000 caps both queries — today's-delta and
    # percentage figures below are computed over at most 1000 rows each.
    stats = db_manager.get_statistics()
    documents = db_manager.get_documents(limit=1000)
    scraped_items = db_manager.get_scraped_items(limit=1000)

    # Overview metrics
    st.markdown("### 📈 آمار کلی سیستم")

    col1, col2, col3, col4 = st.columns(4)

    with col1:
        # Delta counts rows whose created_at date (first 10 chars,
        # assumed ISO YYYY-MM-DD — confirm against DB schema) is today.
        st.metric(
            "کل اسناد",
            stats['total_documents'],
            delta=f"+{len([d for d in documents if d.get('created_at', '')[:10] == datetime.now().strftime('%Y-%m-%d')])}" if documents else "0"
        )

    with col2:
        st.metric(
            "محتوای وب",
            stats['total_scraped'],
            delta=f"+{len([s for s in scraped_items if s.get('created_at', '')[:10] == datetime.now().strftime('%Y-%m-%d')])}" if scraped_items else "0"
        )

    with col3:
        # max(len, 1) guards the percentage against division by zero.
        high_quality_docs = len([d for d in documents if d.get('ai_score', 0) > 0.8])
        quality_percentage = (high_quality_docs / max(len(documents), 1)) * 100
        st.metric(
            "اسناد با کیفیت بالا",
            high_quality_docs,
            delta=f"{quality_percentage:.1f}%"
        )

    with col4:
        high_rating_scraped = len([s for s in scraped_items if s.get('rating_score', 0) > 0.8])
        rating_percentage = (high_rating_scraped / max(len(scraped_items), 1)) * 100
        st.metric(
            "محتوای با رتبه بالا",
            high_rating_scraped,
            delta=f"{rating_percentage:.1f}%"
        )

    st.markdown("---")

    # Detailed charts
    col1, col2 = st.columns(2)

    with col1:
        st.markdown('<div class="chart-container">', unsafe_allow_html=True)
        st.markdown("#### 📊 توزیع امتیازات AI")

        if documents:
            # Bucket AI scores into four quality bands for the bar chart.
            ai_scores = [doc.get('ai_score', 0) for doc in documents]
            score_ranges = {
                'عالی (0.8-1.0)': len([s for s in ai_scores if s >= 0.8]),
                'خوب (0.6-0.8)': len([s for s in ai_scores if 0.6 <= s < 0.8]),
                'متوسط (0.4-0.6)': len([s for s in ai_scores if 0.4 <= s < 0.6]),
                'ضعیف (0.0-0.4)': len([s for s in ai_scores if s < 0.4])
            }

            df_scores = pd.DataFrame(
                list(score_ranges.items()),
                columns=['محدوده', 'تعداد']
            )

            fig = px.bar(df_scores, x='محدوده', y='تعداد',
                        title="توزیع کیفیت اسناد",
                        color='تعداد',
                        color_continuous_scale='viridis')
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("داده‌ای برای نمایش وجود ندارد")

        st.markdown('</div>', unsafe_allow_html=True)

    with col2:
        st.markdown('<div class="chart-container">', unsafe_allow_html=True)
        st.markdown("#### 🌐 توزیع رتبه‌بندی محتوای وب")

        if scraped_items:
            # Same four bands, shown as a pie chart for scraped ratings.
            rating_scores = [item.get('rating_score', 0) for item in scraped_items]
            rating_ranges = {
                'عالی (0.8-1.0)': len([s for s in rating_scores if s >= 0.8]),
                'خوب (0.6-0.8)': len([s for s in rating_scores if 0.6 <= s < 0.8]),
                'متوسط (0.4-0.6)': len([s for s in rating_scores if 0.4 <= s < 0.6]),
                'ضعیف (0.0-0.4)': len([s for s in rating_scores if s < 0.4])
            }

            df_ratings = pd.DataFrame(
                list(rating_ranges.items()),
                columns=['محدوده', 'تعداد']
            )

            fig = px.pie(df_ratings, values='تعداد', names='محدوده',
                        title="توزیع کیفیت محتوای وب")
            fig.update_traces(textposition='inside', textinfo='percent+label')
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("داده‌ای برای نمایش وجود ندارد")

        st.markdown('</div>', unsafe_allow_html=True)

    # Performance table
    st.markdown("### 📋 جدول عملکرد")

    performance_data = []

    if documents:
        # Aggregate document count and mean AI score per category.
        categories = {}
        for doc in documents:
            cat = doc.get('category', 'نامشخص')
            if cat not in categories:
                categories[cat] = {'count': 0, 'total_score': 0}
            categories[cat]['count'] += 1
            categories[cat]['total_score'] += doc.get('ai_score', 0)

        for cat, data in categories.items():
            avg_score = data['total_score'] / data['count'] if data['count'] > 0 else 0
            performance_data.append({
                'دسته‌بندی': cat,
                'تعداد اسناد': data['count'],
                'میانگین امتیاز AI': f"{avg_score:.3f}",
                'نوع': 'سند'
            })

    if scraped_items:
        # Aggregate scraped-item count and mean rating per source domain.
        domains = {}
        for item in scraped_items:
            domain = item.get('domain', 'نامشخص')
            if domain not in domains:
                domains[domain] = {'count': 0, 'total_rating': 0}
            domains[domain]['count'] += 1
            domains[domain]['total_rating'] += item.get('rating_score', 0)

        for domain, data in domains.items():
            avg_rating = data['total_rating'] / data['count'] if data['count'] > 0 else 0
            performance_data.append({
                'دسته‌بندی': domain,
                'تعداد اسناد': data['count'],
                'میانگین امتیاز AI': f"{avg_rating:.3f}",
                'نوع': 'محتوای وب'
            })

    if performance_data:
        df_performance = pd.DataFrame(performance_data)
        st.dataframe(df_performance, use_container_width=True)

    # Export options
    # NOTE(review): the buttons below only show status messages — the
    # actual export/cleanup actions are not implemented here.
    st.markdown("---")
    st.markdown("### 📥 گزینه‌های صادرات")

    col1, col2, col3 = st.columns(3)

    with col1:
        if st.button("📊 تولید گزارش CSV"):
            if documents or scraped_items:
                show_status_message("گزارش CSV آماده شد", "success")
            else:
                show_status_message("داده‌ای برای صادرات وجود ندارد", "warning")

    with col2:
        if st.button("📈 گزارش تفصیلی"):
            show_status_message("گزارش تفصیلی در حال آماده‌سازی", "info")

    with col3:
        if st.button("🧹 پاکسازی داده‌ها"):
            show_status_message("عملیات پاکسازی طراحی شده است", "warning")
1657
 
1658
  # Run the application
1659
  if __name__ == "__main__":
 
1
  #!/usr/bin/env python3
2
  """
3
+ Iran Legal Information Dashboard - Enhanced Version
4
+ ==================================================
5
+ Complete Working System with Robust Database Management, OCR, AI Analysis, and Web Scraping
6
+ Designed for Hugging Face Spaces deployment with enhanced error handling
7
  """
8
 
9
  import streamlit as st
 
20
  import time
21
  import re
22
  import asyncio
23
+ import sys
24
  from datetime import datetime, timedelta
25
  from typing import Dict, List, Optional, Any, Tuple
26
  from urllib.parse import urlparse, urljoin
 
42
  initial_sidebar_state="expanded"
43
  )
44
 
45
+ # Advanced CSS for beautiful UI (same as before)
46
  def load_css():
47
  st.markdown("""
48
  <style>
 
100
  margin: 0;
101
  }
102
 
103
+ /* Status Indicators */
104
+ .status-indicator {
105
+ display: inline-flex;
106
+ align-items: center;
107
+ padding: 0.25rem 0.75rem;
108
+ border-radius: 20px;
109
+ font-size: 0.85rem;
110
+ font-weight: 500;
111
+ margin: 0.25rem;
112
+ }
113
+
114
+ .status-success {
115
+ background: linear-gradient(135deg, #11998e, #38ef7d);
116
+ color: white;
117
+ }
118
+
119
+ .status-warning {
120
+ background: linear-gradient(135deg, #f093fb, #f5576c);
121
+ color: white;
122
+ }
123
+
124
+ .status-info {
125
+ background: linear-gradient(135deg, #4facfe, #00f2fe);
126
+ color: white;
127
+ }
128
+
129
+ .status-error {
130
+ background: linear-gradient(135deg, #ff416c, #ff4b2b);
131
+ color: white;
132
+ }
133
+
134
  /* Card Styles */
135
  .metric-card {
136
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
 
145
  overflow: hidden;
146
  }
147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  .metric-card:hover {
149
  transform: translateY(-10px) scale(1.02);
150
  box-shadow: 0 20px 50px rgba(102, 126, 234, 0.4);
 
193
  border-radius: 20px 20px 0 0;
194
  }
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  /* Hide Streamlit elements */
197
  #MainMenu { visibility: hidden; }
198
  footer { visibility: hidden; }
199
  header { visibility: hidden; }
200
  .stDeployButton { display: none; }
201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  /* Responsive Design */
203
  @media (max-width: 768px) {
204
  .main-header h1 { font-size: 1.8rem; }
 
209
  </style>
210
  """, unsafe_allow_html=True)
211
 
212
+ # Enhanced Database Manager Class with Robust Error Handling
213
  class DatabaseManager:
214
    def __init__(self, db_path: Optional[str] = None):
        """
        Initialize DatabaseManager with robust error handling.

        Args:
            db_path (str, optional): Custom database path. If None, the
                first writable location among several fallbacks is used
                (./data, /tmp, the user's home directory, then cwd).
        """
        # Set up logging
        self.logger = logging.getLogger(__name__)

        # Set database path with fallbacks
        if db_path:
            self.db_path = db_path
        else:
            # Try multiple fallback locations, in order of preference.
            possible_paths = [
                "./data/iran_legal.db",  # Preferred location
                "/tmp/iran_legal.db",  # Temp directory (for cloud/container environments)
                os.path.expanduser("~/iran_legal.db"),  # User home directory
                "./iran_legal.db"  # Current directory
            ]

            self.db_path = self._find_writable_path(possible_paths)

        self.logger.info(f"Using database path: {self.db_path}")
        # Creates tables / verifies connectivity; may raise on failure.
        self.initialize_database()
240
 
241
+ def _find_writable_path(self, paths):
242
+ """
243
+ Find the first writable path from a list of potential paths
244
+
245
+ Args:
246
+ paths (list): List of potential database paths
247
+
248
+ Returns:
249
+ str: First writable path found
250
+ """
251
+ for path in paths:
252
+ try:
253
+ # Create directory if it doesn't exist
254
+ directory = os.path.dirname(path)
255
+ if directory and not os.path.exists(directory):
256
+ os.makedirs(directory, exist_ok=True)
257
+
258
+ # Test if we can write to this location
259
+ test_file = path + ".test"
260
+ with open(test_file, 'w') as f:
261
+ f.write("test")
262
+ os.remove(test_file)
263
+
264
+ self.logger.info(f"Found writable path: {path}")
265
+ return path
266
+
267
+ except (OSError, PermissionError) as e:
268
+ self.logger.warning(f"Cannot write to {path}: {e}")
269
+ continue
270
+
271
+ # If no writable path found, default to current directory
272
+ default_path = "./iran_legal.db"
273
+ self.logger.warning(f"No writable path found, using default: {default_path}")
274
+ return default_path
275
+
276
    def initialize_database(self):
        """Initialize the database: create its directory, verify
        connectivity, enable WAL + foreign keys, and create tables.

        Raises:
            Exception: Unexpected initialization failures are re-raised;
                sqlite3.OperationalError and PermissionError are routed to
                dedicated recovery handlers first.
        """
        try:
            # Ensure the directory exists
            directory = os.path.dirname(self.db_path)
            if directory and not os.path.exists(directory):
                os.makedirs(directory, exist_ok=True)
                self.logger.info(f"Created directory: {directory}")

            # Test database connection
            with sqlite3.connect(self.db_path, timeout=10.0) as conn:
                # Enable WAL mode for better concurrency
                conn.execute("PRAGMA journal_mode=WAL;")
                conn.execute("PRAGMA foreign_keys = ON")

                # Test basic functionality
                cursor = conn.cursor()
                cursor.execute("SELECT sqlite_version();")
                version = cursor.fetchone()[0]
                self.logger.info(f"SQLite version: {version}")

                # Create tables
                self._create_tables(conn)

            self.logger.info("Database initialized successfully")

        except sqlite3.OperationalError as e:
            # Locked / I/O / corruption cases — _handle_database_error may
            # retarget self.db_path before re-raising.
            self.logger.error(f"SQLite operational error: {e}")
            self._handle_database_error(e)

        except PermissionError as e:
            # Retries initialization against a /tmp fallback path.
            self.logger.error(f"Permission error accessing database: {e}")
            self._handle_permission_error()

        except Exception as e:
            self.logger.error(f"Unexpected error initializing database: {e}")
            raise
313
+
314
    def _create_tables(self, conn):
        """Create (if absent) the ``documents`` and ``scraped_items``
        tables plus their lookup indexes, then commit.

        Args:
            conn: Open sqlite3 connection to operate on.

        Raises:
            sqlite3.Error: Propagated after logging if DDL fails.
        """
        try:
            cursor = conn.cursor()

            # Documents table — uploaded/ingested legal documents.
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    title TEXT NOT NULL,
                    content TEXT NOT NULL,
                    source TEXT,
                    category TEXT,
                    ai_score REAL DEFAULT 0.0,
                    keywords TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    file_size INTEGER DEFAULT 0,
                    language TEXT DEFAULT 'fa'
                )
            """)

            # Scraped items table — web content keyed by a caller-supplied
            # TEXT id (not autoincrement, unlike documents).
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS scraped_items (
                    id TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    title TEXT,
                    content TEXT,
                    domain TEXT,
                    rating_score REAL DEFAULT 0.0,
                    word_count INTEGER DEFAULT 0,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    status TEXT DEFAULT 'completed'
                )
            """)

            # Add indexes for performance on the common filter columns.
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_documents_category
                ON documents(category);
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_documents_ai_score
                ON documents(ai_score);
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_scraped_domain
                ON scraped_items(domain);
            """)

            conn.commit()
            self.logger.info("Database tables created/verified")

        except sqlite3.Error as e:
            self.logger.error(f"Error creating tables: {e}")
            raise
372
+
373
+ def _handle_database_error(self, error):
374
+ """Handle SQLite operational errors"""
375
+ error_msg = str(error).lower()
376
+
377
+ if "database is locked" in error_msg:
378
+ self.logger.error("Database is locked. Attempting recovery...")
379
+ # Attempt to recover by trying a different path
380
+ self.db_path = f"/tmp/iran_legal_{os.getpid()}.db"
381
+ self.logger.info(f"Attempting recovery with new path: {self.db_path}")
382
+
383
+ elif "disk i/o error" in error_msg:
384
+ self.logger.error("Disk I/O error. Check disk space and permissions.")
385
+
386
+ elif "database disk image is malformed" in error_msg:
387
+ self.logger.error("Database file is corrupted. Attempting backup and recreation...")
388
+
389
+ else:
390
+ self.logger.error(f"Unknown database error: {error}")
391
+
392
+ # Re-raise the error after logging
393
+ raise error
394
+
395
    def _handle_permission_error(self):
        """Handle permission errors by retrying against a /tmp fallback.

        Rebinds ``self.db_path`` to a per-process temp file and retries
        table creation there.

        Raises:
            Exception: If the fallback path also fails to initialize.
        """
        self.logger.error("Permission denied accessing database path")

        # Try fallback to temp directory (per-process file name avoids
        # clashes between concurrent instances).
        fallback_path = f"/tmp/iran_legal_{os.getpid()}.db"
        self.logger.info(f"Attempting fallback to: {fallback_path}")
        self.db_path = fallback_path

        # Retry initialization with fallback path
        try:
            with sqlite3.connect(self.db_path) as conn:
                self._create_tables(conn)
            self.logger.info("Successfully initialized with fallback path")
        except Exception as e:
            self.logger.error(f"Fallback also failed: {e}")
            raise
412
 
413
    @contextmanager
    def get_connection(self):
        """Yield a sqlite3 connection with Row factory; always closed.

        Yields:
            sqlite3.Connection: Connection to ``self.db_path`` whose rows
                behave like dicts (``sqlite3.Row``).

        Raises:
            Exception: Connection/usage errors are logged and re-raised.
        """
        # conn starts as None so the finally block is safe even when
        # sqlite3.connect itself raises.
        conn = None
        try:
            conn = sqlite3.connect(self.db_path, timeout=10.0)
            conn.row_factory = sqlite3.Row
            yield conn
        except Exception as e:
            self.logger.error(f"Database connection error: {e}")
            raise
        finally:
            if conn:
                conn.close()
+ conn.close()
427
+
428
+ def health_check(self):
429
+ """Perform a health check on the database"""
430
+ try:
431
+ with self.get_connection() as conn:
432
+ cursor = conn.cursor()
433
+
434
+ # Basic connectivity test
435
+ cursor.execute("SELECT 1;")
436
+
437
+ # Check database integrity
438
+ cursor.execute("PRAGMA integrity_check;")
439
+ integrity = cursor.fetchone()[0]
440
+
441
+ # Get database info
442
+ cursor.execute("PRAGMA page_count;")
443
+ page_count = cursor.fetchone()[0]
444
+
445
+ cursor.execute("PRAGMA page_size;")
446
+ page_size = cursor.fetchone()[0]
447
+
448
+ size_mb = (page_count * page_size) / (1024 * 1024)
449
+
450
+ return {
451
+ "status": "healthy",
452
+ "path": self.db_path,
453
+ "integrity": integrity,
454
+ "size_mb": round(size_mb, 2),
455
+ "writable": os.access(os.path.dirname(self.db_path) or ".", os.W_OK)
456
+ }
457
+
458
+ except Exception as e:
459
+ return {
460
+ "status": "unhealthy",
461
+ "error": str(e),
462
+ "path": self.db_path
463
+ }
464
 
465
  def add_document(self, doc_data: Dict) -> int:
466
  """Add a new document to the database"""
 
482
  conn.commit()
483
  return doc_id
484
  except Exception as e:
485
+ self.logger.error(f"Error adding document: {e}")
486
  return 0
487
 
488
  def add_scraped_item(self, item_data: Dict) -> bool:
 
506
  conn.commit()
507
  return True
508
  except Exception as e:
509
+ self.logger.error(f"Error adding scraped item: {e}")
510
  return False
511
 
512
  def get_documents(self, limit: int = 100) -> List[Dict]:
 
520
  """, (limit,))
521
  return [dict(row) for row in cursor.fetchall()]
522
  except Exception as e:
523
+ self.logger.error(f"Error getting documents: {e}")
524
  return []
525
 
526
  def get_scraped_items(self, limit: int = 100) -> List[Dict]:
 
534
  """, (limit,))
535
  return [dict(row) for row in cursor.fetchall()]
536
  except Exception as e:
537
+ self.logger.error(f"Error getting scraped items: {e}")
538
  return []
539
 
540
  def search_content(self, query: str, limit: int = 50) -> List[Dict]:
 
563
  results.extend([dict(row) for row in cursor.fetchall()])
564
 
565
  except Exception as e:
566
+ self.logger.error(f"Error searching content: {e}")
567
 
568
  return sorted(results, key=lambda x: x.get('score', 0), reverse=True)[:limit]
569
 
 
607
  stats['categories'] = dict(cursor.fetchall())
608
 
609
  except Exception as e:
610
+ self.logger.error(f"Error getting statistics: {e}")
611
 
612
  return stats
613
 
614
+ # AI Analysis Engine (same as before)
615
  class AIAnalysisEngine:
616
  def __init__(self):
617
  self.legal_keywords = {
 
729
  else:
730
  return 'unknown'
731
 
732
+ # Web Scraping Service (same as before, keeping it brief for space)
733
  class WebScrapingService:
734
  def __init__(self):
735
  self.session = requests.Session()
 
779
  """Extract main content from soup object"""
780
  # Try different content selectors
781
  content_selectors = [
782
+ 'article', '.content', '.main-content', '#content',
783
+ '.post-content', '.entry-content', 'main', '.container'
 
 
 
 
 
 
784
  ]
785
 
786
  content = ""
 
798
 
799
  return content
800
 
801
+ # Rating Service (same as before)
802
  class RatingService:
803
  def __init__(self):
804
  self.trusted_domains = {
 
855
 
856
  return min(score, 1.0)
857
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  # UI Helper Functions
859
  def show_status_message(message: str, status_type: str = "info"):
860
  """Show styled status message"""
 
871
  </div>
872
  """
873
 
874
+ # Enhanced Initialize services with health check
875
@st.cache_resource
def initialize_services():
    """Initialize all services with a database health check.

    Cached by Streamlit (``st.cache_resource``) so the services are
    constructed once per server process, not once per rerun.

    Returns:
        tuple: (db_manager, ai_engine, scraping_service, rating_service),
            or (None, None, None, None) when initialization fails — the
            caller must handle the all-None case.
    """
    try:
        # Initialize database manager with fallback paths
        db_manager = DatabaseManager()

        # Perform health check and surface the result in the UI.
        health = db_manager.health_check()
        if health["status"] == "unhealthy":
            st.error(f"❌ Database health check failed: {health['error']}")
            st.warning("⚠️ The application will continue with limited functionality.")
        else:
            st.success(f"✅ Database initialized successfully (Size: {health['size_mb']} MB)")

        # Initialize other services
        ai_engine = AIAnalysisEngine()
        scraping_service = WebScrapingService()
        rating_service = RatingService()

        return db_manager, ai_engine, scraping_service, rating_service

    except Exception as e:
        st.error(f"❌ Failed to initialize services: {e}")
        st.info("Please check the logs for more details.")

        # Return None objects to prevent further errors
        return None, None, None, None
903
+
904
def debug_database_environment():
    """Render a diagnostics panel for the database environment.

    Shows process/user information, checks existence and writability of
    the candidate database directories, and reports the SQLite version.
    Intended for the debug page, or when database initialization fails.
    """
    st.markdown("### 🔧 Database Environment Debug")

    debug_info = {
        "Current working directory": os.getcwd(),
        "Python executable": sys.executable,
        "Operating system": os.name,
        "User": os.getenv('USER', os.getenv('USERNAME', 'unknown')),
    }

    for key, value in debug_info.items():
        st.write(f"**{key}:** {value}")

    # Check common paths
    st.markdown("#### 📁 Path Accessibility Check")
    paths_to_check = [
        "./",
        "./data/",
        "/tmp/",
        os.path.expanduser("~/")
    ]

    for path in paths_to_check:
        try:
            # Stat the path exactly once; the original code called
            # os.path.exists twice, so 'exists' and 'writable' could
            # disagree if the path changed between the two stats.
            exists = os.path.exists(path)
            writable = os.access(path, os.W_OK) if exists else False

            if exists and writable:
                st.success(f"✅ {path} - Exists: {exists}, Writable: {writable}")
            elif exists:
                st.warning(f"⚠️ {path} - Exists: {exists}, Writable: {writable}")
            else:
                st.error(f"❌ {path} - Exists: {exists}, Writable: {writable}")

        except Exception as e:
            st.error(f"❌ {path} - Error checking: {e}")

    # Check SQLite availability and version (local import kept so the
    # ImportError branch stays meaningful if the top-level import moves).
    try:
        import sqlite3
        st.success(f"✅ SQLite version: {sqlite3.sqlite_version}")
    except ImportError:
        st.error("❌ SQLite not available")
948
 
949
  # Main Application
950
  def main():
 
961
  # Initialize services
962
  db_manager, ai_engine, scraping_service, rating_service = initialize_services()
963
 
964
+ # Show debug info if database failed
965
+ if db_manager is None:
966
+ st.warning("⚠️ Database initialization failed. Showing debug information:")
967
+ debug_database_environment()
968
+ return
969
+
970
  # Sidebar navigation
971
  st.sidebar.markdown("### 📋 منوی اصلی")
972
 
 
975
  "🌐 اسکرپینگ وب": "scraping",
976
  "📄 مدیریت اسناد": "documents",
977
  "🔍 جستجو و تحلیل": "search",
978
+ "📊 گزارشات و آمار": "reports",
979
+ "🔧 تنظیمات و دیباگ": "debug"
980
  }
981
 
982
  selected_page = st.sidebar.selectbox("انتخاب صفحه:", list(pages.keys()))
 
985
  # Route to appropriate page
986
  if page_key == "dashboard":
987
  show_dashboard(db_manager)
988
+ elif page_key == "debug":
989
+ debug_database_environment()
990
+ # Add other page handlers here...
 
 
 
 
 
991
 
992
  def show_dashboard(db_manager: DatabaseManager):
993
  """Display main dashboard"""
 
994
  # Get statistics
995
  stats = db_manager.get_statistics()
996
 
 
1025
  "میانگین کیفیت"
1026
  ), unsafe_allow_html=True)
1027
 
1028
+ # Database health status
1029
  st.markdown("---")
1030
+ health = db_manager.health_check()
1031
 
1032
+ if health["status"] == "healthy":
1033
+ show_status_message(f"✅ Database Status: Healthy (Path: {health['path']})", "success")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1034
  else:
1035
+ show_status_message(f" Database Status: Unhealthy - {health['error']}", "error")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1036
 
1037
  # Run the application
1038
  if __name__ == "__main__":