Really-amin committed on
Commit
f022ccd
·
verified ·
1 Parent(s): ccbebf0

Upload 2 files

Browse files
Files changed (2) hide show
  1. data/iran_legal.db +0 -0
  2. src/streamlit_app.py +1039 -0
data/iran_legal.db ADDED
File without changes
src/streamlit_app.py ADDED
@@ -0,0 +1,1039 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Iran Legal Information Dashboard - Enhanced Version
4
+ ==================================================
5
+ Complete Working System with Robust Database Management, OCR, AI Analysis, and Web Scraping
6
+ Designed for Hugging Face Spaces deployment with enhanced error handling
7
+ """
8
+
9
# Standard library
import asyncio
import base64
import hashlib
import io
import json
import logging
import os
import re
import sqlite3
import sys
import tempfile
import time
from collections import Counter
from contextlib import contextmanager
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from urllib.parse import urlparse, urljoin

# Third-party
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from bs4 import BeautifulSoup
32
+
33
+ # Configure logging
34
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
35
+ logger = logging.getLogger(__name__)
36
+
37
+ # Page configuration
38
+ st.set_page_config(
39
+ page_title="داشبورد اطلاعات حقوقی ایران",
40
+ page_icon="⚖️",
41
+ layout="wide",
42
+ initial_sidebar_state="expanded"
43
+ )
44
+
45
+ # Advanced CSS for beautiful UI (same as before)
46
def load_css():
    """Inject the dashboard's global CSS via a raw <style> block.

    Sets up RTL layout with the Vazir font, gradient metric/feature cards,
    colored status pills, hides Streamlit's default chrome (menu, footer,
    header, deploy button) and adds responsive tweaks for small screens.
    Must be called once near the top of the page render.
    """
    st.markdown("""
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Vazir:wght@300;400;500;600;700&display=swap');

        /* Global Styles */
        .main {
            font-family: 'Vazir', 'Segoe UI', sans-serif;
            direction: rtl;
            text-align: right;
            background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
            min-height: 100vh;
        }

        /* Header Styles */
        .main-header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            padding: 2rem;
            border-radius: 20px;
            color: white;
            margin-bottom: 2rem;
            text-align: center;
            box-shadow: 0 20px 40px rgba(102, 126, 234, 0.3);
            position: relative;
            overflow: hidden;
        }

        .main-header::before {
            content: '';
            position: absolute;
            top: -50%;
            left: -50%;
            width: 200%;
            height: 200%;
            background: linear-gradient(45deg, transparent, rgba(255,255,255,0.1), transparent);
            transform: rotate(45deg);
            animation: shine 3s infinite;
        }

        @keyframes shine {
            0% { transform: translateX(-100%) translateY(-100%) rotate(45deg); }
            100% { transform: translateX(100%) translateY(100%) rotate(45deg); }
        }

        .main-header h1 {
            font-size: 2.5rem;
            margin-bottom: 0.5rem;
            font-weight: 700;
            text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
        }

        .main-header p {
            font-size: 1.2rem;
            opacity: 0.9;
            margin: 0;
        }

        /* Status Indicators */
        .status-indicator {
            display: inline-flex;
            align-items: center;
            padding: 0.25rem 0.75rem;
            border-radius: 20px;
            font-size: 0.85rem;
            font-weight: 500;
            margin: 0.25rem;
        }

        .status-success {
            background: linear-gradient(135deg, #11998e, #38ef7d);
            color: white;
        }

        .status-warning {
            background: linear-gradient(135deg, #f093fb, #f5576c);
            color: white;
        }

        .status-info {
            background: linear-gradient(135deg, #4facfe, #00f2fe);
            color: white;
        }

        .status-error {
            background: linear-gradient(135deg, #ff416c, #ff4b2b);
            color: white;
        }

        /* Card Styles */
        .metric-card {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            padding: 1.5rem;
            border-radius: 15px;
            color: white;
            text-align: center;
            margin: 0.5rem 0;
            box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
            position: relative;
            overflow: hidden;
        }

        .metric-card:hover {
            transform: translateY(-10px) scale(1.02);
            box-shadow: 0 20px 50px rgba(102, 126, 234, 0.4);
        }

        .metric-value {
            font-size: 2.5rem;
            font-weight: 700;
            margin: 0.5rem 0;
            text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
        }

        .metric-label {
            font-size: 1rem;
            opacity: 0.9;
            text-transform: uppercase;
            letter-spacing: 1px;
        }

        /* Feature Cards */
        .feature-card {
            background: rgba(255, 255, 255, 0.95);
            backdrop-filter: blur(10px);
            padding: 2rem;
            border-radius: 20px;
            box-shadow: 0 15px 35px rgba(0, 0, 0, 0.1);
            margin: 1rem 0;
            border: 1px solid rgba(255, 255, 255, 0.2);
            transition: all 0.3s ease;
            position: relative;
        }

        .feature-card:hover {
            transform: translateY(-5px);
            box-shadow: 0 25px 50px rgba(0, 0, 0, 0.15);
        }

        .feature-card::before {
            content: '';
            position: absolute;
            top: 0;
            left: 0;
            right: 0;
            height: 4px;
            background: linear-gradient(90deg, #667eea, #764ba2);
            border-radius: 20px 20px 0 0;
        }

        /* Hide Streamlit elements */
        #MainMenu { visibility: hidden; }
        footer { visibility: hidden; }
        header { visibility: hidden; }
        .stDeployButton { display: none; }

        /* Responsive Design */
        @media (max-width: 768px) {
            .main-header h1 { font-size: 1.8rem; }
            .main-header p { font-size: 1rem; }
            .metric-card { margin: 0.25rem 0; }
            .feature-card { padding: 1rem; }
        }
    </style>
    """, unsafe_allow_html=True)
211
+
212
+ # Enhanced Database Manager Class with Robust Error Handling
213
class DatabaseManager:
    """SQLite-backed storage for legal documents and scraped web content.

    Handles database-path auto-detection with writable fallbacks (useful in
    read-only / containerized deployments such as Hugging Face Spaces),
    schema creation, health checks and basic CRUD with defensive error
    handling. All query methods swallow errors, log them, and return a
    neutral value so the UI can keep rendering.
    """

    def __init__(self, db_path: str = None):
        """Initialize the manager and create the schema.

        Args:
            db_path: Explicit database file path. If None, the first
                writable location among several fallbacks is used.
        """
        self.logger = logging.getLogger(__name__)

        if db_path:
            self.db_path = db_path
        else:
            # Ordered by preference; later entries are fallbacks for
            # cloud/container environments with restricted filesystems.
            possible_paths = [
                "./data/iran_legal.db",               # Preferred location
                "/tmp/iran_legal.db",                 # Temp dir (cloud/containers)
                os.path.expanduser("~/iran_legal.db"),  # User home directory
                "./iran_legal.db",                    # Current directory
            ]
            self.db_path = self._find_writable_path(possible_paths)

        self.logger.info(f"Using database path: {self.db_path}")
        self.initialize_database()

    def _find_writable_path(self, paths: List[str]) -> str:
        """Return the first path whose directory is (or can be made) writable.

        Args:
            paths: Candidate database file paths, in order of preference.

        Returns:
            The first candidate that passes a write probe, or
            "./iran_legal.db" when none does.
        """
        for path in paths:
            try:
                directory = os.path.dirname(path)
                if directory and not os.path.exists(directory):
                    os.makedirs(directory, exist_ok=True)

                # Probe writability with a throwaway file next to the db.
                test_file = path + ".test"
                with open(test_file, 'w') as f:
                    f.write("test")
                os.remove(test_file)

                self.logger.info(f"Found writable path: {path}")
                return path

            except (OSError, PermissionError) as e:
                self.logger.warning(f"Cannot write to {path}: {e}")
                continue

        default_path = "./iran_legal.db"
        self.logger.warning(f"No writable path found, using default: {default_path}")
        return default_path

    def initialize_database(self):
        """Create the schema, enabling WAL mode and foreign keys.

        Raises:
            sqlite3.OperationalError: When the error handler cannot recover.
            Exception: Any unexpected initialization failure.
        """
        try:
            directory = os.path.dirname(self.db_path)
            if directory and not os.path.exists(directory):
                os.makedirs(directory, exist_ok=True)
                self.logger.info(f"Created directory: {directory}")

            # NOTE: sqlite3's "with connect(...)" only wraps a transaction,
            # it does NOT close the connection -- close explicitly to avoid
            # leaking a file handle (and a lingering WAL lock) per init.
            conn = sqlite3.connect(self.db_path, timeout=10.0)
            try:
                # Enable WAL mode for better concurrency
                conn.execute("PRAGMA journal_mode=WAL;")
                conn.execute("PRAGMA foreign_keys = ON")

                # Sanity check: confirm the engine responds before DDL.
                cursor = conn.cursor()
                cursor.execute("SELECT sqlite_version();")
                version = cursor.fetchone()[0]
                self.logger.info(f"SQLite version: {version}")

                self._create_tables(conn)
            finally:
                conn.close()

            self.logger.info("Database initialized successfully")

        except sqlite3.OperationalError as e:
            self.logger.error(f"SQLite operational error: {e}")
            self._handle_database_error(e)

        except PermissionError as e:
            self.logger.error(f"Permission error accessing database: {e}")
            self._handle_permission_error()

        except Exception as e:
            self.logger.error(f"Unexpected error initializing database: {e}")
            raise

    def _create_tables(self, conn):
        """Create the documents/scraped_items tables and their indexes.

        Args:
            conn: An open sqlite3 connection (caller owns its lifetime).

        Raises:
            sqlite3.Error: If any DDL statement fails.
        """
        try:
            cursor = conn.cursor()

            # Documents table: uploaded/analyzed legal documents.
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    title TEXT NOT NULL,
                    content TEXT NOT NULL,
                    source TEXT,
                    category TEXT,
                    ai_score REAL DEFAULT 0.0,
                    keywords TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    file_size INTEGER DEFAULT 0,
                    language TEXT DEFAULT 'fa'
                )
            """)

            # Scraped items table: content harvested from the web.
            # id is caller-supplied text (e.g. a content hash), not autoinc.
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS scraped_items (
                    id TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    title TEXT,
                    content TEXT,
                    domain TEXT,
                    rating_score REAL DEFAULT 0.0,
                    word_count INTEGER DEFAULT 0,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    status TEXT DEFAULT 'completed'
                )
            """)

            # Indexes for the dashboard's common filters/sorts.
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_documents_category
                ON documents(category);
            """)
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_documents_ai_score
                ON documents(ai_score);
            """)
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_scraped_domain
                ON scraped_items(domain);
            """)

            conn.commit()
            self.logger.info("Database tables created/verified")

        except sqlite3.Error as e:
            self.logger.error(f"Error creating tables: {e}")
            raise

    def _handle_database_error(self, error):
        """Attempt recovery from SQLite operational errors.

        For "database is locked" a process-unique fallback path is tried and,
        if schema creation succeeds there, the error is considered handled.
        (Previously the path was switched but the error was still re-raised,
        so recovery never actually happened.) All other errors are logged
        and re-raised.

        Args:
            error: The original sqlite3.OperationalError.

        Raises:
            The original error, when recovery is impossible or fails.
        """
        error_msg = str(error).lower()

        if "database is locked" in error_msg:
            self.logger.error("Database is locked. Attempting recovery...")
            self.db_path = f"/tmp/iran_legal_{os.getpid()}.db"
            self.logger.info(f"Attempting recovery with new path: {self.db_path}")
            try:
                conn = sqlite3.connect(self.db_path, timeout=10.0)
                try:
                    self._create_tables(conn)
                finally:
                    conn.close()
                self.logger.info("Recovered using fallback database path")
                return  # recovered -- do not re-raise
            except Exception as retry_error:
                self.logger.error(f"Recovery attempt failed: {retry_error}")

        elif "disk i/o error" in error_msg:
            self.logger.error("Disk I/O error. Check disk space and permissions.")

        elif "database disk image is malformed" in error_msg:
            self.logger.error("Database file is corrupted. Attempting backup and recreation...")

        else:
            self.logger.error(f"Unknown database error: {error}")

        # Re-raise the error after logging (no recovery path succeeded).
        raise error

    def _handle_permission_error(self):
        """Fall back to a process-unique temp-dir database on PermissionError.

        Raises:
            Exception: If initialization with the fallback path also fails.
        """
        self.logger.error("Permission denied accessing database path")

        fallback_path = f"/tmp/iran_legal_{os.getpid()}.db"
        self.logger.info(f"Attempting fallback to: {fallback_path}")
        self.db_path = fallback_path

        try:
            # Close explicitly -- "with connect(...)" does not do it.
            conn = sqlite3.connect(self.db_path)
            try:
                self._create_tables(conn)
            finally:
                conn.close()
            self.logger.info("Successfully initialized with fallback path")
        except Exception as e:
            self.logger.error(f"Fallback also failed: {e}")
            raise

    @contextmanager
    def get_connection(self):
        """Yield a row-factory-enabled connection, always closing it after use.

        Yields:
            sqlite3.Connection with sqlite3.Row rows (dict-style access).

        Raises:
            Exception: Re-raises any connection/query error after logging.
        """
        conn = None
        try:
            conn = sqlite3.connect(self.db_path, timeout=10.0)
            conn.row_factory = sqlite3.Row
            yield conn
        except Exception as e:
            self.logger.error(f"Database connection error: {e}")
            raise
        finally:
            if conn:
                conn.close()

    def health_check(self) -> Dict:
        """Run connectivity/integrity checks and gather size information.

        Returns:
            On success: {'status': 'healthy', 'path', 'integrity',
            'size_mb', 'writable'}. On failure: {'status': 'unhealthy',
            'error', 'path'} -- never raises.
        """
        try:
            with self.get_connection() as conn:
                cursor = conn.cursor()

                # Basic connectivity test
                cursor.execute("SELECT 1;")

                # Full integrity scan; returns the string 'ok' when clean.
                cursor.execute("PRAGMA integrity_check;")
                integrity = cursor.fetchone()[0]

                # File size = page_count * page_size.
                cursor.execute("PRAGMA page_count;")
                page_count = cursor.fetchone()[0]
                cursor.execute("PRAGMA page_size;")
                page_size = cursor.fetchone()[0]
                size_mb = (page_count * page_size) / (1024 * 1024)

                return {
                    "status": "healthy",
                    "path": self.db_path,
                    "integrity": integrity,
                    "size_mb": round(size_mb, 2),
                    "writable": os.access(os.path.dirname(self.db_path) or ".", os.W_OK)
                }

        except Exception as e:
            return {
                "status": "unhealthy",
                "error": str(e),
                "path": self.db_path
            }

    def add_document(self, doc_data: Dict) -> int:
        """Insert a document row.

        Args:
            doc_data: Keys title, content, source, category, ai_score,
                keywords (list, stored as JSON) and file_size; all optional.

        Returns:
            The new row id, or 0 on failure (logged, never raised).
        """
        try:
            with self.get_connection() as conn:
                cursor = conn.execute("""
                    INSERT INTO documents (title, content, source, category, ai_score, keywords, file_size)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, (
                    doc_data.get('title', ''),
                    doc_data.get('content', ''),
                    doc_data.get('source', ''),
                    doc_data.get('category', ''),
                    doc_data.get('ai_score', 0.0),
                    json.dumps(doc_data.get('keywords', [])),
                    doc_data.get('file_size', 0)
                ))
                doc_id = cursor.lastrowid
                conn.commit()
                return doc_id
        except Exception as e:
            self.logger.error(f"Error adding document: {e}")
            return 0

    def add_scraped_item(self, item_data: Dict) -> bool:
        """Upsert a scraped item (INSERT OR REPLACE keyed on its text id).

        Args:
            item_data: Keys id, url, title, content, domain, rating_score,
                word_count, status; all optional.

        Returns:
            True on success, False on failure (logged, never raised).
        """
        try:
            with self.get_connection() as conn:
                conn.execute("""
                    INSERT OR REPLACE INTO scraped_items
                    (id, url, title, content, domain, rating_score, word_count, status)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    item_data.get('id', ''),
                    item_data.get('url', ''),
                    item_data.get('title', ''),
                    item_data.get('content', ''),
                    item_data.get('domain', ''),
                    item_data.get('rating_score', 0.0),
                    item_data.get('word_count', 0),
                    item_data.get('status', 'completed')
                ))
                conn.commit()
                return True
        except Exception as e:
            self.logger.error(f"Error adding scraped item: {e}")
            return False

    def get_documents(self, limit: int = 100) -> List[Dict]:
        """Return up to *limit* documents, newest first; [] on failure."""
        try:
            with self.get_connection() as conn:
                cursor = conn.execute("""
                    SELECT * FROM documents
                    ORDER BY created_at DESC
                    LIMIT ?
                """, (limit,))
                return [dict(row) for row in cursor.fetchall()]
        except Exception as e:
            self.logger.error(f"Error getting documents: {e}")
            return []

    def get_scraped_items(self, limit: int = 100) -> List[Dict]:
        """Return up to *limit* scraped items, newest first; [] on failure."""
        try:
            with self.get_connection() as conn:
                cursor = conn.execute("""
                    SELECT * FROM scraped_items
                    ORDER BY created_at DESC
                    LIMIT ?
                """, (limit,))
                return [dict(row) for row in cursor.fetchall()]
        except Exception as e:
            self.logger.error(f"Error getting scraped items: {e}")
            return []

    def search_content(self, query: str, limit: int = 50) -> List[Dict]:
        """Substring-search titles/contents of both tables.

        Each table contributes at most limit//2 rows; the merged result is
        sorted by score (ai_score or rating_score) descending.

        Args:
            query: Plain substring (wrapped in SQL LIKE wildcards).
            limit: Maximum total rows returned.

        Returns:
            List of row dicts with a 'type' discriminator ('document' or
            'scraped'); [] or partial results on failure.
        """
        results = []
        try:
            with self.get_connection() as conn:
                # Search in documents
                cursor = conn.execute("""
                    SELECT 'document' as type, id, title, content, ai_score as score, created_at
                    FROM documents
                    WHERE title LIKE ? OR content LIKE ?
                    ORDER BY ai_score DESC
                    LIMIT ?
                """, (f'%{query}%', f'%{query}%', limit//2))
                results.extend([dict(row) for row in cursor.fetchall()])

                # Search in scraped items
                cursor = conn.execute("""
                    SELECT 'scraped' as type, id, title, content, rating_score as score, created_at
                    FROM scraped_items
                    WHERE title LIKE ? OR content LIKE ?
                    ORDER BY rating_score DESC
                    LIMIT ?
                """, (f'%{query}%', f'%{query}%', limit//2))
                results.extend([dict(row) for row in cursor.fetchall()])

        except Exception as e:
            self.logger.error(f"Error searching content: {e}")

        return sorted(results, key=lambda x: x.get('score', 0), reverse=True)[:limit]

    def get_statistics(self) -> Dict:
        """Aggregate dashboard statistics.

        Returns:
            Dict with total_documents, total_scraped, avg_ai_score,
            avg_rating and a categories -> count mapping. Averages only
            consider rows with a positive score; zeros on failure.
        """
        stats = {
            'total_documents': 0,
            'total_scraped': 0,
            'avg_ai_score': 0.0,
            'avg_rating': 0.0,
            'categories': {}
        }

        try:
            with self.get_connection() as conn:
                cursor = conn.execute("SELECT COUNT(*) FROM documents")
                stats['total_documents'] = cursor.fetchone()[0]

                cursor = conn.execute("SELECT AVG(ai_score) FROM documents WHERE ai_score > 0")
                result = cursor.fetchone()[0]
                stats['avg_ai_score'] = result if result else 0.0

                cursor = conn.execute("SELECT COUNT(*) FROM scraped_items")
                stats['total_scraped'] = cursor.fetchone()[0]

                cursor = conn.execute("SELECT AVG(rating_score) FROM scraped_items WHERE rating_score > 0")
                result = cursor.fetchone()[0]
                stats['avg_rating'] = result if result else 0.0

                cursor = conn.execute("""
                    SELECT category, COUNT(*)
                    FROM documents
                    WHERE category IS NOT NULL
                    GROUP BY category
                """)
                stats['categories'] = dict(cursor.fetchall())

        except Exception as e:
            self.logger.error(f"Error getting statistics: {e}")

        return stats
613
+
614
+ # AI Analysis Engine (same as before)
615
class AIAnalysisEngine:
    """Heuristic analyzer for Persian legal text.

    Scores text quality, predicts a legal category, extracts frequent
    keywords and detects language using keyword lists and regexes only --
    no external model is required.
    """

    def __init__(self):
        # Category name -> indicative Persian legal terms; used both for
        # quality scoring and for category prediction.
        self.legal_keywords = {
            'قانون': ['قانون', 'ماده', 'تبصره', 'بند', 'فصل', 'باب', 'مصوبه'],
            'قرارداد': ['قرارداد', 'عقد', 'طرفین', 'متعاهدین', 'شرایط', 'مفاد'],
            'حکم': ['حکم', 'رای', 'دادگاه', 'قاضی', 'شعبه', 'دعوا', 'خواهان'],
            'اداری': ['اداره', 'سازمان', 'وزارت', 'دولت', 'مقررات', 'بخشنامه']
        }

    def analyze_text(self, text: str, title: str = "") -> Dict:
        """Analyze text and return comprehensive analysis.

        Args:
            text: Document body to analyze.
            title: Optional title; contributes to category prediction only.

        Returns:
            Dict with ai_score, category, keywords, language, word_count and
            char_count. Empty/falsy text yields a minimal stub result.
        """
        if not text:
            return {'ai_score': 0.0, 'category': 'نامشخص', 'keywords': []}

        quality_score = self._calculate_quality_score(text)
        # Title words also count toward category prediction.
        category = self._predict_category(text + " " + title)
        keywords = self._extract_keywords(text)
        language = self._detect_language(text)

        return {
            'ai_score': quality_score,
            'category': category,
            'keywords': keywords,
            'language': language,
            'word_count': len(text.split()),
            'char_count': len(text)
        }

    def _calculate_quality_score(self, text: str) -> float:
        """Return a quality score in [0, 1] combining length, legal-term
        density, structural markers and Persian-character ratio."""
        score = 0.0
        word_count = len(text.split())

        # Length scoring: medium-length documents score best.
        if 50 <= word_count <= 5000:
            score += 0.3
        elif word_count > 5000:
            score += 0.2
        elif word_count >= 20:
            score += 0.1

        # Legal terms scoring: counts every occurrence across all categories.
        legal_term_count = 0
        for category_terms in self.legal_keywords.values():
            for term in category_terms:
                legal_term_count += text.count(term)

        if legal_term_count >= 5:
            score += 0.4
        elif legal_term_count >= 2:
            score += 0.2
        elif legal_term_count >= 1:
            score += 0.1

        # Structure scoring: article numbers, clause letters, slash dates.
        if re.search(r'ماده\s*\d+', text):
            score += 0.1
        if re.search(r'بند\s*[الف-ی]', text):
            score += 0.05
        if re.search(r'\d{1,2}/\d{1,2}/\d{2,4}', text):
            score += 0.05

        # Language quality: reward predominantly Persian text.
        persian_ratio = len(re.findall(r'[\u0600-\u06FF]', text)) / max(len(text), 1)
        if persian_ratio > 0.5:
            score += 0.1

        return min(score, 1.0)

    def _predict_category(self, text: str) -> str:
        """Predict the document category by keyword-occurrence counts;
        returns 'عمومی' (general) when nothing matches."""
        text_lower = text.lower()
        category_scores = {}

        for category, keywords in self.legal_keywords.items():
            score = sum(text_lower.count(keyword) for keyword in keywords)
            category_scores[category] = score

        if category_scores:
            best_category = max(category_scores, key=category_scores.get)
            return best_category if category_scores[best_category] > 0 else 'عمومی'

        return 'عمومی'

    def _extract_keywords(self, text: str, max_keywords: int = 10) -> List[str]:
        """Return up to max_keywords most frequent Persian words.

        The regex already requires 3+ Persian characters per word, so the
        former redundant per-word length filter was removed; Counter replaces
        the hand-rolled frequency dict (equal counts keep first-encountered
        order, matching the original stable sort).
        """
        words = re.findall(r'[\u0600-\u06FF]{3,}', text)
        return [word for word, _ in Counter(words).most_common(max_keywords)]

    def _detect_language(self, text: str) -> str:
        """Detect text language: 'fa' when Persian chars outnumber Latin,
        'en' when any Latin letters remain, else 'unknown'."""
        persian_chars = len(re.findall(r'[\u0600-\u06FF]', text))
        english_chars = len(re.findall(r'[a-zA-Z]', text))

        if persian_chars > english_chars:
            return 'fa'
        elif english_chars > 0:
            return 'en'
        else:
            return 'unknown'
731
+
732
+ # Web Scraping Service (same as before, keeping it brief for space)
733
class WebScrapingService:
    """Fetches web pages over HTTP and extracts their main textual content."""

    def __init__(self):
        # One shared session (connection reuse) with a browser-like
        # User-Agent, since some sites reject default client agents.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

    def scrape_url(self, url: str) -> Optional[Dict]:
        """Scrape content from a single URL.

        Args:
            url: Absolute URL to fetch (15 s timeout).

        Returns:
            Dict with url/title/content/domain/word_count/success on success,
            or None when the request fails or the page yields fewer than 50
            characters of usable text.
        """
        try:
            response = self.session.get(url, timeout=15)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            # Remove unwanted elements (scripts, styles, chrome/boilerplate).
            for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
                element.decompose()

            # Extract title (Persian fallback means "untitled").
            title_tag = soup.find('title')
            title = title_tag.get_text().strip() if title_tag else "بدون عنوان"

            # Extract main content
            content = self._extract_main_content(soup)

            # Too little text usually means a JS-rendered or empty page.
            if not content or len(content.strip()) < 50:
                return None

            # Collapse all whitespace runs into single spaces.
            content = re.sub(r'\s+', ' ', content).strip()

            return {
                'url': url,
                'title': title,
                'content': content,
                'domain': urlparse(url).netloc,
                'word_count': len(content.split()),
                'success': True
            }

        except Exception as e:
            logger.error(f"Error scraping {url}: {e}")
            return None

    def _extract_main_content(self, soup: BeautifulSoup) -> str:
        """Extract main content from the parsed page.

        Tries a list of common content selectors (first match wins) and falls
        back to the whole <body> text when none matches.
        """
        # Try different content selectors, roughly most-specific first.
        content_selectors = [
            'article', '.content', '.main-content', '#content',
            '.post-content', '.entry-content', 'main', '.container'
        ]

        content = ""
        for selector in content_selectors:
            elements = soup.select(selector)
            if elements:
                content = ' '.join([elem.get_text().strip() for elem in elements])
                break

        # Fallback to body
        if not content:
            body = soup.find('body')
            if body:
                content = body.get_text()

        return content
800
+
801
+ # Rating Service (same as before)
802
class RatingService:
    """Scores scraped legal content by source credibility, body quality and
    title relevance, producing a value in [0, 1]."""

    def __init__(self):
        # Hand-curated credibility scores for well-known Iranian legal sources.
        self.trusted_domains = {
            'dastour.ir': 1.0,
            'mizanonline.ir': 0.9,
            'judiciary.ir': 1.0,
            'majlis.ir': 1.0,
            'dolat.ir': 0.8,
            'rc.majlis.ir': 0.9
        }

    def rate_content(self, content_data: Dict) -> float:
        """Rate one scraped item.

        Weighting: source credibility 40%, body quality 40% (length plus
        legal-term hits), title relevance 20%; the result is capped at 1.0.

        Args:
            content_data: Dict with optional 'domain', 'content' and 'title'.

        Returns:
            Quality score between 0.0 and 1.0.
        """
        total = 0.0

        # --- Source credibility (40%) ---
        host = content_data.get('domain', '')
        credibility = self.trusted_domains.get(host, 0.3)
        if '.gov.ir' in host:
            credibility = max(credibility, 0.7)
        elif '.ir' in host:
            credibility = max(credibility, 0.4)
        total += credibility * 0.4

        # --- Body quality (40%): length tier + legal-term coverage ---
        body = content_data.get('content', '')
        n_words = len(body.split())
        size_part = (0.3 if n_words >= 200 else
                     0.2 if n_words >= 100 else
                     0.1 if n_words >= 50 else
                     0.0)

        indicator_terms = ['قانون', 'حقوق', 'دادگاه', 'ماده', 'حکم', 'رای']
        hits = sum(1 for term in indicator_terms if term in body)
        term_part = min(hits / len(indicator_terms), 1.0) * 0.1

        total += (size_part + term_part) * 0.4

        # --- Title relevance (20%): non-trivial length + any legal term ---
        heading = content_data.get('title', '')
        heading_part = 0.1 if len(heading) > 10 else 0.0
        if any(term in heading for term in indicator_terms):
            heading_part += 0.1
        total += heading_part * 0.2

        return min(total, 1.0)
857
+
858
+ # UI Helper Functions
859
def show_status_message(message: str, status_type: str = "info"):
    """Render *message* as a colored status pill.

    Args:
        message: Text to display.
        status_type: One of the CSS suffixes defined in load_css():
            "success", "warning", "info" (default) or "error".
    """
    html = f'<div class="status-indicator status-{status_type}">{message}</div>'
    st.markdown(html, unsafe_allow_html=True)
863
+
864
def create_metric_card(title: str, value: str, subtitle: str = "") -> str:
    """Return the HTML for a styled metric card.

    The caller renders it via st.markdown(..., unsafe_allow_html=True);
    styling comes from the .metric-card rules injected by load_css().

    Args:
        title: Small uppercase label shown above the value.
        value: The headline number/text.
        subtitle: Optional smaller caption; its <div> is omitted entirely
            when empty.
    """
    return f"""
    <div class="metric-card">
        <div class="metric-label">{title}</div>
        <div class="metric-value">{value}</div>
        {f'<div style="font-size: 0.9rem; opacity: 0.8;">{subtitle}</div>' if subtitle else ''}
    </div>
    """
873
+
874
+ # Enhanced Initialize services with health check
875
@st.cache_resource
def initialize_services():
    """Initialize all services with a database health check.

    Cached by Streamlit (@st.cache_resource) so the database manager and
    service objects are created once per process rather than on every rerun.

    Returns:
        Tuple (DatabaseManager, AIAnalysisEngine, WebScrapingService,
        RatingService), or (None, None, None, None) when initialization
        raised -- callers must check for None before use.
    """
    try:
        # Initialize database manager with fallback paths
        db_manager = DatabaseManager()

        # Perform health check; an unhealthy DB is reported but NOT fatal --
        # the manager object is still returned for limited functionality.
        health = db_manager.health_check()
        if health["status"] == "unhealthy":
            st.error(f"❌ Database health check failed: {health['error']}")
            st.warning("⚠️ The application will continue with limited functionality.")
        else:
            st.success(f"✅ Database initialized successfully (Size: {health['size_mb']} MB)")

        # Initialize other services
        ai_engine = AIAnalysisEngine()
        scraping_service = WebScrapingService()
        rating_service = RatingService()

        return db_manager, ai_engine, scraping_service, rating_service

    except Exception as e:
        st.error(f"❌ Failed to initialize services: {e}")
        st.info("Please check the logs for more details.")

        # Return None objects to prevent further errors
        return None, None, None, None
903
+
904
def debug_database_environment():
    """Render diagnostics for database initialization failures.

    Shows process/environment facts, writability of every candidate
    database directory (mirroring DatabaseManager's fallback list), and the
    SQLite library version.
    """
    st.markdown("### 🔧 Database Environment Debug")

    debug_info = {
        "Current working directory": os.getcwd(),
        "Python executable": sys.executable,
        "Operating system": os.name,
        "User": os.getenv('USER', os.getenv('USERNAME', 'unknown')),
    }

    for key, value in debug_info.items():
        st.write(f"**{key}:** {value}")

    # Check common paths
    st.markdown("#### 📁 Path Accessibility Check")
    paths_to_check = [
        "./",
        "./data/",
        "/tmp/",
        os.path.expanduser("~/")
    ]

    for path in paths_to_check:
        try:
            # A missing path is reported as not writable rather than probed.
            writable = os.access(path, os.W_OK) if os.path.exists(path) else False
            exists = os.path.exists(path)

            if exists and writable:
                st.success(f"✅ {path} - Exists: {exists}, Writable: {writable}")
            elif exists:
                st.warning(f"⚠️ {path} - Exists: {exists}, Writable: {writable}")
            else:
                st.error(f"❌ {path} - Exists: {exists}, Writable: {writable}")

        except Exception as e:
            st.error(f"❌ {path} - Error checking: {e}")

    # Check SQLite (local import is a belt-and-braces probe; sqlite3 is
    # also imported at module level).
    try:
        import sqlite3
        st.success(f"✅ SQLite version: {sqlite3.sqlite_version}")
    except ImportError:
        st.error("❌ SQLite not available")
948
+
949
+ # Main Application
950
def main():
    """Application entry point: inject CSS, render the header, initialize
    services and route the sidebar selection to a page renderer."""
    load_css()

    # Header
    st.markdown("""
    <div class="main-header">
        <h1>🏛️ داشبورد اطلاعات حقوقی جمهوری اسلامی ایران</h1>
        <p>سیستم جامع جمع‌آوری، تحلیل و رتبه‌بندی اطلاعات حقوقی با هوش مصنوعی</p>
    </div>
    """, unsafe_allow_html=True)

    # Initialize services (cached across reruns via st.cache_resource)
    db_manager, ai_engine, scraping_service, rating_service = initialize_services()

    # Show debug info if database failed, then abort the rest of the page.
    if db_manager is None:
        st.warning("⚠️ Database initialization failed. Showing debug information:")
        debug_database_environment()
        return

    # Sidebar navigation
    st.sidebar.markdown("### 📋 منوی اصلی")

    # Display label (Persian) -> internal route key.
    pages = {
        "🏠 داشبورد اصلی": "dashboard",
        "🌐 اسکرپینگ وب": "scraping",
        "📄 مدیریت اسناد": "documents",
        "🔍 جستجو و تحلیل": "search",
        "📊 گزارشات و آمار": "reports",
        "🔧 تنظیمات و دیباگ": "debug"
    }

    selected_page = st.sidebar.selectbox("انتخاب صفحه:", list(pages.keys()))
    page_key = pages[selected_page]

    # Route to appropriate page. Only "dashboard" and "debug" are handled
    # here; the remaining keys are placeholders with no renderer yet.
    if page_key == "dashboard":
        show_dashboard(db_manager)
    elif page_key == "debug":
        debug_database_environment()
    # Add other page handlers here...
991
+
992
def show_dashboard(db_manager: DatabaseManager):
    """Display the main dashboard: four metric cards built from database
    statistics, followed by a database health status banner."""
    # Get statistics
    stats = db_manager.get_statistics()

    # Metrics row (labels are Persian: total docs, web content,
    # AI quality, rating)
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.markdown(create_metric_card(
            "کل اسناد",
            str(stats['total_documents']),
            "اسناد ثبت شده"
        ), unsafe_allow_html=True)

    with col2:
        st.markdown(create_metric_card(
            "محتوای وب",
            str(stats['total_scraped']),
            "آیتم جمع‌آوری شده"
        ), unsafe_allow_html=True)

    with col3:
        st.markdown(create_metric_card(
            "کیفیت AI",
            f"{stats['avg_ai_score']:.2f}",
            "میانگین امتیاز"
        ), unsafe_allow_html=True)

    with col4:
        st.markdown(create_metric_card(
            "رتبه‌بندی",
            f"{stats['avg_rating']:.2f}",
            "میانگین کیفیت"
        ), unsafe_allow_html=True)

    # Database health status
    st.markdown("---")
    health = db_manager.health_check()

    if health["status"] == "healthy":
        show_status_message(f"✅ Database Status: Healthy (Path: {health['path']})", "success")
    else:
        show_status_message(f"❌ Database Status: Unhealthy - {health['error']}", "error")
1036
+
1037
# Run the application
# Standard entry-point guard so importing this module does not launch the UI.
if __name__ == "__main__":
    main()