Really-amin committed on
Commit
b233d94
·
verified ·
1 Parent(s): beda519

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +23 -988
src/streamlit_app.py CHANGED
@@ -1,996 +1,31 @@
1
  #!/usr/bin/env python3
2
  """
3
- Iran Legal Information Dashboard - Hugging Face Spaces Version
4
- ============================================================
5
- Optimized for Hugging Face Spaces deployment
6
  """
7
 
8
- import streamlit as st
9
- import pandas as pd
10
- import plotly.express as px
11
- import plotly.graph_objects as go
12
- import sqlite3
13
  import os
14
- import tempfile
15
- import io
16
- import json
17
- import hashlib
18
- import logging
19
- import time
20
- import re
21
- from datetime import datetime, timedelta
22
- from typing import Dict, List, Optional, Any, Tuple
23
- from urllib.parse import urlparse, urljoin
24
- from contextlib import contextmanager
25
- import requests
26
- from bs4 import BeautifulSoup
27
- import base64
28
- from pathlib import Path
29
 
30
- # Configure logging for Hugging Face
31
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
32
- logger = logging.getLogger(__name__)
33
 
34
- # Page configuration
35
- st.set_page_config(
36
- page_title="داشبورد اطلاعات حقوقی ایران",
37
- page_icon="⚖️",
38
- layout="wide",
39
- initial_sidebar_state="expanded"
40
- )
41
-
42
- # Enhanced CSS with Fixed Sidebar Menu and Better Persian Typography
43
- def load_css():
44
- st.markdown("""
45
- <style>
46
- /* Import Persian Fonts */
47
- @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
48
- @import url('https://fonts.googleapis.com/css2?family=Yekan+Bakh:wght@300;400;500;600;700&display=swap');
49
-
50
- /* Root Variables */
51
- :root {
52
- --primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
53
- --secondary-gradient: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
54
- --text-primary: #1a202c;
55
- --text-secondary: #4a5568;
56
- --text-light: #718096;
57
- --white: #ffffff;
58
- --shadow-light: 0 4px 6px rgba(0, 0, 0, 0.05);
59
- --shadow-medium: 0 10px 25px rgba(0, 0, 0, 0.1);
60
- --shadow-heavy: 0 20px 40px rgba(102, 126, 234, 0.2);
61
- --border-radius: 16px;
62
- --transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
63
- --sidebar-width: 280px;
64
- }
65
-
66
- /* Global Typography */
67
- * {
68
- font-family: 'Vazirmatn', 'Yekan Bakh', 'Tahoma', 'Arial', sans-serif !important;
69
- font-feature-settings: "kern" 1, "liga" 1;
70
- text-rendering: optimizeLegibility;
71
- -webkit-font-smoothing: antialiased;
72
- -moz-osx-font-smoothing: grayscale;
73
- }
74
-
75
- /* Main Content Area */
76
- .main {
77
- direction: rtl;
78
- text-align: right;
79
- background: var(--secondary-gradient);
80
- min-height: 100vh;
81
- padding: 1.5rem 1.5rem 1.5rem var(--sidebar-width);
82
- line-height: 1.8;
83
- font-weight: 400;
84
- transition: var(--transition);
85
- }
86
-
87
- /* Fixed Sidebar */
88
- .css-1d391kg {
89
- position: fixed !important;
90
- top: 0;
91
- left: 0;
92
- height: 100vh;
93
- width: var(--sidebar-width);
94
- background: var(--primary-gradient) !important;
95
- z-index: 999999;
96
- overflow-y: auto;
97
- box-shadow: 4px 0 20px rgba(0, 0, 0, 0.15);
98
- border-right: 1px solid rgba(255, 255, 255, 0.1);
99
- backdrop-filter: blur(20px);
100
- }
101
-
102
- .css-1d391kg .css-17eq0hr {
103
- background: transparent !important;
104
- padding: 0 !important;
105
- }
106
-
107
- /* Sidebar Content */
108
- .sidebar .sidebar-content {
109
- background: transparent !important;
110
- color: white !important;
111
- padding: 2rem 1.5rem;
112
- height: 100%;
113
- overflow-y: auto;
114
- }
115
-
116
- /* Sidebar Navigation Buttons */
117
- .css-1d391kg .stButton > button {
118
- width: 100% !important;
119
- background: rgba(255, 255, 255, 0.1) !important;
120
- color: white !important;
121
- border: 1px solid rgba(255, 255, 255, 0.2) !important;
122
- border-radius: 12px !important;
123
- padding: 0.8rem 1.2rem !important;
124
- margin: 0.3rem 0 !important;
125
- font-family: 'Vazirmatn', sans-serif !important;
126
- font-weight: 500 !important;
127
- font-size: 0.95rem !important;
128
- text-align: right !important;
129
- direction: rtl !important;
130
- transition: all 0.3s ease !important;
131
- backdrop-filter: blur(10px) !important;
132
- }
133
-
134
- .css-1d391kg .stButton > button:hover {
135
- background: rgba(255, 255, 255, 0.2) !important;
136
- transform: translateX(-5px) !important;
137
- box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2) !important;
138
- border-color: rgba(255, 255, 255, 0.3) !important;
139
- }
140
-
141
- .css-1d391kg .stButton > button:active,
142
- .css-1d391kg .stButton > button:focus {
143
- background: rgba(255, 255, 255, 0.25) !important;
144
- border-color: rgba(255, 255, 255, 0.4) !important;
145
- box-shadow: 0 0 0 2px rgba(255, 255, 255, 0.3) !important;
146
- }
147
-
148
- /* Sidebar Text and Elements */
149
- .css-1d391kg .stMarkdown,
150
- .css-1d391kg h1, .css-1d391kg h2, .css-1d391kg h3,
151
- .css-1d391kg p, .css-1d391kg div {
152
- color: white !important;
153
- font-family: 'Vazirmatn', sans-serif !important;
154
- text-align: right !important;
155
- direction: rtl !important;
156
- }
157
-
158
- .css-1d391kg .stMarkdown h3 {
159
- font-size: 1.1rem !important;
160
- font-weight: 600 !important;
161
- margin: 1.5rem 0 1rem 0 !important;
162
- text-align: center !important;
163
- border-bottom: 2px solid rgba(255, 255, 255, 0.2) !important;
164
- padding-bottom: 0.8rem !important;
165
- }
166
-
167
- /* Sidebar Header */
168
- .sidebar-header {
169
- text-align: center;
170
- padding: 1.5rem 1rem;
171
- border-bottom: 2px solid rgba(255, 255, 255, 0.15);
172
- margin-bottom: 2rem;
173
- }
174
-
175
- .sidebar-header h2 {
176
- color: white !important;
177
- font-size: 1.4rem !important;
178
- font-weight: 700 !important;
179
- margin: 0 !important;
180
- line-height: 1.4 !important;
181
- text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
182
- }
183
-
184
- .sidebar-header p {
185
- color: rgba(255, 255, 255, 0.8) !important;
186
- font-size: 0.85rem !important;
187
- margin: 0.5rem 0 0 0 !important;
188
- font-weight: 400 !important;
189
- }
190
-
191
- /* Sidebar Stats */
192
- .sidebar-stats {
193
- background: rgba(255, 255, 255, 0.1);
194
- border-radius: 12px;
195
- padding: 1.5rem;
196
- margin: 2rem 0;
197
- backdrop-filter: blur(10px);
198
- border: 1px solid rgba(255, 255, 255, 0.2);
199
- }
200
-
201
- .sidebar-stats h3 {
202
- color: white !important;
203
- font-size: 1.1rem !important;
204
- font-weight: 600 !important;
205
- margin-bottom: 1rem !important;
206
- text-align: center !important;
207
- }
208
-
209
- .stat-item {
210
- display: flex;
211
- justify-content: space-between;
212
- align-items: center;
213
- padding: 0.5rem 0;
214
- border-bottom: 1px solid rgba(255, 255, 255, 0.1);
215
- direction: rtl;
216
- }
217
-
218
- .stat-item:last-child {
219
- border-bottom: none;
220
- }
221
-
222
- .stat-label {
223
- color: rgba(255, 255, 255, 0.8) !important;
224
- font-size: 0.9rem;
225
- font-weight: 400;
226
- }
227
-
228
- .stat-value {
229
- color: white !important;
230
- font-size: 1rem;
231
- font-weight: 600;
232
- }
233
-
234
- /* Main Header */
235
- .main-header {
236
- background: var(--primary-gradient);
237
- padding: 3rem 2.5rem;
238
- border-radius: var(--border-radius);
239
- color: var(--white);
240
- margin-bottom: 2.5rem;
241
- text-align: center;
242
- box-shadow: var(--shadow-heavy);
243
- position: relative;
244
- overflow: hidden;
245
- }
246
-
247
- .main-header::before {
248
- content: '';
249
- position: absolute;
250
- top: -50%;
251
- left: -50%;
252
- width: 200%;
253
- height: 200%;
254
- background: linear-gradient(45deg, transparent, rgba(255,255,255,0.1), transparent);
255
- transform: rotate(45deg);
256
- animation: shimmer 4s infinite;
257
- }
258
-
259
- @keyframes shimmer {
260
- 0% { transform: translateX(-100%) translateY(-100%) rotate(45deg); }
261
- 100% { transform: translateX(100%) translateY(100%) rotate(45deg); }
262
- }
263
-
264
- .main-header h1 {
265
- font-family: 'Yekan Bakh', 'Vazirmatn', sans-serif !important;
266
- font-size: 2.5rem;
267
- margin-bottom: 1rem;
268
- font-weight: 700;
269
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
270
- line-height: 1.3;
271
- letter-spacing: -0.5px;
272
- }
273
-
274
- .main-header p {
275
- font-size: 1.2rem;
276
- opacity: 0.9;
277
- margin: 0;
278
- line-height: 1.6;
279
- font-weight: 400;
280
- }
281
-
282
- /* Typography Improvements */
283
- h1, h2, h3, h4, h5, h6 {
284
- font-family: 'Yekan Bakh', 'Vazirmatn', sans-serif !important;
285
- font-weight: 600;
286
- line-height: 1.4;
287
- color: var(--text-primary);
288
- margin-bottom: 1.2rem;
289
- }
290
-
291
- h1 { font-size: 2.2rem; font-weight: 700; }
292
- h2 { font-size: 1.8rem; font-weight: 600; }
293
- h3 { font-size: 1.5rem; font-weight: 600; }
294
- h4 { font-size: 1.3rem; font-weight: 500; }
295
-
296
- p, div, span, li {
297
- font-family: 'Vazirmatn', sans-serif !important;
298
- line-height: 1.8;
299
- color: var(--text-secondary);
300
- font-weight: 400;
301
- }
302
-
303
- /* Strong Text */
304
- strong, b {
305
- font-weight: 600 !important;
306
- color: var(--text-primary);
307
- }
308
-
309
- /* Metric Cards */
310
- .metric-card {
311
- background: var(--primary-gradient);
312
- padding: 2.5rem 2rem;
313
- border-radius: var(--border-radius);
314
- color: var(--white);
315
- text-align: center;
316
- margin: 1rem 0;
317
- box-shadow: var(--shadow-medium);
318
- transition: var(--transition);
319
- position: relative;
320
- overflow: hidden;
321
- border: 1px solid rgba(255, 255, 255, 0.1);
322
- }
323
-
324
- .metric-card::before {
325
- content: '';
326
- position: absolute;
327
- top: 0;
328
- left: -100%;
329
- width: 100%;
330
- height: 100%;
331
- background: linear-gradient(90deg, transparent, rgba(255,255,255,0.2), transparent);
332
- transition: left 0.6s ease;
333
- }
334
-
335
- .metric-card:hover::before {
336
- left: 100%;
337
- }
338
-
339
- .metric-card:hover {
340
- transform: translateY(-8px) scale(1.02);
341
- box-shadow: 0 20px 40px rgba(102, 126, 234, 0.3);
342
- }
343
-
344
- .metric-value {
345
- font-family: 'Yekan Bakh', sans-serif !important;
346
- font-size: 3rem;
347
- font-weight: 800;
348
- margin: 0.8rem 0;
349
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
350
- line-height: 1;
351
- }
352
-
353
- .metric-label {
354
- font-size: 1rem;
355
- opacity: 0.9;
356
- font-weight: 500;
357
- letter-spacing: 0.5px;
358
- line-height: 1.4;
359
- }
360
-
361
- .metric-subtitle {
362
- font-size: 0.85rem;
363
- opacity: 0.7;
364
- font-weight: 400;
365
- margin-top: 0.5rem;
366
- }
367
-
368
- /* Feature Cards */
369
- .feature-card {
370
- background: rgba(255, 255, 255, 0.95);
371
- backdrop-filter: blur(20px);
372
- padding: 2.5rem;
373
- border-radius: var(--border-radius);
374
- box-shadow: var(--shadow-light);
375
- margin: 2rem 0;
376
- border-right: 4px solid #667eea;
377
- transition: var(--transition);
378
- position: relative;
379
- border: 1px solid rgba(102, 126, 234, 0.1);
380
- }
381
-
382
- .feature-card:hover {
383
- transform: translateY(-5px);
384
- box-shadow: var(--shadow-medium);
385
- border-right-width: 6px;
386
- }
387
-
388
- .feature-card::before {
389
- content: '';
390
- position: absolute;
391
- top: 0;
392
- left: 0;
393
- right: 0;
394
- height: 3px;
395
- background: var(--primary-gradient);
396
- border-radius: var(--border-radius) var(--border-radius) 0 0;
397
- }
398
-
399
- /* Status Indicators */
400
- .status-indicator {
401
- display: inline-flex;
402
- align-items: center;
403
- padding: 0.5rem 1.2rem;
404
- border-radius: 25px;
405
- font-size: 0.9rem;
406
- font-weight: 500;
407
- margin: 0.4rem 0.3rem;
408
- box-shadow: var(--shadow-light);
409
- transition: var(--transition);
410
- font-family: 'Vazirmatn', sans-serif !important;
411
- }
412
-
413
- .status-indicator:hover {
414
- transform: translateY(-2px);
415
- box-shadow: var(--shadow-medium);
416
- }
417
-
418
- .status-success { background: #10b981; color: white; }
419
- .status-warning { background: #f59e0b; color: white; }
420
- .status-info { background: #3b82f6; color: white; }
421
- .status-error { background: #ef4444; color: white; }
422
-
423
- /* Buttons */
424
- .stButton > button {
425
- background: var(--primary-gradient) !important;
426
- color: white !important;
427
- border: none !important;
428
- border-radius: 12px !important;
429
- padding: 0.8rem 2.5rem !important;
430
- font-weight: 600 !important;
431
- font-family: 'Vazirmatn', sans-serif !important;
432
- font-size: 1rem !important;
433
- transition: var(--transition) !important;
434
- box-shadow: var(--shadow-light) !important;
435
- line-height: 1.4 !important;
436
- text-align: center !important;
437
- }
438
-
439
- .stButton > button:hover {
440
- transform: translateY(-3px) !important;
441
- box-shadow: var(--shadow-medium) !important;
442
- }
443
-
444
- .stButton > button:active {
445
- transform: translateY(-1px) !important;
446
- }
447
-
448
- /* Responsive Design */
449
- @media (max-width: 1024px) {
450
- :root { --sidebar-width: 250px; }
451
-
452
- .main {
453
- padding-left: var(--sidebar-width);
454
- }
455
-
456
- .css-1d391kg {
457
- width: var(--sidebar-width);
458
- }
459
- }
460
-
461
- @media (max-width: 768px) {
462
- :root { --sidebar-width: 0px; }
463
-
464
- .main {
465
- padding-left: 1rem !important;
466
- padding-right: 1rem !important;
467
- }
468
-
469
- .css-1d391kg {
470
- position: relative !important;
471
- width: 100% !important;
472
- height: auto !important;
473
- margin-bottom: 1rem;
474
- }
475
-
476
- .main-header h1 {
477
- font-size: 1.8rem;
478
- }
479
-
480
- .main-header p {
481
- font-size: 1rem;
482
- }
483
-
484
- .metric-value {
485
- font-size: 2.2rem;
486
- }
487
- }
488
-
489
- /* Hide Streamlit Elements */
490
- #MainMenu { visibility: hidden; }
491
- footer { visibility: hidden; }
492
- header { visibility: hidden; }
493
- .stDeployButton { display: none; }
494
-
495
- /* Custom Scrollbar */
496
- ::-webkit-scrollbar {
497
- width: 8px;
498
- height: 8px;
499
- }
500
-
501
- ::-webkit-scrollbar-track {
502
- background: rgba(255, 255, 255, 0.1);
503
- border-radius: 4px;
504
- }
505
-
506
- ::-webkit-scrollbar-thumb {
507
- background: rgba(255, 255, 255, 0.3);
508
- border-radius: 4px;
509
- }
510
-
511
- ::-webkit-scrollbar-thumb:hover {
512
- background: rgba(255, 255, 255, 0.5);
513
- }
514
-
515
- /* Loading States */
516
- .stSpinner > div {
517
- border-top-color: #667eea !important;
518
- }
519
-
520
- /* Alerts */
521
- .stAlert {
522
- direction: rtl !important;
523
- text-align: right !important;
524
- font-family: 'Vazirmatn', sans-serif !important;
525
- border-radius: 12px !important;
526
- font-weight: 500 !important;
527
- }
528
-
529
- /* Footer in Sidebar */
530
- .sidebar-footer {
531
- position: absolute;
532
- bottom: 1rem;
533
- left: 1rem;
534
- right: 1rem;
535
- text-align: center;
536
- padding: 1rem;
537
- background: rgba(255, 255, 255, 0.1);
538
- border-radius: 12px;
539
- backdrop-filter: blur(10px);
540
- }
541
-
542
- .sidebar-footer p {
543
- color: rgba(255, 255, 255, 0.7) !important;
544
- font-size: 0.8rem !important;
545
- margin: 0 !important;
546
- line-height: 1.4 !important;
547
- }
548
- </style>
549
- """, unsafe_allow_html=True)
550
-
551
- # Enhanced Database Manager for Hugging Face Environment
552
- class DatabaseManager:
553
- def __init__(self, db_path: str = None):
554
- """Initialize DatabaseManager optimized for Hugging Face Spaces"""
555
- self.logger = logging.getLogger(__name__)
556
-
557
- # For Hugging Face Spaces, use temp storage explicitly
558
- if db_path:
559
- self.db_path = db_path
560
- else:
561
- # Use /tmp/ directly for Hugging Face Spaces
562
- self.db_path = "/tmp/iran_legal.db"
563
- self.logger.info(f"🗄️ Using Hugging Face temp storage: {self.db_path}")
564
-
565
- self.initialize_database()
566
-
567
- def _find_writable_path(self, paths):
568
- """Find the first writable path"""
569
- for path in paths:
570
- try:
571
- directory = os.path.dirname(path)
572
- if directory and not os.path.exists(directory):
573
- os.makedirs(directory, exist_ok=True)
574
-
575
- # Test write access
576
- test_file = path + ".test"
577
- with open(test_file, 'w') as f:
578
- f.write("test")
579
- os.remove(test_file)
580
-
581
- self.logger.info(f"✅ Found writable path: {path}")
582
- return path
583
-
584
- except (OSError, PermissionError) as e:
585
- self.logger.warning(f"⚠️ Cannot write to {path}: {e}")
586
- continue
587
-
588
- # Fallback
589
- fallback_path = f"/tmp/iran_legal_{int(time.time())}.db"
590
- self.logger.warning(f"⚠️ Using fallback path: {fallback_path}")
591
- return fallback_path
592
-
593
- def initialize_database(self):
594
- """Initialize database with error handling for Hugging Face"""
595
- try:
596
- with sqlite3.connect(self.db_path, timeout=30.0) as conn:
597
- conn.execute("PRAGMA journal_mode=WAL;")
598
- conn.execute("PRAGMA foreign_keys = ON;")
599
-
600
- # Test connection
601
- cursor = conn.cursor()
602
- cursor.execute("SELECT sqlite_version();")
603
- version = cursor.fetchone()[0]
604
- self.logger.info(f"📊 SQLite version: {version}")
605
-
606
- # Create tables
607
- self._create_tables(conn)
608
-
609
- # Add sample data if database is empty
610
- cursor.execute("SELECT COUNT(*) FROM documents")
611
- doc_count = cursor.fetchone()[0]
612
-
613
- if doc_count == 0:
614
- self._add_sample_data(conn)
615
- self.logger.info("📝 Added sample data to empty database")
616
-
617
- self.logger.info("✅ Database initialized successfully")
618
-
619
- except Exception as e:
620
- self.logger.error(f"❌ Database initialization failed: {e}")
621
- # Create minimal fallback database
622
- self._create_fallback_database()
623
-
624
- def _create_fallback_database(self):
625
- """Create a minimal in-memory database as fallback"""
626
- try:
627
- self.db_path = ":memory:"
628
- with sqlite3.connect(self.db_path) as conn:
629
- self._create_tables(conn)
630
- self._add_minimal_sample_data(conn)
631
- self.logger.info("🆘 Created fallback in-memory database")
632
- except Exception as e:
633
- self.logger.error(f"❌ Even fallback database failed: {e}")
634
-
635
- def _create_tables(self, conn):
636
- """Create database tables"""
637
- cursor = conn.cursor()
638
-
639
- # Documents table
640
- cursor.execute("""
641
- CREATE TABLE IF NOT EXISTS documents (
642
- id INTEGER PRIMARY KEY AUTOINCREMENT,
643
- title TEXT NOT NULL,
644
- content TEXT NOT NULL,
645
- source TEXT,
646
- category TEXT,
647
- ai_score REAL DEFAULT 0.0,
648
- keywords TEXT,
649
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
650
- file_size INTEGER DEFAULT 0,
651
- language TEXT DEFAULT 'fa'
652
- )
653
- """)
654
-
655
- # Scraped items table
656
- cursor.execute("""
657
- CREATE TABLE IF NOT EXISTS scraped_items (
658
- id TEXT PRIMARY KEY,
659
- url TEXT NOT NULL,
660
- title TEXT,
661
- content TEXT,
662
- domain TEXT,
663
- rating_score REAL DEFAULT 0.0,
664
- word_count INTEGER DEFAULT 0,
665
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
666
- status TEXT DEFAULT 'completed'
667
- )
668
- """)
669
-
670
- # Create indexes
671
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_docs_category ON documents(category);")
672
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_docs_score ON documents(ai_score);")
673
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_scraped_domain ON scraped_items(domain);")
674
-
675
- conn.commit()
676
-
677
- def _add_sample_data(self, conn):
678
- """Add comprehensive sample data"""
679
- sample_documents = [
680
- {
681
- 'title': 'قانون اساسی جمهوری اسلامی ایران - اصول کلی',
682
- 'content': '''فصل اول - اصول کلی
683
-
684
- اصل یکم: حکومت ایران، جمهوری اسلامی است که ملت ایران بر اساس ایمان دیرینه‌اش به حاکمیت حق و عدالت قرآن، در انقلاب اسلامی سال ۱۳۵۷ تحت رهبری امام خمینی به آن رأی مثبت داد.
685
-
686
- اصل دوم: جمهوری اسلامی، نظامی است مبتنی بر ایمان به خدای یکتا، حاکمیت و قانون الهی، عدل، ایمان و تقوا.
687
-
688
- اصل سوم: برای نیل به اهداف مندرج در اصل دوم، دولت جمهوری اسلامی ایران موظف است تمام امکانات خود را برای دستیابی به اهداف زیر به کار گیرد:
689
- ۱- ایجاد محیط مساعد برای رشد فضایل اخلاقی
690
- ۲- ارتقاء سطح آگاهی عمومی
691
- ۳- آموزش و پرورش رایگان برای همه''',
692
- 'source': 'قانون اساسی ج.ا.ایران',
693
- 'category': 'قانون',
694
- 'ai_score': 0.95,
695
- 'keywords': json.dumps(['قانون اساسی', 'جمهوری اسلامی', 'حاکمیت']),
696
- 'file_size': 2450,
697
- 'language': 'fa'
698
- },
699
- {
700
- 'title': 'قانون مجازات اسلامی - مقدمات',
701
- 'content': '''باب اول - احکام عمومی
702
-
703
- ماده ۱- مجازات‌ها به اعتبار کیفیت به سه دسته تقسیم می‌شوند:
704
- الف) حدود
705
- ب) قصاص
706
- ج) تعزیرات
707
-
708
- ماده ۲- حدود عبارت است از مجازات‌هایی که نوع و میزان آنها در شرع مقدس تعیین شده است.
709
-
710
- ماده ۳- قصاص عبارت است از مجازاتی که در شرع مقدس تعیین شده و حق العبد محسوب می‌شود.
711
-
712
- ماده ۴- تعزیرات عبارت است از مجازات‌هایی غیر از حدود و قصاص که برای تأدیب مرتکب تعیین می‌شود.''',
713
- 'source': 'قانون مجازات اسلامی',
714
- 'category': 'قانون',
715
- 'ai_score': 0.88,
716
- 'keywords': json.dumps(['مجازات', 'حدود', 'قصاص', 'تعزیرات']),
717
- 'file_size': 1850,
718
- 'language': 'fa'
719
- },
720
- {
721
- 'title': 'نمونه قرارداد خرید و فروش',
722
- 'content': '''قرارداد خرید و فروش
723
-
724
- طرفین قرارداد:
725
- فروشنده: .........................
726
- خریدار: .........................
727
-
728
- ماده ۱- موضوع قرارداد
729
- فروشنده متعهد می‌شود ملک واقع در آدرس مشخص شده را به خریدار منتقل نماید.
730
-
731
- ماده ۲- مبلغ قرارداد
732
- مبلغ کل قرارداد ................ ریال می‌باشد.
733
-
734
- ماده ۳- تعهدات طرفین
735
- فروشنده متعهد است اسناد مالکیت را تحویل دهد.
736
- خریدار متعهد است مبلغ قرارداد را پرداخت نماید.''',
737
- 'source': 'نمونه قرارداد',
738
- 'category': 'قرارداد',
739
- 'ai_score': 0.75,
740
- 'keywords': json.dumps(['قرارداد', 'خرید', 'فروش', 'طرفین']),
741
- 'file_size': 1200,
742
- 'language': 'fa'
743
- }
744
- ]
745
-
746
- for doc in sample_documents:
747
- conn.execute("""
748
- INSERT INTO documents (title, content, source, category, ai_score, keywords, file_size, language)
749
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
750
- """, (
751
- doc['title'], doc['content'], doc['source'], doc['category'],
752
- doc['ai_score'], doc['keywords'], doc['file_size'], doc['language']
753
- ))
754
-
755
- sample_scraped = [
756
- {
757
- 'id': 'sample_hf_001',
758
- 'url': 'https://dastour.ir/sample',
759
- 'title': 'نمونه محتوای قانونی',
760
- 'content': 'این یک نمونه محتوای قانونی است که از وب‌سایت‌های معتبر جمع‌آوری شده است.',
761
- 'domain': 'dastour.ir',
762
- 'rating_score': 0.85,
763
- 'word_count': 25,
764
- 'status': 'completed'
765
- }
766
- ]
767
-
768
- for item in sample_scraped:
769
- conn.execute("""
770
- INSERT INTO scraped_items (id, url, title, content, domain, rating_score, word_count, status)
771
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
772
- """, (
773
- item['id'], item['url'], item['title'], item['content'],
774
- item['domain'], item['rating_score'], item['word_count'], item['status']
775
- ))
776
-
777
- conn.commit()
778
-
779
- def _add_minimal_sample_data(self, conn):
780
- """Add minimal sample data for fallback"""
781
- conn.execute("""
782
- INSERT INTO documents (title, content, source, category, ai_score, keywords, file_size, language)
783
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
784
- """, (
785
- 'نمونه سند قانونی',
786
- 'این یک نمونه سند قانونی برای نمایش عملکرد سیستم است.',
787
- 'نمونه',
788
- 'قانون',
789
- 0.5,
790
- json.dumps(['نمونه', 'قانون']),
791
- 100,
792
- 'fa'
793
- ))
794
- conn.commit()
795
-
796
- @contextmanager
797
- def get_connection(self):
798
- """Get database connection with error handling"""
799
- conn = None
800
- try:
801
- conn = sqlite3.connect(self.db_path, timeout=10.0)
802
- conn.row_factory = sqlite3.Row
803
- yield conn
804
- except Exception as e:
805
- self.logger.error(f"Database connection error: {e}")
806
- raise
807
- finally:
808
- if conn:
809
- conn.close()
810
-
811
- def get_statistics(self) -> Dict:
812
- """Get database statistics"""
813
- stats = {
814
- 'total_documents': 0,
815
- 'total_scraped': 0,
816
- 'avg_ai_score': 0.0,
817
- 'avg_rating': 0.0,
818
- 'categories': {}
819
- }
820
-
821
- try:
822
- with self.get_connection() as conn:
823
- cursor = conn.execute("SELECT COUNT(*) FROM documents")
824
- stats['total_documents'] = cursor.fetchone()[0]
825
-
826
- cursor = conn.execute("SELECT AVG(ai_score) FROM documents WHERE ai_score > 0")
827
- result = cursor.fetchone()[0]
828
- stats['avg_ai_score'] = result if result else 0.0
829
-
830
- cursor = conn.execute("SELECT COUNT(*) FROM scraped_items")
831
- stats['total_scraped'] = cursor.fetchone()[0]
832
-
833
- cursor = conn.execute("SELECT AVG(rating_score) FROM scraped_items WHERE rating_score > 0")
834
- result = cursor.fetchone()[0]
835
- stats['avg_rating'] = result if result else 0.0
836
-
837
- cursor = conn.execute("""
838
- SELECT category, COUNT(*)
839
- FROM documents
840
- WHERE category IS NOT NULL
841
- GROUP BY category
842
- """)
843
- stats['categories'] = dict(cursor.fetchall())
844
-
845
- except Exception as e:
846
- self.logger.error(f"Error getting statistics: {e}")
847
-
848
- return stats
849
-
850
- def get_documents(self, limit: int = 100) -> List[Dict]:
851
- """Get documents from database"""
852
- try:
853
- with self.get_connection() as conn:
854
- cursor = conn.execute("""
855
- SELECT * FROM documents
856
- ORDER BY created_at DESC
857
- LIMIT ?
858
- """, (limit,))
859
- return [dict(row) for row in cursor.fetchall()]
860
- except Exception as e:
861
- self.logger.error(f"Error getting documents: {e}")
862
- return []
863
-
864
- def get_scraped_items(self, limit: int = 100) -> List[Dict]:
865
- """Get scraped items from database"""
866
- try:
867
- with self.get_connection() as conn:
868
- cursor = conn.execute("""
869
- SELECT * FROM scraped_items
870
- ORDER BY created_at DESC
871
- LIMIT ?
872
- """, (limit,))
873
- return [dict(row) for row in cursor.fetchall()]
874
- except Exception as e:
875
- self.logger.error(f"Error getting scraped items: {e}")
876
- return []
877
-
878
- # Simplified AI Analysis Engine for Hugging Face
879
- class AIAnalysisEngine:
880
- def __init__(self):
881
- self.legal_keywords = {
882
- 'قانون': ['قانون', 'ماده', 'تبصره', 'بند', 'فصل', 'باب'],
883
- 'قرارداد': ['قرارداد', 'عقد', 'طرفین', 'متعاهدین'],
884
- 'حکم': ['حکم', 'رای', 'دادگاه', 'قاضی'],
885
- 'اداری': ['اداره', 'سازمان', 'وزارت', 'دولت']
886
- }
887
-
888
- def analyze_text(self, text: str, title: str = "") -> Dict:
889
- """Analyze text and return analysis"""
890
- if not text:
891
- return {'ai_score': 0.0, 'category': 'نامشخص', 'keywords': []}
892
-
893
- quality_score = self._calculate_quality_score(text)
894
- category = self._predict_category(text + " " + title)
895
- keywords = self._extract_keywords(text)
896
-
897
- return {
898
- 'ai_score': quality_score,
899
- 'category': category,
900
- 'keywords': keywords,
901
- 'word_count': len(text.split()),
902
- 'char_count': len(text)
903
- }
904
-
905
- def _calculate_quality_score(self, text: str) -> float:
906
- """Calculate text quality score"""
907
- score = 0.0
908
- word_count = len(text.split())
909
-
910
- # Length scoring
911
- if 50 <= word_count <= 5000:
912
- score += 0.3
913
- elif word_count >= 20:
914
- score += 0.1
915
-
916
- # Legal terms scoring
917
- legal_term_count = 0
918
- for category_terms in self.legal_keywords.values():
919
- for term in category_terms:
920
- legal_term_count += text.count(term)
921
-
922
- if legal_term_count >= 3:
923
- score += 0.4
924
- elif legal_term_count >= 1:
925
- score += 0.2
926
-
927
- # Persian language bonus
928
- persian_ratio = len(re.findall(r'[\u0600-\u06FF]', text)) / max(len(text), 1)
929
- if persian_ratio > 0.5:
930
- score += 0.3
931
-
932
- return min(score, 1.0)
933
-
934
- def _predict_category(self, text: str) -> str:
935
- """Predict document category"""
936
- text_lower = text.lower()
937
- category_scores = {}
938
-
939
- for category, keywords in self.legal_keywords.items():
940
- score = sum(text_lower.count(keyword) for keyword in keywords)
941
- category_scores[category] = score
942
-
943
- if category_scores:
944
- best_category = max(category_scores, key=category_scores.get)
945
- return best_category if category_scores[best_category] > 0 else 'عمومی'
946
-
947
- return 'عمومی'
948
-
949
- def _extract_keywords(self, text: str, max_keywords: int = 5) -> List[str]:
950
- """Extract keywords from text"""
951
- words = re.findall(r'[\u0600-\u06FF]{3,}', text)
952
- word_freq = {}
953
-
954
- for word in words:
955
- if len(word) > 2:
956
- word_freq[word] = word_freq.get(word, 0) + 1
957
-
958
- sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
959
- return [word[0] for word in sorted_words[:max_keywords]]
960
-
961
- # UI Helper Functions
962
- def show_status_message(message: str, status_type: str = "info"):
963
- """Show styled status message"""
964
- status_class = f"status-{status_type}"
965
- st.markdown(f'<div class="status-indicator {status_class}">{message}</div>', unsafe_allow_html=True)
966
-
967
- def create_metric_card(title: str, value: str, subtitle: str = ""):
968
- """Create a beautiful metric card with enhanced typography"""
969
- return f"""
970
- <div class="metric-card">
971
- <div class="metric-label">{title}</div>
972
- <div class="metric-value">{value}</div>
973
- {f'<div class="metric-subtitle">{subtitle}</div>' if subtitle else ''}
974
- </div>
975
- """
976
-
977
- # Initialize services for Hugging Face
978
- @st.cache_resource
979
- def initialize_services():
980
- """Initialize services optimized for Hugging Face"""
981
  try:
982
- db_manager = DatabaseManager()
983
- ai_engine = AIAnalysisEngine()
984
-
985
- # Check database health
986
- stats = db_manager.get_statistics()
987
-
988
- if stats['total_documents'] > 0:
989
- st.success(f"✅ Database loaded successfully ({stats['total_documents']} documents)")
990
- else:
991
- st.warning("⚠️ Database is empty, using fallback data")
992
-
993
- return db_manager, ai_engine
994
-
995
- except Exception as e:
996
- st.error(f"An error occurred: {e}")
 
1
  #!/usr/bin/env python3
2
  """
3
+ Iran Legal Information Dashboard - src/streamlit_app.py
4
+ ====================================================
5
+ Main application file for src directory structure
6
  """
7
 
8
+ # Import the main application from the parent directory or current directory
9
+ import sys
 
 
 
10
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
# Locate app.py and expose its main() entry point.
#
# Search order: the directory containing this file first, then its parent.
# The parent is only added to sys.path when the first attempt fails.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)

main = None          # stays None unless one of the imports below succeeds
import_errors = []   # one "<directory>: <exception>" entry per failed attempt

for search_dir in (current_dir, parent_dir):
    # Prepend so this directory takes priority over entries already on sys.path.
    sys.path.insert(0, search_dir)
    try:
        from app import main
    except ImportError as exc:
        # ImportError is also raised when app.py exists but one of its own
        # imports fails, or when it does not define `main`; keep the real
        # reason so it can be shown instead of being silently discarded.
        import_errors.append(f"{search_dir}: {exc!r}")
    else:
        break

if main is None:
    import streamlit as st
    st.error("❌ Could not import main application. Please ensure app.py is in the correct location.")
    # Show what each attempt actually raised so a missing dependency is not
    # mistaken for a misplaced app.py.
    st.code("\n".join(import_errors))
    st.stop()

# The extra `main is not None` check avoids calling an unbound name if
# st.stop() returns instead of halting the script.
if __name__ == "__main__" and main is not None:
    main()