SHAFI committed on
Commit
f0a6b9e
·
1 Parent(s): 1c9809b

chore: Sync backend code with latest changes

Browse files

- Updated database cleanup and maintenance scripts
- Maintained scheduler service configuration
- Backend ready for production deployment

app/services/appwrite_db.py CHANGED
@@ -276,8 +276,9 @@ class AppwriteDatabase:
276
  # Get counts by category
277
  categories = [
278
  "ai", "data-security", "data-governance", "data-privacy",
279
- "data-engineering", "business-intelligence", "business-analytics",
280
- "customer-data-platform", "data-centers", "cloud-computing", "magazines"
 
281
  ]
282
 
283
  articles_by_category = {}
 
276
  # Get counts by category
277
  categories = [
278
  "ai", "data-security", "data-governance", "data-privacy",
279
+ "data-engineering", "data-management", "business-intelligence",
280
+ "business-analytics", "customer-data-platform", "data-centers",
281
+ "cloud-computing", "magazines"
282
  ]
283
 
284
  articles_by_category = {}
app/services/scheduler.py CHANGED
@@ -28,6 +28,7 @@ CATEGORIES = [
28
  "data-governance",
29
  "data-privacy",
30
  "data-engineering",
 
31
  "business-intelligence",
32
  "business-analytics",
33
  "customer-data-platform",
 
28
  "data-governance",
29
  "data-privacy",
30
  "data-engineering",
31
+ "data-management",
32
  "business-intelligence",
33
  "business-analytics",
34
  "customer-data-platform",
check_database_status.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Status Checker
3
+ Provides comprehensive diagnostics for Appwrite database and scheduler status
4
+ """
5
+
6
+ import asyncio
7
+ import sys
8
+ import os
9
+ from datetime import datetime, timedelta
10
+
11
+ # Add parent directory to path
12
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
13
+
14
async def main():
    """Print a diagnostic report for the Appwrite database and the scheduler.

    The report is written to stdout in seven parts: connection settings,
    total article count, per-category counts, article age analysis,
    scheduler jobs, cleanup throughput estimate, and a note on the API's
    per-category limit.

    Each database section is best-effort: a failure is printed and the
    report continues.  Values that could not be fetched are tracked as
    ``None`` so that later sections skip or hedge their output instead of
    raising ``NameError`` on a variable that was never assigned.

    Returns:
        None.  Returns early (after printing an error) when the Appwrite
        client reports that it is not initialized.
    """
    # Project imports are deferred until call time; the module-level
    # sys.path tweak must have run before they can be resolved.
    from app.services.appwrite_db import get_appwrite_db
    from app.services.scheduler import scheduler
    from app.config import settings
    from appwrite.query import Query

    # Figures used in the cleanup arithmetic below; the same numbers are
    # quoted in the "Cleanup Configuration" text of Part 6.
    cleanup_batch_size = 500
    daily_cleanup_limit = 2000

    # None means "could not be determined" (the corresponding query failed).
    total_articles = None
    old_articles = None

    print("=" * 80)
    print("πŸ” DATABASE & SCHEDULER STATUS REPORT")
    print(f"πŸ“… Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 80)
    print("")

    # ===== PART 1: Appwrite Connection =====
    print("━" * 80)
    print("πŸ“Š PART 1: APPWRITE DATABASE CONNECTION")
    print("━" * 80)

    appwrite_db = get_appwrite_db()
    print(f"βœ“ Initialized: {appwrite_db.initialized}")
    print(f"βœ“ Endpoint: {settings.APPWRITE_ENDPOINT}")
    print(f"βœ“ Database ID: {settings.APPWRITE_DATABASE_ID}")
    print(f"βœ“ Collection ID: {settings.APPWRITE_COLLECTION_ID}")
    print("")

    if not appwrite_db.initialized:
        print("❌ ERROR: Appwrite is not initialized!")
        print("πŸ’‘ Check your .env file for valid credentials")
        return

    def count_documents(query):
        """Return the 'total' Appwrite reports for documents matching *query*."""
        response = appwrite_db.databases.list_documents(
            database_id=settings.APPWRITE_DATABASE_ID,
            collection_id=settings.APPWRITE_COLLECTION_ID,
            # Only the 'total' field is read, so fetch a single document.
            queries=[query, Query.limit(1)],
        )
        return response['total']

    # ===== PART 2: Total Article Count =====
    print("━" * 80)
    print("πŸ“ˆ PART 2: TOTAL ARTICLE COUNT")
    print("━" * 80)

    try:
        stats = await appwrite_db.get_stats()
        total_articles = stats.get('total_articles', 0)

        print(f"πŸ”’ TOTAL ARTICLES IN DATABASE: {total_articles:,}")
        print("")

        # ===== PART 3: Articles by Category =====
        print("━" * 80)
        print("πŸ“‹ PART 3: ARTICLES BY CATEGORY")
        print("━" * 80)

        articles_by_category = stats.get('articles_by_category', {})
        # Largest categories first.
        for category, count in sorted(articles_by_category.items(), key=lambda x: x[1], reverse=True):
            print(f" {category:30s} : {count:6,} articles")
        print("")

    except Exception as e:
        # Best-effort: report the failure and carry on with the other parts.
        print(f"❌ Error getting stats: {e}")
        print("")

    # ===== PART 4: Age Analysis =====
    print("━" * 80)
    print("⏰ PART 4: ARTICLE AGE ANALYSIS")
    print("━" * 80)

    try:
        # NOTE(review): cutoffs are naive local timestamps while the cleanup
        # schedule is described in UTC below — confirm 'published_at' is
        # stored in a comparable form.

        # Articles older than 48 hours (should be deleted by cleanup)
        cutoff_48h = (datetime.now() - timedelta(hours=48)).isoformat()
        old_articles = count_documents(Query.less_than('published_at', cutoff_48h))

        # Articles from last 24 hours (recent)
        cutoff_24h = (datetime.now() - timedelta(hours=24)).isoformat()
        recent_articles = count_documents(Query.greater_than('published_at', cutoff_24h))

        print(f"πŸ“… Articles older than 48 hours: {old_articles:,}")
        print(" ⚠️ These SHOULD be cleaned up by the scheduler")
        print("")
        print(f"πŸ†• Articles from last 24 hours: {recent_articles:,}")
        print(" βœ“ These are fresh articles")
        print("")

        if old_articles > 0:
            print("⚠️ WARNING: Old articles detected!")
            print(f" The scheduler should delete {min(old_articles, cleanup_batch_size)} articles on next run")
            print("")
        else:
            print("βœ… GOOD: No articles older than 48 hours found")
            print(" Cleanup scheduler is working correctly!")
            print("")

    except Exception as e:
        # Best-effort: old_articles keeps whatever value was obtained before
        # the failure (None if the first query itself failed).
        print(f"❌ Error analyzing article age: {e}")
        print("")

    # ===== PART 5: Scheduler Status =====
    print("━" * 80)
    print("⏰ PART 5: SCHEDULER STATUS")
    print("━" * 80)

    print(f"πŸ”„ Scheduler Running: {scheduler.running}")
    print("")

    jobs = scheduler.get_jobs()
    if jobs:
        print(f"πŸ“‹ Registered Jobs: {len(jobs)}")
        print("")
        for job in jobs:
            print(f" πŸ”Ή {job.name}")
            print(f" ID: {job.id}")
            print(f" Next Run: {job.next_run_time}")
            print(f" Trigger: {job.trigger}")
            print("")
    else:
        print("⚠️ No jobs registered")
        print("πŸ’‘ The scheduler might not have started yet")
        print("")

    # ===== PART 6: Cleanup Efficiency =====
    print("━" * 80)
    print("🧹 PART 6: CLEANUP SCHEDULER EFFICIENCY")
    print("━" * 80)

    print("πŸ“Š Cleanup Configuration:")
    print(" β€’ Retention Policy: 48 hours (articles older than this are deleted)")
    print(" β€’ Cleanup Frequency: Every 6 hours (00:00, 06:00, 12:00, 18:00 UTC)")
    print(" β€’ Cleanup Capacity: 500 articles per run")
    print(" β€’ Daily Cleanup Limit: 2,000 articles/day")
    print("")

    # Skip the estimate when the age query failed (old_articles is None);
    # the failure itself was already reported in Part 4.
    if old_articles is not None and old_articles > 0:
        days_to_clear = old_articles / daily_cleanup_limit
        print(f"⏳ Estimated time to clear {old_articles:,} old articles:")
        print(f" {days_to_clear:.1f} days at current cleanup rate")
        print("")

        if days_to_clear > 3:
            print("⚠️ WARNING: Cleanup is falling behind!")
            print("πŸ’‘ Recommendations:")
            print(" 1. Run manual cleanup: python test_cleanup.py")
            print(" 2. Consider increasing cleanup limit in scheduler.py")
            print(" 3. Reduce article retention to 24 hours instead of 48")
            print("")

    # ===== PART 7: Why Platform Shows Fewer Articles =====
    print("━" * 80)
    print("πŸ” PART 7: WHY PLATFORM SHOWS FEWER ARTICLES")
    print("━" * 80)

    # Fall back to a textual placeholder when the stats query failed.
    if total_articles is not None:
        total_label = f"{total_articles:,}"
    else:
        total_label = "an unknown number of"
    print(f"πŸ’‘ Your database has {total_label} articles")
    print(" BUT your API is configured to show only 20 articles per category")
    print("")
    print("πŸ“ Location: app/routes/news.py:49")
    print(" Code: db_articles = await appwrite_db.get_articles(category, limit=20)")
    print("")
    print("This is INTENTIONAL for:")
    print(" βœ“ Fast response times")
    print(" βœ“ Better user experience")
    print(" βœ“ Reduced bandwidth usage")
    print("")
    print("If you want to show more articles, you can:")
    print(" 1. Increase the limit parameter in the API")
    print(" 2. Implement pagination to load more articles on demand")
    print("")

    print("=" * 80)
    print("βœ… REPORT COMPLETE")
    print("=" * 80)
193
+ if __name__ == "__main__":
194
+ asyncio.run(main())