segmentopulse-backend / scripts /migrate_article_fields.py
SHAFI
chore: Latest backend updates and improvements
3619409
"""
Migration Script: Backfill Slug and Quality Score
Adds missing fields to existing articles in Appwrite
Run this once to update all existing articles with:
- slug: SEO-friendly URL slug
- quality_score: Article quality ranking (0-100)
"""
import asyncio
from appwrite.client import Client
from appwrite.services.databases import Databases
from appwrite.query import Query
import os
from dotenv import load_dotenv
# Add parent directory to path
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.utils.data_validation import generate_slug, calculate_quality_score
# Load environment variables
load_dotenv()
# Appwrite Configuration
APPWRITE_ENDPOINT = os.getenv('APPWRITE_ENDPOINT')
APPWRITE_PROJECT_ID = os.getenv('APPWRITE_PROJECT_ID')
APPWRITE_API_KEY = os.getenv('APPWRITE_API_KEY' )
APPWRITE_DATABASE_ID = os.getenv('APPWRITE_DATABASE_ID')
APPWRITE_COLLECTION_ID = os.getenv('APPWRITE_COLLECTION_ID')
async def migrate_articles():
"""
Migrate existing articles to add slug and quality_score
"""
print("=" * 60)
print("πŸ“Š Appwrite Article Migration Script")
print("=" * 60)
print(f"Database: {APPWRITE_DATABASE_ID}")
print(f"Collection: {APPWRITE_COLLECTION_ID}")
print()
# Initialize Appwrite client
client = Client()
client.set_endpoint(APPWRITE_ENDPOINT)
client.set_project(APPWRITE_PROJECT_ID)
client.set_key(APPWRITE_API_KEY)
databases = Databases(client)
# Fetch all articles (paginated)
offset = 0
limit = 100
total_updated = 0
total_skipped = 0
total_errors = 0
while True:
try:
print(f"πŸ“₯ Fetching articles {offset + 1} to {offset + limit}...")
# Query articles
response = databases.list_documents(
database_id=APPWRITE_DATABASE_ID,
collection_id=APPWRITE_COLLECTION_ID,
queries=[
Query.limit(limit),
Query.offset(offset)
]
)
documents = response['documents']
if not documents:
print("βœ… No more articles to process")
break
print(f"πŸ“ Processing {len(documents)} articles...")
# Update each document
for doc in documents:
try:
doc_id = doc['$id']
title = doc.get('title', '')
# Check if already has slug and quality_score
has_slug = doc.get('slug')
has_quality = doc.get('quality_score') is not None
if has_slug and has_quality:
total_skipped += 1
continue
# Generate missing fields
updates = {}
if not has_slug:
updates['slug'] = generate_slug(title)
if not has_quality:
updates['quality_score'] = calculate_quality_score({
'title': title,
'description': doc.get('description', ''),
'image': doc.get('image_url'),
'source': doc.get('source', '')
})
# Update document
if updates:
databases.update_document(
database_id=APPWRITE_DATABASE_ID,
collection_id=APPWRITE_COLLECTION_ID,
document_id=doc_id,
data=updates
)
total_updated += 1
print(f" βœ“ Updated: {title[:50]}... (score: {updates.get('quality_score', 'N/A')})")
except Exception as e:
total_errors += 1
print(f" βœ— Error updating {doc.get('title', 'unknown')[:30]}: {e}")
continue
# Move to next batch
offset += limit
# Small delay to avoid rate limiting
await asyncio.sleep(0.5)
except Exception as e:
print(f"❌ Error fetching batch at offset {offset}: {e}")
break
# Summary
print()
print("=" * 60)
print("πŸ“Š MIGRATION SUMMARY")
print("=" * 60)
print(f"βœ… Updated: {total_updated} articles")
print(f"⏭️ Skipped: {total_skipped} articles (already have fields)")
print(f"❌ Errors: {total_errors} articles")
print(f"πŸ“ˆ Total Processed: {total_updated + total_skipped + total_errors}")
print("=" * 60)
if __name__ == "__main__":
print("Starting migration...")
asyncio.run(migrate_articles())
print("Migration complete!")