Spaces:

WORKWITHSHAFISK
/

segmentopulse-backend

Paused

segmentopulse-backend / scripts /migrate_article_fields.py

SHAFI

chore: Latest backend updates and improvements

3619409 about 2 months ago

5.09 kB

	"""
	Migration Script: Backfill Slug and Quality Score
	Adds missing fields to existing articles in Appwrite

	Run this once to update all existing articles with:
	- slug: SEO-friendly URL slug
	- quality_score: Article quality ranking (0-100)
	"""

	import asyncio
	from appwrite.client import Client
	from appwrite.services.databases import Databases
	from appwrite.query import Query
	import os
	from dotenv import load_dotenv

	# Add parent directory to path
	import sys
	sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from app.utils.data_validation import generate_slug, calculate_quality_score

	# Pull variables from a local .env file (if any) into the process environment
	load_dotenv()

	# Appwrite connection settings; each one is None when its variable is unset
	APPWRITE_ENDPOINT = os.environ.get('APPWRITE_ENDPOINT')
	APPWRITE_PROJECT_ID = os.environ.get('APPWRITE_PROJECT_ID')
	APPWRITE_API_KEY = os.environ.get('APPWRITE_API_KEY')
	APPWRITE_DATABASE_ID = os.environ.get('APPWRITE_DATABASE_ID')
	APPWRITE_COLLECTION_ID = os.environ.get('APPWRITE_COLLECTION_ID')


	def _missing_field_updates(doc, title):
		"""Build the update payload holding only the fields *doc* still lacks."""
		updates = {}

		# An absent, null or empty slug counts as missing.
		if not doc.get('slug'):
			updates['slug'] = generate_slug(title)

		# Only an absent/null score counts as missing; a stored 0 is kept.
		if doc.get('quality_score') is None:
			updates['quality_score'] = calculate_quality_score({
				'title': title,
				'description': doc.get('description', ''),
				'image': doc.get('image_url'),
				'source': doc.get('source', ''),
			})

		return updates


	async def migrate_articles():
		"""
		Migrate existing articles to add slug and quality_score.

		Pages through the collection named by the module-level APPWRITE_*
		settings, 100 documents at a time (Query.limit / Query.offset). For
		every document that lacks a slug or a quality_score, the missing
		field(s) are generated and written back with update_document.
		Documents that already have both fields are counted as skipped.

		Error handling:
		- If any APPWRITE_* setting is unset, a message is printed and the
		  function returns before contacting Appwrite.
		- A failure while fetching a batch is printed and stops the loop.
		- A failure on a single document is printed and counted; processing
		  continues with the next document.

		The function pauses 0.5 seconds between batches and prints a summary
		of updated / skipped / failed counts at the end. Returns None.
		"""
		print("=" * 60)
		print("📊 Appwrite Article Migration Script")
		print("=" * 60)
		print(f"Database: {APPWRITE_DATABASE_ID}")
		print(f"Collection: {APPWRITE_COLLECTION_ID}")
		print()

		# Fail early with a readable message instead of configuring the
		# client with None values.
		required_settings = {
			'APPWRITE_ENDPOINT': APPWRITE_ENDPOINT,
			'APPWRITE_PROJECT_ID': APPWRITE_PROJECT_ID,
			'APPWRITE_API_KEY': APPWRITE_API_KEY,
			'APPWRITE_DATABASE_ID': APPWRITE_DATABASE_ID,
			'APPWRITE_COLLECTION_ID': APPWRITE_COLLECTION_ID,
		}
		missing = [name for name, value in required_settings.items() if not value]
		if missing:
			print(f"❌ Missing required environment variables: {', '.join(missing)}")
			return

		# Initialize Appwrite client
		client = Client()
		client.set_endpoint(APPWRITE_ENDPOINT)
		client.set_project(APPWRITE_PROJECT_ID)
		client.set_key(APPWRITE_API_KEY)

		databases = Databases(client)

		# Fetch all articles (paginated)
		offset = 0
		limit = 100
		total_updated = 0
		total_skipped = 0
		total_errors = 0

		while True:
			print(f"📥 Fetching articles {offset + 1} to {offset + limit}...")

			# Only the fetch is guarded here, so this message is never printed
			# for a failure that happened while processing a document.
			try:
				response = databases.list_documents(
					database_id=APPWRITE_DATABASE_ID,
					collection_id=APPWRITE_COLLECTION_ID,
					queries=[
						Query.limit(limit),
						Query.offset(offset),
					],
				)
				documents = response['documents']
			except Exception as e:
				print(f"❌ Error fetching batch at offset {offset}: {e}")
				break

			if not documents:
				print("✅ No more articles to process")
				break

			print(f"📝 Processing {len(documents)} articles...")

			# Update each document
			for doc in documents:
				# `or ''` also covers a title stored as null; .get's default
				# alone only covers a missing key.
				title = doc.get('title') or ''

				try:
					updates = _missing_field_updates(doc, title)

					# Nothing to write: both fields are already present.
					if not updates:
						total_skipped += 1
						continue

					databases.update_document(
						database_id=APPWRITE_DATABASE_ID,
						collection_id=APPWRITE_COLLECTION_ID,
						document_id=doc['$id'],
						data=updates,
					)
					total_updated += 1
					print(f" ✓ Updated: {title[:50]}... (score: {updates.get('quality_score', 'N/A')})")

				except Exception as e:
					total_errors += 1
					# str() keeps the handler itself from raising on an
					# unexpected title type.
					label = str(title)[:30] or 'unknown'
					print(f" ✗ Error updating {label}: {e}")

			# Move to next batch
			offset += limit

			# Small delay to avoid rate limiting
			await asyncio.sleep(0.5)

		# Summary
		print()
		print("=" * 60)
		print("📊 MIGRATION SUMMARY")
		print("=" * 60)
		print(f"✅ Updated: {total_updated} articles")
		print(f"⏭️ Skipped: {total_skipped} articles (already have fields)")
		print(f"❌ Errors: {total_errors} articles")
		print(f"📈 Total Processed: {total_updated + total_skipped + total_errors}")
		print("=" * 60)


	if __name__ == "__main__":
		# Script entry point: run the migration coroutine to completion on a
		# fresh event loop, with a status line before and after.
		print("Starting migration...")
		migration = migrate_articles()
		asyncio.run(migration)
		print("Migration complete!")