resumebuilder / cleanup_duplicates.py
sakthi07's picture
pushing to hugging face
ab028ac
from app import app, db, Introduction, ProfileSummary
from sqlalchemy import func
def cleanup_duplicate_records():
"""Clean up duplicate introduction and profile summary records"""
with app.app_context():
# Find duplicate introductions
dup_intros = db.session.query(
Introduction.user_id,
func.count(Introduction.id).label('count')
).group_by(Introduction.user_id).having(func.count(Introduction.id) > 1).all()
print(f"Found {len(dup_intros)} users with duplicate introductions")
# For each user with duplicates, keep the most recent one
for user_id, count in dup_intros:
# Get all introductions for this user, ordered by creation date
intros = Introduction.query.filter_by(user_id=user_id).order_by(Introduction.created_at.desc()).all()
# Keep the first (most recent), delete the rest
for intro in intros[1:]:
print(f"Deleting duplicate introduction for user {user_id}: {intro.id}")
db.session.delete(intro)
# Find duplicate profile summaries
dup_summaries = db.session.query(
ProfileSummary.user_id,
func.count(ProfileSummary.id).label('count')
).group_by(ProfileSummary.user_id).having(func.count(ProfileSummary.id) > 1).all()
print(f"Found {len(dup_summaries)} users with duplicate profile summaries")
# For each user with duplicates, keep the most recent one
for user_id, count in dup_summaries:
# Get all profile summaries for this user, ordered by creation date
summaries = ProfileSummary.query.filter_by(user_id=user_id).order_by(ProfileSummary.created_at.desc()).all()
# Keep the first (most recent), delete the rest
for summary in summaries[1:]:
print(f"Deleting duplicate profile summary for user {user_id}: {summary.id}")
db.session.delete(summary)
# Commit the changes
db.session.commit()
print("Cleanup completed!")
if __name__ == "__main__":
cleanup_duplicate_records()