| from modules.db import SheamiDB | |
| db = None # global placeholder | |
def _max_entry_per_date(trend_data) -> list:
    """Return one entry per date, keeping the entry with the largest value.

    Entries are compared by ``float(entry["value"])``. On a tie the entry
    seen first is kept (strict ``>`` comparison), and the result preserves
    the order in which each date first appeared.

    Entries whose value cannot be converted to a float are reported on
    stdout and left out of the result.

    Raises:
        KeyError: if an entry has no ``"date"`` or ``"value"`` key.
    """
    # date -> (parsed value, original entry); caching the parsed value
    # avoids re-running float() on the stored maximum at every comparison.
    best_by_date = {}
    for entry in trend_data:
        date = entry["date"]
        try:
            value = float(entry["value"])
        except (TypeError, ValueError) as e:
            # float() raises TypeError (not ValueError) for None and other
            # non-str/non-number values; both mean "not a usable number".
            print("Error converting str to float ", e)
            continue
        current = best_by_date.get(date)
        if current is None or value > current[0]:
            best_by_date[date] = (value, entry)
    return [entry for _, entry in best_by_date.values()]


async def deduplicate_trend_data() -> None:
    """Collapse duplicate dates in every document's ``trend_data`` list.

    Iterates over all documents in ``db.trends``. For each one, keeps a
    single ``trend_data`` entry per date (the one with the highest numeric
    value) and writes the list back with ``$set`` only when its length
    changed. Prints the number of updated documents when done.

    The module-level ``db`` handle is created lazily on first use.

    Note: entries with an unparseable value are dropped by the dedup step,
    so their removal alone also changes the length and triggers an update.
    """
    global db
    if db is None:
        db = SheamiDB()

    updated_count = 0
    # NOTE(review): presumably an async (Motor-style) collection, since the
    # cursor is consumed with ``async for`` and update_one is awaited.
    async for doc in db.trends.find({}):
        # ``or []`` also covers a document whose trend_data is stored as null.
        trend_data = doc.get("trend_data") or []
        deduped_trend_data = _max_entry_per_date(trend_data)

        # Deduplication can only remove entries, so a length difference is
        # sufficient to detect that something changed.
        if len(deduped_trend_data) != len(trend_data):
            await db.trends.update_one(
                {"_id": doc["_id"]}, {"$set": {"trend_data": deduped_trend_data}}
            )
            updated_count += 1

    print(f"Updated {updated_count} documents.")
# Script entry point: run the deduplication pass once when executed directly.
if __name__ == "__main__":
    from asyncio import run

    run(deduplicate_trend_data())