""" Database Utilities (Supabase) ============================= Manages all Supabase operations: deduplication checks and article insertion. Tables used: - `registry` — tracks which article IDs have been processed (deduplication) - `articles` — stores final processed articles with summaries and audio URLs Configuration (required in .env): SUPABASE_URL=https://your-project.supabase.co SUPABASE_KEY=your_service_role_key Usage: from backend.utils.db_utils import DatabaseManager db = DatabaseManager() # Check which articles are already processed existing = db.check_registry(["id1", "id2", "id3"]) # Insert a fully processed article db.insert_article(article_dict) """ import os from supabase import create_client, Client from dotenv import load_dotenv import logging load_dotenv() logger = logging.getLogger(__name__) class DatabaseManager: """Thread-safe Supabase client for article deduplication and storage. If SUPABASE_URL or SUPABASE_KEY are missing, all operations gracefully return empty results / False instead of raising. """ def __init__(self): url: str = os.environ.get("SUPABASE_URL", "").strip() key: str = os.environ.get("SUPABASE_KEY", "").strip() # Auto-correct missing https:// prefix if url and not url.startswith("http"): url = f"https://{url}" if not url or not key or url == "https://": logger.warning("Supabase URL or Key not found. Database operations will be skipped.") self.supabase: Client = None else: try: self.supabase: Client = create_client(url, key) except Exception as e: logger.error(f"Failed to initialize Supabase client: {e}") self.supabase: Client = None def check_registry(self, article_ids: list) -> set: """Check which article IDs are already in the registry table. Args: article_ids: List of article ID strings to check. Returns: Set of IDs that already exist in registry (should be skipped). """ if not self.supabase or not article_ids: return set() try: response = self.supabase.table("registry").select("id").in_("id", article_ids).execute() return {item['id'] for item in response.data} except Exception as e: logger.error(f"Error checking registry: {str(e)}") return set() def insert_article(self, article_data: dict) -> bool: """Insert a processed article into both `articles` and `registry` tables. Uses upsert to handle re-runs gracefully. The article must have an 'id' key. Args: article_data: Dict with keys matching the articles table schema: id, category, title, author, url, content, summary, audio_url, published_date, scraped_at, summary_generated_at Returns: True on success, False on failure or missing Supabase config. """ if not self.supabase: return False try: article_id = article_data.get('id') if not article_id: return False article_record = { "id": article_id, "category": article_data.get('category', ''), "title": article_data.get('title', ''), "author": article_data.get('author', ''), "url": article_data.get('url', ''), "content": article_data.get('content', ''), "summary": article_data.get('summary', ''), "audio_url": article_data.get('audio_url', ''), "published_at": article_data.get('published_date'), "scraped_at": article_data.get('scraped_at'), "summary_generated_at": article_data.get('summary_generated_at') } registry_record = { "id": article_id, "category": article_data.get('category', ''), "title": article_data.get('title', ''), "status": "completed" } self.supabase.table("articles").upsert(article_record).execute() self.supabase.table("registry").upsert(registry_record).execute() logger.debug(f"Successfully saved article {article_id} to database.") return True except Exception as e: logger.error(f"Error inserting article {article_data.get('id')}: {str(e)}") return False