File size: 4,592 Bytes
2cb327c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Database Utilities (Supabase)
=============================
Manages all Supabase operations: deduplication checks and article insertion.

Tables used:
- `registry`  — tracks which article IDs have been processed (deduplication)
- `articles`  — stores final processed articles with summaries and audio URLs

Configuration (required in .env):
    SUPABASE_URL=https://your-project.supabase.co
    SUPABASE_KEY=your_service_role_key

Usage:
    from backend.utils.db_utils import DatabaseManager

    db = DatabaseManager()

    # Check which articles are already processed
    existing = db.check_registry(["id1", "id2", "id3"])

    # Insert a fully processed article
    db.insert_article(article_dict)
"""

import os
from supabase import create_client, Client
from dotenv import load_dotenv
import logging

# Load variables from a local .env file (if present) at import time, so that
# SUPABASE_URL / SUPABASE_KEY are available in os.environ before a
# DatabaseManager is constructed.
load_dotenv()

# Module-level logger, named after this module, used by DatabaseManager.
logger = logging.getLogger(__name__)


class DatabaseManager:
    """Supabase-backed helper for article deduplication and storage.

    Wraps a Supabase client and exposes two operations: looking up already
    processed article IDs in the `registry` table, and upserting finished
    articles into the `articles` and `registry` tables.

    If SUPABASE_URL or SUPABASE_KEY are missing, or the client cannot be
    created, ``self.supabase`` is ``None`` and all operations gracefully
    return empty results / False instead of raising.

    NOTE(review): this class adds no locking of its own; whether a single
    instance may be shared across threads depends on the underlying Supabase
    client -- confirm before relying on it.
    """

    # Maximum number of IDs sent in one registry lookup. The `in` filter is
    # carried in the request URL, so very long ID lists can exceed URL length
    # limits; lookups are therefore split into batches of this size.
    _REGISTRY_BATCH_SIZE = 100

    def __init__(self):
        """Create the Supabase client from environment configuration.

        Reads SUPABASE_URL and SUPABASE_KEY from the environment. When either
        is missing, or client creation fails, a message is logged and
        ``self.supabase`` is left as ``None``; no exception is raised.
        """
        url: str = os.environ.get("SUPABASE_URL", "").strip()
        key: str = os.environ.get("SUPABASE_KEY", "").strip()

        # Auto-correct a missing scheme. Match the complete scheme prefix
        # (case-insensitively) so a bare host that merely starts with "http"
        # still gets the https:// prefix.
        if url and not url.lower().startswith(("http://", "https://")):
            url = f"https://{url}"

        # ``None`` means "not configured"; every public method checks for it.
        self.supabase: Client = None

        # A URL consisting only of a scheme carries no host and is treated
        # the same as a missing URL.
        if not url or not key or url.lower() in ("http://", "https://"):
            logger.warning("Supabase URL or Key not found. Database operations will be skipped.")
            return

        try:
            self.supabase = create_client(url, key)
        except Exception as e:
            # Best-effort: a broken configuration must not crash the caller.
            logger.error("Failed to initialize Supabase client: %s", e)

    def check_registry(self, article_ids: list) -> set:
        """Check which article IDs are already in the registry table.

        Duplicate IDs are queried only once, and the lookup is split into
        batches of at most ``_REGISTRY_BATCH_SIZE`` IDs.

        Args:
            article_ids: List of article ID strings to check.

        Returns:
            Set of IDs that already exist in registry (should be skipped).
            Empty set when Supabase is not configured, when no IDs are given,
            or when any lookup fails (the error is logged).
        """
        if not self.supabase or not article_ids:
            return set()

        try:
            # De-duplicate while keeping the caller's order.
            unique_ids = list(dict.fromkeys(article_ids))
            batch_size = self._REGISTRY_BATCH_SIZE

            found = set()
            for start in range(0, len(unique_ids), batch_size):
                batch = unique_ids[start:start + batch_size]
                response = (
                    self.supabase.table("registry")
                    .select("id")
                    .in_("id", batch)
                    .execute()
                )
                # Tolerate a response without rows (``data`` of None).
                found.update(item["id"] for item in (response.data or []))
            return found
        except Exception as e:
            logger.error("Error checking registry: %s", e)
            return set()

    def insert_article(self, article_data: dict) -> bool:
        """Insert a processed article into both `articles` and `registry` tables.

        Uses upsert to handle re-runs gracefully. The article must have an 'id' key.

        Args:
            article_data: Dict with keys matching the articles table schema:
                id, category, title, author, url, content, summary,
                audio_url, published_date, scraped_at, summary_generated_at.
                `published_date` is stored in the `published_at` column.
                Missing text fields default to an empty string; missing
                timestamps are sent as None.

        Returns:
            True on success. False on failure, on missing Supabase config,
            or when `article_data` is not dict-like or has no 'id'.
        """
        if not self.supabase:
            return False

        # Resolved once and reused by the error handler, so the handler never
        # has to touch `article_data` again (which may not be dict-like).
        article_id = None

        try:
            if hasattr(article_data, "get"):
                article_id = article_data.get("id")
            if not article_id:
                return False

            article_record = {
                "id": article_id,
                "category": article_data.get("category", ""),
                "title": article_data.get("title", ""),
                "author": article_data.get("author", ""),
                "url": article_data.get("url", ""),
                "content": article_data.get("content", ""),
                "summary": article_data.get("summary", ""),
                "audio_url": article_data.get("audio_url", ""),
                "published_at": article_data.get("published_date"),
                "scraped_at": article_data.get("scraped_at"),
                "summary_generated_at": article_data.get("summary_generated_at"),
            }

            registry_record = {
                "id": article_id,
                "category": article_data.get("category", ""),
                "title": article_data.get("title", ""),
                "status": "completed",
            }

            # Article first, registry second: if the article upsert raises,
            # the registry entry is not written.
            self.supabase.table("articles").upsert(article_record).execute()
            self.supabase.table("registry").upsert(registry_record).execute()

            logger.debug("Successfully saved article %s to database.", article_id)
            return True

        except Exception as e:
            logger.error("Error inserting article %s: %s", article_id, e)
            return False