File size: 9,615 Bytes
0a7f9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159faf0
0a7f9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159faf0
0a7f9b4
 
 
 
 
 
 
 
 
 
159faf0
0a7f9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
"""
Monitoring utilities specifically for Render production environment.
"""

import json
import logging
import os
import time
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict

from .memory_utils import (
    clean_memory,
    force_garbage_collection,
    get_memory_usage,
    log_memory_checkpoint,
    memory_summary,
)


class MemorySample(TypedDict):
    """Type definition for memory sample records."""

    timestamp: float  # Unix epoch seconds when the sample was taken (time.time())
    memory_mb: float  # Process memory usage in megabytes at sample time
    context: str  # Free-form label for where/why the sample was recorded


class MemoryStatus(TypedDict):
    """Type definition for memory status results."""

    timestamp: str  # ISO-8601 UTC timestamp of the check
    memory_mb: float  # Memory usage measured during the check
    peak_memory_mb: float  # Highest memory reading seen so far this process
    context: str  # Label passed by the caller (e.g. "request", "periodic")
    status: str  # One of "normal", "warning", "critical", "emergency"
    action_taken: Optional[str]  # Cleanup action performed, or None if none was needed
    memory_limit_mb: float  # Configured Render memory ceiling used for comparison


logger = logging.getLogger(__name__)

# Configure these thresholds based on your Render free tier limits
RENDER_MEMORY_LIMIT_MB = 512
RENDER_WARNING_THRESHOLD_MB = 400  # 78% of limit
RENDER_CRITICAL_THRESHOLD_MB = 450  # 88% of limit
RENDER_EMERGENCY_THRESHOLD_MB = 480  # 94% of limit

# Memory metrics tracking (module-private, mutated by the functions below)
_memory_samples: List[MemorySample] = []  # Rolling history of recorded samples
_memory_peak: float = 0.0  # High-water mark of observed memory usage
_memory_history_limit: int = 1000  # Keep last N samples to avoid unbounded growth
_memory_last_dump_time: float = 0.0  # Unix time of the last metrics dump to /tmp


def init_render_monitoring(log_interval: int = 10) -> None:
    """
    Initialize Render-specific monitoring with shorter intervals.

    Args:
        log_interval: Seconds between memory log entries
    """
    # Turn on memory debugging via environment flags read elsewhere
    os.environ["MEMORY_DEBUG"] = "1"
    os.environ["MEMORY_LOG_INTERVAL"] = str(log_interval)

    logger.info(
        "Initialized Render monitoring with %ds intervals (memory limit: %dMB)",
        log_interval,
        RENDER_MEMORY_LIMIT_MB,
    )

    # Take a baseline reading, log it, and store it as the startup sample
    baseline_mb = get_memory_usage()
    logger.info("Initial memory: %.1fMB", baseline_mb)
    _record_memory_sample("startup", baseline_mb)


def check_render_memory_thresholds(context: str = "periodic") -> MemoryStatus:
    """
    Check current memory against Render thresholds and take action if needed.

    Args:
        context: Label for the check (e.g., "request", "background")

    Returns:
        Dictionary with memory status details
    """
    global _memory_peak

    current_mb = get_memory_usage()
    _record_memory_sample(context, current_mb)

    # Track the high-water mark and log whenever it moves
    if current_mb > _memory_peak:
        _memory_peak = current_mb
        log_memory_checkpoint(f"new_peak_memory_{context}", force=True)

    status = "normal"
    action_taken: Optional[str] = None

    # Escalate the response with severity: emergency > critical > warning
    if current_mb > RENDER_EMERGENCY_THRESHOLD_MB:
        logger.critical(
            "EMERGENCY: Memory usage at %.1fMB - critically close to %.1fMB limit",
            current_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status, action_taken = "emergency", "emergency_cleanup"
        # Most aggressive recovery path: cleanup plus a forced GC pass
        clean_memory("emergency")
        force_garbage_collection()
    elif current_mb > RENDER_CRITICAL_THRESHOLD_MB:
        logger.warning(
            "CRITICAL: Memory usage at %.1fMB - approaching %.1fMB limit",
            current_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status, action_taken = "critical", "aggressive_cleanup"
        clean_memory("critical")
    elif current_mb > RENDER_WARNING_THRESHOLD_MB:
        logger.warning(
            "WARNING: Memory usage at %.1fMB - monitor closely (limit: %.1fMB)",
            current_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status, action_taken = "warning", "light_cleanup"
        clean_memory("warning")

    outcome: MemoryStatus = {
        "timestamp": datetime.now(timezone.utc).isoformat(),  # When the check ran
        "memory_mb": current_mb,  # Current memory usage
        "peak_memory_mb": _memory_peak,  # Peak memory usage recorded
        "context": context,  # Context of the memory check
        "status": status,  # Current status based on memory usage
        "action_taken": action_taken,  # Action taken if any
        "memory_limit_mb": RENDER_MEMORY_LIMIT_MB,  # Memory limit defined
    }

    # Periodically dump memory metrics to a file in /tmp
    _maybe_dump_memory_metrics()

    return outcome


def _record_memory_sample(context: str, memory_mb: float) -> None:
    """Record a memory sample with timestamp for trend analysis."""
    entry: MemorySample = {
        "timestamp": time.time(),
        "memory_mb": memory_mb,
        "context": context,
    }
    _memory_samples.append(entry)

    # Trim in place so the history never exceeds the configured limit
    excess = len(_memory_samples) - _memory_history_limit
    if excess > 0:
        del _memory_samples[:excess]


def _maybe_dump_memory_metrics() -> None:
    """Periodically save memory metrics to file for later analysis."""
    global _memory_last_dump_time

    # Throttle: write at most one dump per 5-minute window
    now = time.time()
    if now - _memory_last_dump_time < 300:  # 5 minutes
        return

    try:
        _memory_last_dump_time = now

        # Ensure the target directory exists before writing
        dump_dir = "/tmp/render_metrics"
        os.makedirs(dump_dir, exist_ok=True)

        # Timestamped filename so successive dumps never collide
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        filename = f"{dump_dir}/memory_metrics_{timestamp}.json"

        # Snapshot of everything useful for offline trend analysis
        payload = {
            "samples": _memory_samples,
            "peak_memory_mb": _memory_peak,
            "memory_limit_mb": RENDER_MEMORY_LIMIT_MB,
            "summary": memory_summary(),
        }
        with open(filename, "w") as out:
            json.dump(payload, out, indent=2)

        logger.info("Memory metrics dumped to %s", filename)

    except Exception as e:
        # Best-effort: metrics dumping must never crash the service
        logger.error("Failed to dump memory metrics: %s", e)


def get_memory_trends() -> Dict[str, Any]:
    """
    Get memory usage trends from collected samples.

    Returns:
        Dictionary with memory trends and statistics. Contains
        ``{"status": "no_data"}`` when no samples have been recorded;
        otherwise current/peak usage, sample count, and — when at least
        two samples fall in the window — the memory delta over the last
        5 minutes (``trend_5min_mb``) and the last hour (``trend_1hour_mb``).
    """
    if not _memory_samples:
        return {"status": "no_data"}

    # The empty case returned above, so the last sample always exists here
    # (the original had an unreachable `if _memory_samples else 0.0` guard).
    trends: Dict[str, Any] = {
        "current_mb": _memory_samples[-1]["memory_mb"],
        "peak_mb": _memory_peak,
        "samples_count": len(_memory_samples),
    }

    # Read the clock once so both windows share the same reference point
    # (the original called time.time() per element inside each comprehension).
    now = time.time()

    def _window_delta(window_seconds: float) -> Optional[float]:
        """Memory change (MB) from first to last sample within the window, or None."""
        window = [s for s in _memory_samples if now - s["timestamp"] < window_seconds]
        if len(window) < 2:
            return None
        return window[-1]["memory_mb"] - window[0]["memory_mb"]

    delta_5min = _window_delta(300)  # Last 5 minutes
    if delta_5min is not None:
        trends["trend_5min_mb"] = delta_5min

    delta_1hour = _window_delta(3600)  # Last hour
    if delta_1hour is not None:
        trends["trend_1hour_mb"] = delta_1hour

    return trends


def add_memory_middleware(app) -> None:
    """
    Add middleware to Flask app for request-level memory monitoring.

    Args:
        app: Flask application instance
    """

    def check_memory_before_request():
        """Check memory before processing each request."""
        try:
            from flask import request

            try:
                memory_status = check_render_memory_thresholds(f"request_{request.endpoint}")

                # Shed load while memory is in the emergency tier
                if memory_status["status"] == "emergency":
                    logger.critical(
                        "Rejecting request due to critical memory usage: %s %.1fMB",
                        request.path,
                        memory_status["memory_mb"],
                    )
                    return {
                        "status": "error",
                        "message": ("Service temporarily unavailable due to " "resource constraints"),
                        "retry_after": 30,  # Suggest retry after 30 seconds
                    }, 503
            except Exception as e:
                # Don't let memory monitoring failures affect requests
                logger.debug(f"Memory status check failed: {e}")
        except Exception as e:
            # Catch all other errors to prevent middleware from breaking the app
            logger.debug(f"Memory middleware error: {e}")

    def log_memory_after_request(response):
        """Log memory usage after request processing."""
        try:
            logger.debug("Memory after request: %.1fMB", get_memory_usage())
        except Exception as e:
            logger.debug(f"After request memory logging failed: {e}")
        return response

    try:
        # Hook registration via explicit calls (equivalent to the decorator form)
        app.before_request(check_memory_before_request)
        app.after_request(log_memory_after_request)
    except Exception as e:
        # If we can't even add the middleware, log it but don't crash
        logger.warning(f"Failed to add memory middleware: {e}")

        # Define empty placeholder to avoid errors
        @app.before_request
        def memory_middleware_failed():
            pass