File size: 6,117 Bytes
857d4f5
 
12dd01e
857d4f5
 
 
 
 
 
 
 
 
 
 
 
7a29d91
857d4f5
 
 
 
12dd01e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
857d4f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a29d91
 
 
 
 
 
857d4f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a29d91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import subprocess
import time
import os
from typing import Optional

import torch
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

try:
    import psutil
except ImportError:
    psutil = None

from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram
from app.services.moderation_service import run_moderation, run_moderation_batch

router = APIRouter()


@router.get("/system-status")
def system_status():
    """Report process/system RAM and CPU usage as a JSON payload.

    Raises HTTP 500 when the optional ``psutil`` dependency is missing.
    """
    if psutil is None:
        raise HTTPException(status_code=500, detail="psutil kurulu değil")

    virtual_mem = psutil.virtual_memory()
    current_proc = psutil.Process(os.getpid())

    def to_mb(num_bytes):
        # Convert a byte count to megabytes, rounded to one decimal place.
        return round(num_bytes / 1024 / 1024, 1)

    payload = {
        "process_ram_mb": to_mb(current_proc.memory_info().rss),
        "system_ram_total_mb": to_mb(virtual_mem.total),
        "system_ram_used_mb": to_mb(virtual_mem.used),
        "system_ram_percent": virtual_mem.percent,
        # Blocking 1-second sample yields a meaningful CPU reading here.
        "cpu_percent": psutil.cpu_percent(interval=1),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }

    # Printed logs are visible in Hugging Face Space logs and local terminal output.
    print(f"[system-status] {payload}")
    return payload


def get_gpu_info():
    """Return GPU load/VRAM stats in MB, or None when no GPU is reachable.

    Prefers ``nvidia-smi``; if that fails for any reason, falls back to
    torch's CUDA counters (which cannot report utilization, hence
    ``load=None`` on that path).
    """
    try:
        query = [
            "nvidia-smi",
            "--query-gpu=utilization.gpu,memory.used,memory.total",
            "--format=csv,noheader,nounits",
        ]
        output = subprocess.check_output(
            query,
            encoding="utf-8",
            stderr=subprocess.STDOUT,
        )
        # Only the first GPU's row is reported.
        first_row = output.strip().splitlines()[0]
        fields = [field.strip() for field in first_row.split(",", maxsplit=2)]
        return {
            "load": int(float(fields[0])),
            "vram_used": int(float(fields[1])),
            "vram_total": int(float(fields[2])),
        }
    except Exception:
        # nvidia-smi is missing or produced unparsable output — try torch.
        if not torch.cuda.is_available():
            return None
        bytes_per_mb = 1024 ** 2
        used_mb = torch.cuda.memory_allocated(0) / bytes_per_mb
        total_mb = torch.cuda.get_device_properties(0).total_memory / bytes_per_mb
        return {
            "load": None,  # torch cannot report a utilization percentage
            "vram_used": int(round(used_mb)),
            "vram_total": int(round(total_mb)),
        }


def capture_process_metrics():
    """Snapshot host CPU/RAM usage and GPU stats for response telemetry.

    CPU/RAM fields are None when psutil is unavailable; VRAM fields fall
    back to 0 when no GPU info could be collected.
    """
    if psutil is None:
        cpu_load, ram_pct = None, None
    else:
        # Very short sampling window keeps per-request overhead minimal.
        cpu_load = round(psutil.cpu_percent(interval=0.05), 1)
        ram_pct = round(psutil.virtual_memory().percent, 1)

    gpu_stats = get_gpu_info() or {}
    return {
        "cpu": cpu_load,
        "ram_pct": ram_pct,
        "gpu_load": gpu_stats.get("load"),
        "vram_used": gpu_stats.get("vram_used", 0),
        "vram_total": gpu_stats.get("vram_total", 0),
        "timestamp": time.strftime("%H:%M:%S"),
    }


class ModerationInput(BaseModel):
    """Request body for /analyze: a single text to moderate."""

    # Raw user text to analyze; emptiness is validated in the endpoint.
    text: str
    # Platform language hint; defaults to Turkish ("tr").
    platform_dil: Optional[str] = "tr"


class ModerationBatchInput(BaseModel):
    """Request body for /analyze-batch: several texts moderated in one call."""

    # Texts to analyze; blank/non-string entries are filtered in the endpoint.
    texts: list[str]
    # Platform language hint; defaults to Turkish ("tr").
    platform_dil: Optional[str] = "tr"
    # Model batch size; clamped to >= 1 in the endpoint.
    batch_size: Optional[int] = 8


@router.get("/vram-status")
def get_vram_status():
    """Report CUDA device memory usage in megabytes.

    Returns a degraded payload (``cuda_available: False``) instead of
    raising, so the endpoint stays safe on CPU-only deployments.
    """
    if not torch.cuda.is_available():
        return {
            "cuda_available": False,
            "message": "CUDA aktif değil, GPU belleği ölçülemedi.",
        }

    bytes_per_mb = 1024 ** 2
    allocated_mb = torch.cuda.memory_allocated(0) / bytes_per_mb
    reserved_mb = torch.cuda.memory_reserved(0) / bytes_per_mb
    total_mb = torch.cuda.get_device_properties(0).total_memory / bytes_per_mb

    return {
        "cuda_available": True,
        "gpu_name": torch.cuda.get_device_name(0),
        "allocated_mb": round(allocated_mb, 2),
        "reserved_mb": round(reserved_mb, 2),
        "total_mb": round(total_mb, 2),
        # Reserved (not allocated) is the better proxy for memory taken.
        "free_estimate_mb": round(total_mb - reserved_mb, 2),
    }


@router.get("/refresh-cache")
def refresh_cache():
    """Reload the blacklist into RAM and report per-language entry counts."""
    load_blacklist_to_ram()
    counts = get_cache_counts()
    return {
        "status": "success",
        "message": "Kara liste güncellendi.",
        "tr_count": counts[0],
        "en_count": counts[1],
    }


@router.post("/analyze")
async def analyze(input_data: ModerationInput):
    """Moderate a single text and return the verdict plus perf telemetry.

    Raises HTTP 400 when the text is missing or whitespace-only.
    """
    text = input_data.text
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="text alanı boş olamaz")

    started = time.time()
    decision, reason, risk, lang, cleaned, details = run_moderation(
        text, input_data.platform_dil or "tr"
    )
    elapsed_ms = round((time.time() - started) * 1000, 2)

    performance = capture_process_metrics()
    # Latency is attached to the perf snapshot as well as the top level.
    performance["latency_ms"] = elapsed_ms

    return {
        "text": text,
        "cleaned_text": cleaned,
        "decision": decision,
        "reason": reason,
        "risk_level": risk,
        "language": lang,
        "details": details,
        "latency_ms": elapsed_ms,
        "performance": performance,
    }


@router.post("/analyze-batch")
async def analyze_batch(input_data: ModerationBatchInput):
    """Moderate many texts in one call; blank/non-string entries are dropped.

    Raises HTTP 400 when the list is empty or no usable text remains
    after filtering.
    """
    if not input_data.texts:
        raise HTTPException(status_code=400, detail="texts alanı boş olamaz")

    valid_texts = [
        entry for entry in input_data.texts if isinstance(entry, str) and entry.strip()
    ]
    if not valid_texts:
        raise HTTPException(status_code=400, detail="Geçerli metin bulunamadı")

    # `or 8` restores the default when batch_size is None/0; floor at 1.
    effective_batch = max(1, int(input_data.batch_size or 8))

    started = time.time()
    outcomes = run_moderation_batch(
        valid_texts,
        input_data.platform_dil or "tr",
        batch_size=effective_batch,
    )
    elapsed_ms = round((time.time() - started) * 1000, 2)

    performance = capture_process_metrics()
    performance["latency_ms"] = elapsed_ms

    results = [
        {
            "text": source_text,
            "cleaned_text": cleaned,
            "decision": decision,
            "reason": reason,
            "risk_level": risk,
            "language": lang,
            "details": details,
        }
        for source_text, (decision, reason, risk, lang, cleaned, details) in zip(
            valid_texts, outcomes
        )
    ]

    return {
        "count": len(results),
        "batch_size": effective_batch,
        "latency_ms": elapsed_ms,
        "performance": performance,
        "results": results,
    }