Spaces:

moztrk
/

sentinel-api

Sleeping

File size: 6,117 Bytes

import subprocess
import time
import os
from typing import Optional

import torch
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

try:
    import psutil
except ImportError:
    psutil = None

from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram
from app.services.moderation_service import run_moderation, run_moderation_batch

router = APIRouter()


@router.get("/system-status")
def system_status():
    if psutil is None:
        raise HTTPException(status_code=500, detail="psutil kurulu değil")

    process = psutil.Process(os.getpid())
    mem = psutil.virtual_memory()
    status = {
        "process_ram_mb": round(process.memory_info().rss / 1024 / 1024, 1),
        "system_ram_total_mb": round(mem.total / 1024 / 1024, 1),
        "system_ram_used_mb": round(mem.used / 1024 / 1024, 1),
        "system_ram_percent": mem.percent,
        "cpu_percent": psutil.cpu_percent(interval=1),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }

    # Printed logs are visible in Hugging Face Space logs and local terminal output.
    print(f"[system-status] {status}")
    return status


def get_gpu_info():
    try:
        raw = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=utilization.gpu,memory.used,memory.total",
                "--format=csv,noheader,nounits",
            ],
            encoding="utf-8",
            stderr=subprocess.STDOUT,
        )
        util, mem_used, mem_total = [p.strip() for p in raw.strip().splitlines()[0].split(",", maxsplit=2)]
        return {
            "load": int(float(util)),
            "vram_used": int(float(mem_used)),
            "vram_total": int(float(mem_total)),
        }
    except Exception:
        if not torch.cuda.is_available():
            return None
        allocated = torch.cuda.memory_allocated(0) / (1024 ** 2)
        total = torch.cuda.get_device_properties(0).total_memory / (1024 ** 2)
        return {
            "load": None,
            "vram_used": int(round(allocated)),
            "vram_total": int(round(total)),
        }


def capture_process_metrics():
    cpu_load = None
    ram_pct = None
    if psutil is not None:
        cpu_load = round(psutil.cpu_percent(interval=0.05), 1)
        ram_pct = round(psutil.virtual_memory().percent, 1)

    gpu = get_gpu_info()
    return {
        "cpu": cpu_load,
        "ram_pct": ram_pct,
        "gpu_load": gpu["load"] if gpu else None,
        "vram_used": gpu["vram_used"] if gpu else 0,
        "vram_total": gpu["vram_total"] if gpu else 0,
        "timestamp": time.strftime("%H:%M:%S"),
    }


class ModerationInput(BaseModel):
    text: str
    platform_dil: Optional[str] = "tr"


class ModerationBatchInput(BaseModel):
    texts: list[str]
    platform_dil: Optional[str] = "tr"
    batch_size: Optional[int] = 8


@router.get("/vram-status")
def get_vram_status():
    if not torch.cuda.is_available():
        return {
            "cuda_available": False,
            "message": "CUDA aktif değil, GPU belleği ölçülemedi.",
        }

    allocated = torch.cuda.memory_allocated(0) / (1024 ** 2)
    reserved = torch.cuda.memory_reserved(0) / (1024 ** 2)
    total = torch.cuda.get_device_properties(0).total_memory / (1024 ** 2)

    return {
        "cuda_available": True,
        "gpu_name": torch.cuda.get_device_name(0),
        "allocated_mb": round(allocated, 2),
        "reserved_mb": round(reserved, 2),
        "total_mb": round(total, 2),
        "free_estimate_mb": round(total - reserved, 2),
    }


@router.get("/refresh-cache")
def refresh_cache():
    load_blacklist_to_ram()
    tr_count, en_count = get_cache_counts()
    return {
        "status": "success",
        "message": "Kara liste güncellendi.",
        "tr_count": tr_count,
        "en_count": en_count,
    }


@router.post("/analyze")
async def analyze(input_data: ModerationInput):
    if not input_data.text or not input_data.text.strip():
        raise HTTPException(status_code=400, detail="text alanı boş olamaz")

    start_time = time.time()
    decision, reason, risk, lang, cleaned, details = run_moderation(
        input_data.text,
        input_data.platform_dil or "tr",
    )

    latency_ms = round((time.time() - start_time) * 1000, 2)
    performance = capture_process_metrics()
    performance["latency_ms"] = latency_ms

    return {
        "text": input_data.text,
        "cleaned_text": cleaned,
        "decision": decision,
        "reason": reason,
        "risk_level": risk,
        "language": lang,
        "details": details,
        "latency_ms": latency_ms,
        "performance": performance,
    }


@router.post("/analyze-batch")
async def analyze_batch(input_data: ModerationBatchInput):
    if not input_data.texts:
        raise HTTPException(status_code=400, detail="texts alanı boş olamaz")

    cleaned_texts = [t for t in input_data.texts if isinstance(t, str) and t.strip()]
    if not cleaned_texts:
        raise HTTPException(status_code=400, detail="Geçerli metin bulunamadı")

    batch_size = max(1, int(input_data.batch_size or 8))
    start_time = time.time()
    batch_results = run_moderation_batch(
        cleaned_texts,
        input_data.platform_dil or "tr",
        batch_size=batch_size,
    )
    latency_ms = round((time.time() - start_time) * 1000, 2)
    performance = capture_process_metrics()
    performance["latency_ms"] = latency_ms

    items = []
    for original_text, result in zip(cleaned_texts, batch_results):
        decision, reason, risk, lang, cleaned, details = result
        items.append(
            {
                "text": original_text,
                "cleaned_text": cleaned,
                "decision": decision,
                "reason": reason,
                "risk_level": risk,
                "language": lang,
                "details": details,
            }
        )

    return {
        "count": len(items),
        "batch_size": batch_size,
        "latency_ms": latency_ms,
        "performance": performance,
        "results": items,
    }