import os
import subprocess
import time
from typing import Optional

import torch
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

try:
    import psutil
except ImportError:
    psutil = None

from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram
from app.services.moderation_service import run_moderation, run_moderation_batch

router = APIRouter()

# One mebibyte, used for all byte -> MB conversions below.
_MIB = 1024 * 1024


@router.get("/system-status")
def system_status():
    """Return process RAM, system RAM and CPU usage as a JSON-friendly dict.

    Raises:
        HTTPException: 500 when psutil is not installed.
    """
    if psutil is None:
        raise HTTPException(status_code=500, detail="psutil kurulu değil")

    proc = psutil.Process(os.getpid())
    vmem = psutil.virtual_memory()
    status = {
        "process_ram_mb": round(proc.memory_info().rss / _MIB, 1),
        "system_ram_total_mb": round(vmem.total / _MIB, 1),
        "system_ram_used_mb": round(vmem.used / _MIB, 1),
        "system_ram_percent": vmem.percent,
        # interval=1 samples CPU load over a full second (blocking call).
        "cpu_percent": psutil.cpu_percent(interval=1),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    # Printed logs are visible in Hugging Face Space logs and local terminal output.
    print(f"[system-status] {status}")
    return status


def get_gpu_info():
    """Best-effort GPU stats in MB: try nvidia-smi, fall back to torch.

    Returns a dict with "load", "vram_used", "vram_total", or None when no
    GPU information is obtainable at all.
    """
    try:
        raw = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=utilization.gpu,memory.used,memory.total",
                "--format=csv,noheader,nounits",
            ],
            encoding="utf-8",
            stderr=subprocess.STDOUT,
        )
        first_line = raw.strip().splitlines()[0]
        util, mem_used, mem_total = (
            field.strip() for field in first_line.split(",", maxsplit=2)
        )
        return {
            "load": int(float(util)),
            "vram_used": int(float(mem_used)),
            "vram_total": int(float(mem_total)),
        }
    except Exception:
        # nvidia-smi missing or its output unparseable: use torch counters,
        # which know allocation but not utilization.
        if not torch.cuda.is_available():
            return None
        return {
            "load": None,
            "vram_used": int(round(torch.cuda.memory_allocated(0) / _MIB)),
            "vram_total": int(
                round(torch.cuda.get_device_properties(0).total_memory / _MIB)
            ),
        }


def capture_process_metrics():
    """Snapshot CPU/RAM/GPU load for embedding into a response payload."""
    cpu_load = None
    ram_pct = None
    if psutil is not None:
        # Very short sampling window: keep the per-request overhead tiny.
        cpu_load = round(psutil.cpu_percent(interval=0.05), 1)
        ram_pct = round(psutil.virtual_memory().percent, 1)

    gpu = get_gpu_info() or {}
    return {
        "cpu": cpu_load,
        "ram_pct": ram_pct,
        "gpu_load": gpu.get("load"),
        "vram_used": gpu.get("vram_used", 0),
        "vram_total": gpu.get("vram_total", 0),
        "timestamp": time.strftime("%H:%M:%S"),
    }


class ModerationInput(BaseModel):
    """Request body for /analyze: a single text plus the platform language."""

    text: str
    platform_dil: Optional[str] = "tr"


class ModerationBatchInput(BaseModel):
    """Request body for /analyze-batch: many texts, language, batch size."""

    texts: list[str]
    platform_dil: Optional[str] = "tr"
    batch_size: Optional[int] = 8


@router.get("/vram-status")
def get_vram_status():
    """Report CUDA memory counters in MB, or a message when CUDA is off."""
    if not torch.cuda.is_available():
        return {
            "cuda_available": False,
            "message": "CUDA aktif değil, GPU belleği ölçülemedi.",
        }

    allocated_mb = torch.cuda.memory_allocated(0) / _MIB
    reserved_mb = torch.cuda.memory_reserved(0) / _MIB
    total_mb = torch.cuda.get_device_properties(0).total_memory / _MIB
    return {
        "cuda_available": True,
        "gpu_name": torch.cuda.get_device_name(0),
        "allocated_mb": round(allocated_mb, 2),
        "reserved_mb": round(reserved_mb, 2),
        "total_mb": round(total_mb, 2),
        # Reserved memory is the closest proxy for "in use" from torch's side.
        "free_estimate_mb": round(total_mb - reserved_mb, 2),
    }
@router.get("/refresh-cache")
def refresh_cache():
    """Reload the blacklist into RAM and report the resulting cache sizes."""
    load_blacklist_to_ram()
    tr_count, en_count = get_cache_counts()
    return {
        "status": "success",
        "message": "Kara liste güncellendi.",
        "tr_count": tr_count,
        "en_count": en_count,
    }


@router.post("/analyze")
def analyze(input_data: ModerationInput):
    """Moderate a single text and return the decision plus runtime metrics.

    Deliberately a plain ``def`` (not ``async def``): ``run_moderation`` is
    blocking/CPU-bound, and FastAPI executes sync endpoints in its thread
    pool, so the event loop stays responsive during inference.

    Raises:
        HTTPException: 400 when ``text`` is missing or whitespace-only.
    """
    if not input_data.text or not input_data.text.strip():
        raise HTTPException(status_code=400, detail="text alanı boş olamaz")

    # perf_counter is monotonic: latency can't be skewed by clock adjustments.
    start = time.perf_counter()
    decision, reason, risk, lang, cleaned, details = run_moderation(
        input_data.text,
        input_data.platform_dil or "tr",
    )
    latency_ms = round((time.perf_counter() - start) * 1000, 2)

    performance = capture_process_metrics()
    performance["latency_ms"] = latency_ms
    return {
        "text": input_data.text,
        "cleaned_text": cleaned,
        "decision": decision,
        "reason": reason,
        "risk_level": risk,
        "language": lang,
        "details": details,
        "latency_ms": latency_ms,
        "performance": performance,
    }


@router.post("/analyze-batch")
def analyze_batch(input_data: ModerationBatchInput):
    """Moderate a list of texts in batches, skipping blank/non-string entries.

    Sync ``def`` for the same reason as /analyze: batch inference is blocking
    and must not run on the event loop.

    Raises:
        HTTPException: 400 when ``texts`` is empty or holds no usable text.
    """
    if not input_data.texts:
        raise HTTPException(status_code=400, detail="texts alanı boş olamaz")

    # Keep only non-empty string entries; order is preserved for zip below.
    valid_texts = [t for t in input_data.texts if isinstance(t, str) and t.strip()]
    if not valid_texts:
        raise HTTPException(status_code=400, detail="Geçerli metin bulunamadı")

    # Clamp to at least 1 so a 0/None batch_size cannot break the batcher.
    batch_size = max(1, int(input_data.batch_size or 8))

    start = time.perf_counter()
    batch_results = run_moderation_batch(
        valid_texts,
        input_data.platform_dil or "tr",
        batch_size=batch_size,
    )
    latency_ms = round((time.perf_counter() - start) * 1000, 2)

    performance = capture_process_metrics()
    performance["latency_ms"] = latency_ms

    items = [
        {
            "text": original_text,
            "cleaned_text": cleaned,
            "decision": decision,
            "reason": reason,
            "risk_level": risk,
            "language": lang,
            "details": details,
        }
        for original_text, (decision, reason, risk, lang, cleaned, details) in zip(
            valid_texts, batch_results
        )
    ]

    return {
        "count": len(items),
        "batch_size": batch_size,
        "latency_ms": latency_ms,
        "performance": performance,
        "results": items,
    }