# (Hugging Face Spaces page residue — "Spaces: Sleeping" status header, not part of the module.)
| import subprocess | |
| import time | |
| import os | |
| from typing import Optional | |
| import torch | |
| from fastapi import APIRouter, HTTPException | |
| from pydantic import BaseModel | |
| try: | |
| import psutil | |
| except ImportError: | |
| psutil = None | |
| from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram | |
| from app.services.moderation_service import run_moderation, run_moderation_batch | |
# Shared FastAPI router for this module's endpoints.
# NOTE(review): no route decorators are visible in this extraction — the
# endpoint functions below are presumably registered on this router; confirm
# against the original file.
router = APIRouter()
def system_status():
    """Snapshot process RAM, system RAM and CPU usage, and log it to stdout.

    Raises HTTPException(500) when psutil is not installed.
    Returns a dict of rounded MB figures, percentages and a timestamp.
    """
    if psutil is None:
        raise HTTPException(status_code=500, detail="psutil kurulu değil")

    this_process = psutil.Process(os.getpid())
    vm = psutil.virtual_memory()

    def as_mb(num_bytes):
        # Convert a byte count to megabytes, one decimal place.
        return round(num_bytes / 1024 / 1024, 1)

    # NOTE: cpu_percent(interval=1) blocks the caller for a full second
    # while it samples CPU load.
    status = {
        "process_ram_mb": as_mb(this_process.memory_info().rss),
        "system_ram_total_mb": as_mb(vm.total),
        "system_ram_used_mb": as_mb(vm.used),
        "system_ram_percent": vm.percent,
        "cpu_percent": psutil.cpu_percent(interval=1),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    # Printed logs are visible in Hugging Face Space logs and local terminal output.
    print(f"[system-status] {status}")
    return status
def get_gpu_info():
    """Best-effort GPU utilisation / VRAM snapshot.

    Tries `nvidia-smi` first; if that is missing, fails, or produces
    unparseable output, falls back to torch's CUDA allocator statistics
    for device 0.

    Returns:
        dict with keys ``load`` (percent, ``None`` in the torch fallback,
        which does not expose utilisation), ``vram_used`` and
        ``vram_total`` (MiB) — or ``None`` when no GPU info is available.
    """
    try:
        raw = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=utilization.gpu,memory.used,memory.total",
                "--format=csv,noheader,nounits",
            ],
            encoding="utf-8",
            stderr=subprocess.STDOUT,
        )
        # First line = first GPU; fields are "util, mem.used, mem.total".
        util, mem_used, mem_total = [
            part.strip()
            for part in raw.strip().splitlines()[0].split(",", maxsplit=2)
        ]
        return {
            "load": int(float(util)),
            "vram_used": int(float(mem_used)),
            "vram_total": int(float(mem_total)),
        }
    # Narrowed from a bare `except Exception` so genuine programming errors
    # surface instead of being silently converted into the fallback path:
    # OSError covers a missing nvidia-smi binary (FileNotFoundError),
    # CalledProcessError a failing invocation, ValueError/IndexError a
    # malformed or empty output.
    except (OSError, subprocess.CalledProcessError, ValueError, IndexError):
        if not torch.cuda.is_available():
            return None
        allocated = torch.cuda.memory_allocated(0) / (1024 ** 2)
        total = torch.cuda.get_device_properties(0).total_memory / (1024 ** 2)
        return {
            "load": None,  # utilisation percentage is not exposed by torch
            "vram_used": int(round(allocated)),
            "vram_total": int(round(total)),
        }
def capture_process_metrics():
    """Lightweight CPU/RAM/GPU snapshot attached to moderation responses.

    Unavailable CPU/RAM figures come back as None; missing GPU figures as
    None load and zero VRAM counters.
    """
    metrics = {
        "cpu": None,
        "ram_pct": None,
        "gpu_load": None,
        "vram_used": 0,
        "vram_total": 0,
        "timestamp": time.strftime("%H:%M:%S"),
    }
    if psutil is not None:
        # Very short sampling interval keeps per-request latency impact small.
        metrics["cpu"] = round(psutil.cpu_percent(interval=0.05), 1)
        metrics["ram_pct"] = round(psutil.virtual_memory().percent, 1)
    gpu_stats = get_gpu_info()
    if gpu_stats:
        metrics["gpu_load"] = gpu_stats["load"]
        metrics["vram_used"] = gpu_stats["vram_used"]
        metrics["vram_total"] = gpu_stats["vram_total"]
    return metrics
class ModerationInput(BaseModel):
    """Request body for single-text moderation."""

    # Raw text to moderate; emptiness is validated in the endpoint, not here.
    text: str
    # Platform language hint ("tr" by default); presumably an ISO 639-1
    # code — TODO confirm against run_moderation's expectations.
    platform_dil: Optional[str] = "tr"
class ModerationBatchInput(BaseModel):
    """Request body for batch moderation."""

    # Texts to moderate; non-string/blank entries are filtered by the endpoint.
    texts: list[str]
    # Platform language hint ("tr" by default).
    platform_dil: Optional[str] = "tr"
    # Model batch size; clamped to >= 1 by the endpoint.
    batch_size: Optional[int] = 8
def get_vram_status():
    """Report torch CUDA memory statistics for device 0 (all sizes in MB).

    When CUDA is unavailable, returns a small dict flagging that fact
    instead of raising.
    """
    if not torch.cuda.is_available():
        return {
            "cuda_available": False,
            "message": "CUDA aktif değil, GPU belleği ölçülemedi.",
        }
    mib = 1024 ** 2
    allocated_mb = torch.cuda.memory_allocated(0) / mib
    reserved_mb = torch.cuda.memory_reserved(0) / mib
    total_mb = torch.cuda.get_device_properties(0).total_memory / mib
    return {
        "cuda_available": True,
        "gpu_name": torch.cuda.get_device_name(0),
        "allocated_mb": round(allocated_mb, 2),
        "reserved_mb": round(reserved_mb, 2),
        "total_mb": round(total_mb, 2),
        # Rough free estimate: total minus what the caching allocator reserved.
        "free_estimate_mb": round(total_mb - reserved_mb, 2),
    }
def refresh_cache():
    """Reload the blacklist into RAM and report per-language cache sizes."""
    load_blacklist_to_ram()
    turkish_total, english_total = get_cache_counts()
    return {
        "status": "success",
        "message": "Kara liste güncellendi.",
        "tr_count": turkish_total,
        "en_count": english_total,
    }
async def analyze(input_data: ModerationInput):
    """Moderate a single text and return the decision plus perf metrics.

    Raises HTTPException(400) on empty/whitespace-only input.
    """
    text = input_data.text
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="text alanı boş olamaz")

    language_hint = input_data.platform_dil or "tr"
    started = time.time()
    decision, reason, risk, lang, cleaned, details = run_moderation(text, language_hint)
    elapsed_ms = round((time.time() - started) * 1000, 2)

    performance = capture_process_metrics()
    performance["latency_ms"] = elapsed_ms
    return {
        "text": text,
        "cleaned_text": cleaned,
        "decision": decision,
        "reason": reason,
        "risk_level": risk,
        "language": lang,
        "details": details,
        "latency_ms": elapsed_ms,
        "performance": performance,
    }
async def analyze_batch(input_data: ModerationBatchInput):
    """Moderate several texts in one call.

    Non-string and blank entries are dropped before moderation; raises
    HTTPException(400) when nothing usable remains.
    """
    if not input_data.texts:
        raise HTTPException(status_code=400, detail="texts alanı boş olamaz")
    valid_texts = [t for t in input_data.texts if isinstance(t, str) and t.strip()]
    if not valid_texts:
        raise HTTPException(status_code=400, detail="Geçerli metin bulunamadı")

    chunk = max(1, int(input_data.batch_size or 8))
    started = time.time()
    outcomes = run_moderation_batch(
        valid_texts,
        input_data.platform_dil or "tr",
        batch_size=chunk,
    )
    elapsed_ms = round((time.time() - started) * 1000, 2)

    performance = capture_process_metrics()
    performance["latency_ms"] = elapsed_ms

    # Pair each surviving input with its moderation tuple, in order.
    results = [
        {
            "text": source_text,
            "cleaned_text": cleaned,
            "decision": decision,
            "reason": reason,
            "risk_level": risk,
            "language": lang,
            "details": details,
        }
        for source_text, (decision, reason, risk, lang, cleaned, details)
        in zip(valid_texts, outcomes)
    ]
    return {
        "count": len(results),
        "batch_size": chunk,
        "latency_ms": elapsed_ms,
        "performance": performance,
        "results": results,
    }