Spaces:

moztrk
/

sentinel-api

Sleeping

sentinel-api / performance_test.py

Mustafa Öztürk

Deploy Sentinel API to HF Space

857d4f5 16 days ago

3.11 kB

	import time

	import requests

	# Endpoint that every request in this script is POSTed to (loopback address, port 8000).
	API_URL = "http://127.0.0.1:8000/analyze"

	# Long Turkish paragraph used as the request text of the "TR - Ağır AI Yükü"
	# (heavy AI load) scenario. The paragraph describes its own purpose: a long,
	# profanity-free input for measuring latency on long texts.
	heavy_text_tr = """
	Merhaba, şu an geliştirmekte olduğumuz Sentinel moderasyon sisteminin performans limitlerini test etmek amacıyla bu uzun paragrafı oluşturuyorum.
	Yapay zeka modellerinin, özellikle BERTurk ve Detoxify gibi derin öğrenme mimarilerinin, metin uzunluğu arttıkça işlem süresini nasıl değiştirdiğini gözlemlemek bizim için kritik.
	Bu metin, herhangi bir küfür veya spam emaresi taşımadığı için sistemin tüm ön filtrelerinden geçerek doğrudan doğal dil işleme katmanına ulaşacaktır.
	Burada tokenization süreci, modelin çıkarım (inference) hızı ve donanım kaynaklarının kullanımı gibi metrikleri saniyeler bazında değil, milisaniyeler bazında ölçerek sistemin gerçek zamanlı
	isteklere ne kadar hazırlıklı olduğunu kanıtlamış olacağız. Umarım sonuçlar, sistemin ölçeklenebilirliği hakkında bize net bir veri sağlar.
	"""

	# Long English paragraph used as the request text of the "EN - Ağır AI Yükü"
	# (heavy AI load) scenario.
	heavy_text_en = """
	Hello, this is a long paragraph designed to test the performance limits of the Sentinel moderation system in English.
	We are specifically looking at how the Detoxify original model handles longer contexts and multiple toxicity labels simultaneously.
	By sending this comprehensive text, we ensure that the system bypasses simple keyword filters and triggers the full deep learning pipeline.
	This will give us a clear baseline for latency in a global production environment.
	"""

	# Benchmark scenarios as (label, text, language code) tuples. The label is
	# printed in the result table; text and language code form the request payload.
	# Label glossary: "Kısa Temiz" = short clean text, "Küfür" = profanity,
	# "Ağır AI Yükü" = heavy AI load (the long paragraphs).
	test_scenarios = [
	("TR - Kısa Temiz", "Merhaba, bugün hava çok güzel.", "tr"),
	("TR - Early Exit (Küfür)", "Lan naber o.ç.", "tr"),
	("TR - Ağır AI Yükü", heavy_text_tr, "tr"),
	("EN - Kısa Temiz", "Hello, I hope you are having a wonderful day.", "en"),
	("EN - Early Exit (Profanity)", "Shut the fuck up you bastard!", "en"),
	("EN - Ağır AI Yükü", heavy_text_en, "en"),
	]


	def run_performance_suite() -> None:
	    """POST every scenario in ``test_scenarios`` to ``API_URL`` and print a latency table.

	    For each ``(label, text, lang)`` scenario a JSON payload with the keys
	    ``text`` and ``platform_dil`` is sent with a 60 second timeout. One row
	    is printed per scenario:

	    * HTTP 200: the ``latency_ms`` value taken from the response JSON (0 when
	      the key is absent), the client-side round-trip time in milliseconds,
	      and a marker (lightning bolt when the reported latency is below 50 ms,
	      brain otherwise).
	    * Any other status code: the status code.
	    * A ``requests`` failure, or a 200 response whose body cannot be read as
	      a JSON object with a numeric ``latency_ms``: an error row.

	    Failures are reported per scenario and the loop moves on to the next one.
	    Nothing is returned.
	    """
	    # Plain "|" separators: a backslash before the pipe is not a valid string
	    # escape and would be printed verbatim in every row.
	    print(f"{'Senaryo Adı':<30} | {'Dil':<4} | {'API Latency':<12} | {'Toplam Süre':<12}")
	    print("-" * 75)

	    for label, text, lang in test_scenarios:
	        lang_tag = lang.upper()
	        payload = {"text": text, "platform_dil": lang}

	        # perf_counter() is monotonic, so the measured interval cannot be
	        # distorted by wall-clock adjustments during the request.
	        start_time = time.perf_counter()
	        try:
	            response = requests.post(API_URL, json=payload, timeout=60)
	        except requests.RequestException as exc:
	            print(f"{label:<30} | {lang_tag:<4} | BAĞLANTI HATASI: {exc}")
	            continue
	        total_time = (time.perf_counter() - start_time) * 1000

	        if response.status_code != 200:
	            print(f"{label:<30} | {lang_tag:<4} | HATA: {response.status_code}")
	            continue

	        try:
	            res_json = response.json()
	            api_latency = float(res_json.get("latency_ms", 0))
	        except (ValueError, TypeError, AttributeError) as exc:
	            # Non-JSON body, a JSON value that is not an object, or a
	            # non-numeric latency: report it instead of aborting the suite.
	            print(f"{label:<30} | {lang_tag:<4} | GEÇERSİZ YANIT: {exc}")
	            continue

	        status_symbol = "⚡" if api_latency < 50 else "🧠"
	        print(f"{label:<30} | {lang_tag:<4} | {api_latency:>8.2f} ms | {total_time:>8.2f} ms {status_symbol}")


	if __name__ == "__main__":
	    # Send one untimed warm-up request per language before measuring.
	    # A failed warm-up is reported but does not stop the script; the suite
	    # prints its own per-scenario connection errors.
	    for warmup_lang in ("tr", "en"):
	        try:
	            requests.post(API_URL, json={"text": "warmup", "platform_dil": warmup_lang}, timeout=60)
	        except requests.RequestException as exc:
	            print(f"Isınma isteği başarısız ({warmup_lang}): {exc}")
	    run_performance_suite()