"""AIGENCY V4 — Interactive demo Space. This Gradio app proxies user prompts to the eCloud production AIGENCY V4 API. Setup (when deploying to HuggingFace Spaces): 1. Add `AIGENCY_API_TOKEN` as a Space secret (Space settings → Variables and secrets). 2. Optionally add `AIGENCY_API_BASE` (default https://aigency.dev/api/v2) `AIGENCY_ASSISTANT_ID` (default 277) `AIGENCY_ASSISTANT_SLUG` (default alparslan-v4). The Space supports text and image-with-text input (one image per request, ≤ 30 MB, image/* MIME). """ from __future__ import annotations import io import os import time import threading from collections import deque from datetime import datetime, timezone import gradio as gr import requests from PIL import Image # ── Config ───────────────────────────────────────────────────────── API_BASE = os.environ.get("AIGENCY_API_BASE", "https://aigency.dev/api/v2") API_TOKEN = os.environ.get("AIGENCY_API_TOKEN", "") # set via Space secret ASSISTANT_ID = int(os.environ.get("AIGENCY_ASSISTANT_ID", "277")) ASSISTANT_SLUG = os.environ.get("AIGENCY_ASSISTANT_SLUG", "alparslan-v4") TIMEOUT = 60 # seconds DEMO_BANNER = ( "AIGENCY V4 — sovereign, multimodal, Turkish-first AI · " "128B parameters · 278K context · KVKK-resident" ) # ── Abuse mitigation knobs ───────────────────────────────────────── RATE_PER_MIN_PER_SESSION = int(os.environ.get("RATE_PER_MIN", "10")) MAX_PER_SESSION = int(os.environ.get("MAX_PER_SESSION", "50")) MAX_PROMPT_CHARS = int(os.environ.get("MAX_PROMPT_CHARS", "2000")) DAILY_CAP_GLOBAL = int(os.environ.get("DAILY_CAP", "5000")) # ── Global daily counter (thread-safe) ───────────────────────────── _global_lock = threading.Lock() _global_counter = {"date": "", "count": 0} def _utc_today() -> str: return datetime.now(timezone.utc).strftime("%Y-%m-%d") def _check_and_inc_daily() -> tuple[bool, int]: """Increment the global daily counter; return (allowed, current_count).""" today = _utc_today() with _global_lock: if _global_counter["date"] != today: _global_counter["date"] = today _global_counter["count"] = 0 if _global_counter["count"] >= DAILY_CAP_GLOBAL: return False, _global_counter["count"] _global_counter["count"] += 1 return True, _global_counter["count"] PLACEHOLDER_MSG = ( "🔒 The interactive chat is being activated.\n\n" "While the demo is finalised, you can already:\n" " · Browse the **Benchmark Leaderboard** tab — 22 benchmarks, 13,344 calls\n" " · Read the [model card](https://huggingface.co/aigencydev/AIGENCY-V4)\n" " · Inspect the [evaluation dataset](https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation)\n" " · Read the whitepaper (32 pages, EN/TR) on [GitHub](https://github.com/ecloud-bh/aigency-v4-whitepaper)\n\n" "For production access: **info@e-cloud.web.tr · ai@aigency.dev**" ) # ── Bench numbers (static, mirrored from the model card) ─────────── BENCH = [ ("Belebele-TR (TR reading)", 0.8733, "Turkish · #1 globally"), ("ARC-Challenge", 0.9488, "Frontier-tied"), ("GSM8K", 0.9462, "Frontier-tied"), ("HellaSwag", 0.8860, "≈6pp behind frontier"), ("MBPP", 0.8482, "Upper-mid frontier"), ("HumanEval", 0.8415, "Upper-mid frontier"), ("TQuAD (TR QA)", 0.8240, "Turkish · #1 globally"), ("IFEval (strict)", 0.8022, "Frontier lower-mid"), ("MMLU", 0.8010, "Frontier lower-mid"), ("HumanEval+", 0.7988, "Upper-mid frontier"), ("DocVQA (≥0.5 ANLS)", 0.7917, "Multimodal (first-gen)"), ("TR Grammar", 0.7900, "Turkish · #1 globally"), ("MBPP+", 0.7804, "Upper-mid frontier"), ("TruthfulQA MC1", 0.7638, "Frontier-tied"), ("WinoGrande", 0.7466, "≈11pp behind frontier"), 
("XNLI-TR", 0.7340, "Turkish · #1 globally"), ("TR-MMLU", 0.7080, "Turkish · #1 globally"), ("ChartQA (relaxed)", 0.6768, "Multimodal (first-gen)"), ("MMMU (val)", 0.5333, "Multimodal (first-gen)"), ("MMLU-Pro", 0.5020, "Development area"), ("GPQA Diamond", 0.3788, "Development area"), ("MathVista (testmini)", 0.3413, "Multimodal (first-gen)"), ] # ── Two-step API protocol (token in URL path) ───────────────────── def new_chat(message: str) -> tuple[str, str]: """Open a chat. Returns (chat_id, first_response).""" url = f"{API_BASE}/newChat/{ASSISTANT_ID}/{API_TOKEN}" r = requests.post(url, json={"message": message}, timeout=TIMEOUT) r.raise_for_status() body = r.json() return body.get("chat_id", ""), body.get("message", "") def send_message(chat_id: str, message: str) -> str: url = f"{API_BASE}/sendMessage/{API_TOKEN}" files = { "chat_id": (None, str(chat_id)), "message": (None, message), } r = requests.post(url, files=files, timeout=TIMEOUT) r.raise_for_status() return r.json().get("message", "") def send_with_image(chat_id: str, message: str, image: Image.Image) -> str: buf = io.BytesIO() image.save(buf, format="PNG") buf.seek(0) url = f"{API_BASE}/sendMessage/{API_TOKEN}" files = { "chat_id": (None, str(chat_id)), "message": (None, message), "attachements": ("image.png", buf.getvalue(), "image/png"), # canonical typo } r = requests.post(url, files=files, timeout=TIMEOUT) r.raise_for_status() return r.json().get("message", "") # ── Gradio handler with session-scoped chat_id + rate limiting ──── def chat(prompt, image, history, chat_id_state, ts_log_state, count_state): """ ts_log_state: deque of recent timestamps (sliding 60s window) for per-minute rate count_state: total messages in this session """ history = history or [] chat_id = chat_id_state or "" ts_log = ts_log_state or deque() count = count_state or 0 if not prompt.strip(): return history, "", chat_id, ts_log, count # 1) Prompt length limit if len(prompt) > MAX_PROMPT_CHARS: history.append((prompt[:200] + " […]", ( f"⚠️ Prompt too long ({len(prompt)} chars). " f"Max {MAX_PROMPT_CHARS} chars per message in this demo. " f"For longer contexts, use the production API at aigency.dev." ))) return history, "", chat_id, ts_log, count # 2) Per-session total if count >= MAX_PER_SESSION: history.append((prompt, ( f"⚠️ Session limit reached ({MAX_PER_SESSION} messages). " f"Refresh the page to start a new session, " f"or contact info@e-cloud.web.tr · ai@aigency.dev for production access." ))) return history, "", chat_id, ts_log, count # 3) Per-minute rate (sliding window) now = time.time() while ts_log and now - ts_log[0] > 60: ts_log.popleft() if len(ts_log) >= RATE_PER_MIN_PER_SESSION: wait = int(60 - (now - ts_log[0])) history.append((prompt, ( f"⚠️ Slow down — max {RATE_PER_MIN_PER_SESSION} messages/minute. " f"Try again in {wait}s." ))) return history, "", chat_id, ts_log, count # 4) Global daily cap allowed, daily_count = _check_and_inc_daily() if not allowed: history.append((prompt, ( f"⚠️ The demo has reached today's global limit ({DAILY_CAP_GLOBAL} requests). " f"It resets at 00:00 UTC. For uninterrupted access, contact " f"info@e-cloud.web.tr · ai@aigency.dev." 
# ── Gradio handler with session-scoped chat_id + rate limiting ─────
def chat(prompt, image, history, chat_id_state, ts_log_state, count_state):
    """
    ts_log_state: deque of recent timestamps (sliding 60s window) for the per-minute rate limit
    count_state:  total messages sent in this session
    """
    history = history or []
    chat_id = chat_id_state or ""
    ts_log = ts_log_state or deque()
    count = count_state or 0

    if not prompt.strip():
        return history, "", chat_id, ts_log, count

    # 1) Prompt length limit
    if len(prompt) > MAX_PROMPT_CHARS:
        history.append((prompt[:200] + " […]", (
            f"⚠️ Prompt too long ({len(prompt)} chars). "
            f"Max {MAX_PROMPT_CHARS} chars per message in this demo. "
            f"For longer contexts, use the production API at aigency.dev."
        )))
        return history, "", chat_id, ts_log, count

    # 2) Per-session total
    if count >= MAX_PER_SESSION:
        history.append((prompt, (
            f"⚠️ Session limit reached ({MAX_PER_SESSION} messages). "
            f"Refresh the page to start a new session, "
            f"or contact info@e-cloud.web.tr · ai@aigency.dev for production access."
        )))
        return history, "", chat_id, ts_log, count

    # 3) Per-minute rate (sliding window)
    now = time.time()
    while ts_log and now - ts_log[0] > 60:
        ts_log.popleft()
    if len(ts_log) >= RATE_PER_MIN_PER_SESSION:
        wait = int(60 - (now - ts_log[0]))
        history.append((prompt, (
            f"⚠️ Slow down — max {RATE_PER_MIN_PER_SESSION} messages/minute. "
            f"Try again in {wait}s."
        )))
        return history, "", chat_id, ts_log, count

    # 4) Global daily cap
    allowed, daily_count = _check_and_inc_daily()
    if not allowed:
        history.append((prompt, (
            f"⚠️ The demo has reached today's global limit ({DAILY_CAP_GLOBAL} requests). "
            f"It resets at 00:00 UTC. For uninterrupted access, contact "
            f"info@e-cloud.web.tr · ai@aigency.dev."
        )))
        return history, "", chat_id, ts_log, count

    if not API_TOKEN:
        history.append((prompt, PLACEHOLDER_MSG))
        return history, "", chat_id, ts_log, count

    # 5) Actual API call
    try:
        if not chat_id:
            if image is None:
                cid, answer = new_chat(prompt)
                chat_id = cid
            else:
                cid, _ = new_chat("Bir görsel inceleyeceksin.")
                chat_id = cid
                answer = send_with_image(chat_id, prompt, image)
        else:
            if image is None:
                answer = send_message(chat_id, prompt)
            else:
                answer = send_with_image(chat_id, prompt, image)
    except Exception as e:
        answer = f"Error: {e}"

    ts_log.append(now)
    history.append((prompt, answer))
    return history, "", chat_id, ts_log, count + 1


def reset_session():
    return [], "", "", deque(), 0


def make_leaderboard():
    return [(name, f"{val:.2%}", note) for name, val, note in BENCH]


# ── UI ──────────────────────────────────────────────────────────────
TR_INTRO = """
### 🇹🇷 AIGENCY V4 Demo

**128 milyar parametreli yerli yapay zekâ.** Türkçe okuma anlamada dünya lideri,
fen muhakemesi ve grade-school matematikte frontier seviyesinde.
KVKK-yerel, tam-bağımsız mimari.

Aşağıdaki sohbet kutusuna Türkçe veya İngilizce bir istem yazın; isteğe bağlı
olarak bir görsel ekleyin. Sonuçlar canlı API'den gelir.
"""

EN_INTRO = """
### 🇬🇧 AIGENCY V4 Demo

**128B-parameter sovereign AI**, a world leader in Turkish reading comprehension
and at frontier level on grade-school math and scientific reasoning.
KVKK-resident, fully sovereign architecture.

Enter a Turkish or English prompt below and optionally attach an image.
Responses are served live by the production API.
"""

with gr.Blocks(title="AIGENCY V4 Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# AIGENCY V4\n\n*{DEMO_BANNER}*")

    chat_id_state = gr.State("")
    ts_log_state = gr.State(lambda: deque())
    count_state = gr.State(0)

    with gr.Tab("Chat"):
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(EN_INTRO)
            with gr.Column(scale=1):
                gr.Markdown(TR_INTRO)

        chatbot = gr.Chatbot(height=420, label="Conversation", type="tuples")
        with gr.Row():
            with gr.Column(scale=4):
                msg = gr.Textbox(
                    label="Prompt",
                    placeholder="Type a question in Turkish or English…",
                    lines=2,
                )
            with gr.Column(scale=1):
                img = gr.Image(label="Optional image", type="pil", height=200)
        with gr.Row():
            send = gr.Button("Send", variant="primary")
            clear = gr.Button("New conversation")

        gr.Markdown(
            f"*Demo limits: ≤ {MAX_PROMPT_CHARS} chars/message · "
            f"{RATE_PER_MIN_PER_SESSION} msg/min · "
            f"{MAX_PER_SESSION} msg/session · "
            f"{DAILY_CAP_GLOBAL} requests/day globally. "
            f"For unlimited production access: info@e-cloud.web.tr · ai@aigency.dev*"
        )
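    # The input/output lists below are mapped positionally by Gradio, so their
    # order must match the chat() parameter order and the order of its return tuple.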
    chat_inputs = [msg, img, chatbot, chat_id_state, ts_log_state, count_state]
    chat_outputs = [chatbot, msg, chat_id_state, ts_log_state, count_state]

    send.click(chat, chat_inputs, chat_outputs)
    msg.submit(chat, chat_inputs, chat_outputs)
    clear.click(reset_session, [], [chatbot, msg, chat_id_state, ts_log_state, count_state])

    with gr.Tab("Benchmark Leaderboard"):
        gr.Markdown(
            "## 22 benchmarks · 13,344 real API calls · Wilson 95% CI\n"
            "Methodology: temperature 0.0, deterministic seed=42, single "
            "session, 27 April 2026.\n\n"
            "*See [the dataset](https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation) "
            "for the full reproducibility capsule.*"
        )
        gr.Dataframe(
            headers=["Benchmark", "AIGENCY V4", "Position vs frontier"],
            value=make_leaderboard(),
            interactive=False,
            wrap=True,
        )

    with gr.Tab("About"):
        gr.Markdown("""
### About AIGENCY V4

AIGENCY V4 is the multimodal successor to AIGENCY V3, developed by
**eCloud Yazılım Teknolojileri**. Released to production in Q2 2026.

- **Architecture**: 120B sovereign decoder transformer + 8B vision encoder
- **Context**: 278K tokens (HBM 3-tier with TG-Decay)
- **Languages**: Turkish (primary), English
- **Licence**: API-only commercial — see https://aigency.dev/license

**Resources**

- 📄 Whitepaper (EN): https://github.com/ecloud-bh/aigency-v4-whitepaper/blob/main/AIGENCY-V4-Whitepaper-EN.pdf
- 📄 Whitepaper (TR): https://github.com/ecloud-bh/aigency-v4-whitepaper/blob/main/AIGENCY-V4-Whitepaper-TR.pdf
- 🤗 Model card: https://huggingface.co/aigencydev/AIGENCY-V4
- 📊 Evaluation dataset: https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation
- 🔧 Benchmark code: https://github.com/ecloud-bh/aigency-benchmarks
- 🔗 Production API: https://aigency.dev

**Contact**
info@e-cloud.web.tr · ai@aigency.dev · © 2026 eCloud Yazılım Teknolojileri
""")

if __name__ == "__main__":
    demo.queue(max_size=8).launch()