Spaces:

aigencydev
/

AIGENCY-V4-Demo

Sleeping

File size: 13,318 Bytes

"""AIGENCY V4 — Interactive demo Space.

This Gradio app proxies user prompts to the eCloud production AIGENCY V4 API.

Setup (when deploying to HuggingFace Spaces):
    1. Add `AIGENCY_API_TOKEN` as a Space secret (Space settings → Variables and secrets).
    2. Optionally add `AIGENCY_API_BASE`     (default https://aigency.dev/api/v2)
                       `AIGENCY_ASSISTANT_ID` (default 277)
                       `AIGENCY_ASSISTANT_SLUG` (default alparslan-v4).

The Space supports text and image-with-text input (one image per request,
≤ 30 MB, image/* MIME).
"""
from __future__ import annotations

import io
import os
import time
import threading
from collections import deque
from datetime import datetime, timezone

import gradio as gr
import requests
from PIL import Image

# ── Config ─────────────────────────────────────────────────────────
# Endpoint and assistant identity. Every value except TIMEOUT and the banner
# can be overridden through an environment variable and otherwise falls back
# to the default given here.
API_BASE = os.getenv("AIGENCY_API_BASE", "https://aigency.dev/api/v2")
API_TOKEN = os.getenv("AIGENCY_API_TOKEN", "")  # set via Space secret
ASSISTANT_ID = int(os.getenv("AIGENCY_ASSISTANT_ID", "277"))
ASSISTANT_SLUG = os.getenv("AIGENCY_ASSISTANT_SLUG", "alparslan-v4")
TIMEOUT = 60  # seconds
# One-line tagline rendered under the page title.
DEMO_BANNER = " · ".join(
    (
        "AIGENCY V4 — sovereign, multimodal, Turkish-first AI",
        "128B parameters",
        "278K context",
        "KVKK-resident",
    )
)

# ── Abuse mitigation knobs ─────────────────────────────────────────
def _env_int(name: str, default: str) -> int:
    """Return environment variable *name* as an int, using *default* when unset."""
    return int(os.environ.get(name, default))


RATE_PER_MIN_PER_SESSION = _env_int("RATE_PER_MIN", "10")  # messages per 60 s window, per session
MAX_PER_SESSION = _env_int("MAX_PER_SESSION", "50")  # total messages per session
MAX_PROMPT_CHARS = _env_int("MAX_PROMPT_CHARS", "2000")  # longest accepted prompt
DAILY_CAP_GLOBAL = _env_int("DAILY_CAP", "5000")  # requests per UTC day, all sessions

# ── Global daily counter (thread-safe) ─────────────────────────────
_global_lock = threading.Lock()
_global_counter = {"date": "", "count": 0}


def _utc_today() -> str:
    return datetime.now(timezone.utc).strftime("%Y-%m-%d")


def _check_and_inc_daily() -> tuple[bool, int]:
    """Try to admit one more request under the global daily cap.

    The tally restarts from zero whenever the UTC date changes. The whole
    check-and-increment runs under ``_global_lock``.

    Returns:
        ``(allowed, count)``. When the request is admitted, ``count`` already
        includes it; when the cap has been reached, ``allowed`` is ``False``
        and ``count`` is the unchanged tally.
    """
    current_day = _utc_today()
    with _global_lock:
        # First request of a new UTC day: start a fresh tally.
        if current_day != _global_counter["date"]:
            _global_counter.update(date=current_day, count=0)

        used = _global_counter["count"]
        if used < DAILY_CAP_GLOBAL:
            used += 1
            _global_counter["count"] = used
            return True, used
        return False, used

# Markdown reply used instead of a model answer; one tuple entry per output line.
PLACEHOLDER_MSG = "\n".join(
    (
        "🔒 The interactive chat is being activated.",
        "",
        "While the demo is finalised, you can already:",
        "  · Browse the **Benchmark Leaderboard** tab — 22 benchmarks, 13,344 calls",
        "  · Read the [model card](https://huggingface.co/aigencydev/AIGENCY-V4)",
        "  · Inspect the [evaluation dataset](https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation)",
        "  · Read the whitepaper (32 pages, EN/TR) on [GitHub](https://github.com/ecloud-bh/aigency-v4-whitepaper)",
        "",
        "For production access: **info@e-cloud.web.tr · ai@aigency.dev**",
    )
)


# ── Bench numbers (static, mirrored from the model card) ───────────
# Each entry is (benchmark name, score as a fraction in [0, 1], positioning note).
# The list order is the row order of the leaderboard table; scores are
# rendered as percentages by make_leaderboard().
BENCH: list[tuple[str, float, str]] = [
    ("Belebele-TR (TR reading)", 0.8733, "Turkish · #1 globally"),
    ("ARC-Challenge", 0.9488, "Frontier-tied"),
    ("GSM8K", 0.9462, "Frontier-tied"),
    ("HellaSwag", 0.8860, "≈6pp behind frontier"),
    ("MBPP", 0.8482, "Upper-mid frontier"),
    ("HumanEval", 0.8415, "Upper-mid frontier"),
    ("TQuAD (TR QA)", 0.8240, "Turkish · #1 globally"),
    ("IFEval (strict)", 0.8022, "Frontier lower-mid"),
    ("MMLU", 0.8010, "Frontier lower-mid"),
    ("HumanEval+", 0.7988, "Upper-mid frontier"),
    ("DocVQA (≥0.5 ANLS)", 0.7917, "Multimodal (first-gen)"),
    ("TR Grammar", 0.7900, "Turkish · #1 globally"),
    ("MBPP+", 0.7804, "Upper-mid frontier"),
    ("TruthfulQA MC1", 0.7638, "Frontier-tied"),
    ("WinoGrande", 0.7466, "≈11pp behind frontier"),
    ("XNLI-TR", 0.7340, "Turkish · #1 globally"),
    ("TR-MMLU", 0.7080, "Turkish · #1 globally"),
    ("ChartQA (relaxed)", 0.6768, "Multimodal (first-gen)"),
    ("MMMU (val)", 0.5333, "Multimodal (first-gen)"),
    ("MMLU-Pro", 0.5020, "Development area"),
    ("GPQA Diamond", 0.3788, "Development area"),
    ("MathVista (testmini)", 0.3413, "Multimodal (first-gen)"),
]


# ── Two-step API protocol (token in URL path) ─────────────────────
def new_chat(message: str) -> tuple[str, str]:
    """Open a conversation whose first user turn is *message*.

    Sends a JSON POST to ``{API_BASE}/newChat/{ASSISTANT_ID}/{API_TOKEN}``.

    Returns:
        ``(chat_id, first_response)`` taken from the ``chat_id`` and
        ``message`` keys of the JSON reply; a missing key yields ``""``.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    endpoint = "/".join((API_BASE, "newChat", str(ASSISTANT_ID), API_TOKEN))
    response = requests.post(endpoint, json=dict(message=message), timeout=TIMEOUT)
    response.raise_for_status()

    payload = response.json()
    chat_id = payload.get("chat_id", "")
    first_response = payload.get("message", "")
    return chat_id, first_response


def send_message(chat_id: str, message: str) -> str:
    """Post a text-only turn to an existing conversation.

    The fields are sent as multipart form data to
    ``{API_BASE}/sendMessage/{API_TOKEN}``.

    Returns:
        The ``message`` key of the JSON reply, or ``""`` when it is absent.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    # A (None, value) tuple makes requests emit a plain form field (no filename).
    form_fields = {
        field: (None, value)
        for field, value in (("chat_id", str(chat_id)), ("message", message))
    }
    response = requests.post(
        f"{API_BASE}/sendMessage/{API_TOKEN}",
        files=form_fields,
        timeout=TIMEOUT,
    )
    response.raise_for_status()
    reply = response.json()
    return reply.get("message", "")


def send_with_image(chat_id: str, message: str, image: Image.Image) -> str:
    """Post a turn consisting of *message* plus one image to a conversation.

    The image is re-encoded as PNG in memory and uploaded as ``image.png``
    alongside the text fields, as multipart form data, to
    ``{API_BASE}/sendMessage/{API_TOKEN}``.

    Returns:
        The ``message`` key of the JSON reply, or ``""`` when it is absent.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    png_bytes = png_buffer.getvalue()

    parts = {
        "chat_id": (None, str(chat_id)),
        "message": (None, message),
        # The misspelled field name is intentional ("canonical typo").
        "attachements": ("image.png", png_bytes, "image/png"),
    }
    endpoint = f"{API_BASE}/sendMessage/{API_TOKEN}"
    response = requests.post(endpoint, files=parts, timeout=TIMEOUT)
    response.raise_for_status()
    reply = response.json()
    return reply.get("message", "")


# ── Gradio handler with session-scoped chat_id + rate limiting ────
def _safe_error_text(exc: Exception) -> str:
    """Describe *exc* for the end user without echoing its message.

    The API token is part of every request URL, and the text of ``requests``
    exceptions (HTTP errors, connection failures) includes that URL, so
    ``str(exc)`` must never be shown in the chat window.
    """
    # Duck-typed on purpose: errors raised by ``raise_for_status`` carry the
    # response object, from which only the status code is exposed.
    status = getattr(getattr(exc, "response", None), "status_code", None)
    if status is not None:
        return f"Error: the AIGENCY API returned HTTP {status}. Please try again later."
    return f"Error: the request failed ({type(exc).__name__}). Please try again later."


def chat(prompt, image, history, chat_id_state, ts_log_state, count_state):
    """Handle one chat turn: enforce the demo limits, then call the API.

    Args:
        prompt: Text typed by the user; ``None`` is treated as empty.
        image: Optional PIL image attached to the turn, or ``None``.
        history: List of ``(user_text, bot_text)`` pairs shown in the chatbot.
        chat_id_state: Upstream chat id of this session, empty before the
            first successful call.
        ts_log_state: deque of timestamps of recent API attempts (sliding
            60 s window) used for the per-minute rate limit.
        count_state: Total API attempts made in this session.

    Returns:
        ``(history, "", chat_id, ts_log, count)`` — the updated history, an
        empty string that clears the prompt box, and the updated session
        state. Rejected or ignored turns leave ``ts_log`` and ``count``
        unchanged; attempted API calls are counted even when they fail.
    """
    history = history or []
    chat_id = chat_id_state or ""
    # An empty deque is falsy and gets replaced by a fresh one; that is
    # harmless because the deque in use is always returned as the new state.
    ts_log = ts_log_state or deque()
    count = count_state or 0
    prompt = prompt or ""  # tolerate a None prompt instead of crashing on .strip()

    if not prompt.strip():
        return history, "", chat_id, ts_log, count

    # 1) Prompt length limit
    if len(prompt) > MAX_PROMPT_CHARS:
        history.append((prompt[:200] + " […]", (
            f"⚠️ Prompt too long ({len(prompt)} chars). "
            f"Max {MAX_PROMPT_CHARS} chars per message in this demo. "
            f"For longer contexts, use the production API at aigency.dev."
        )))
        return history, "", chat_id, ts_log, count

    # 2) Per-session total
    if count >= MAX_PER_SESSION:
        history.append((prompt, (
            f"⚠️ Session limit reached ({MAX_PER_SESSION} messages). "
            f"Refresh the page to start a new session, "
            f"or contact info@e-cloud.web.tr · ai@aigency.dev for production access."
        )))
        return history, "", chat_id, ts_log, count

    # 3) Per-minute rate (sliding window)
    now = time.time()
    while ts_log and now - ts_log[0] > 60:
        ts_log.popleft()
    if len(ts_log) >= RATE_PER_MIN_PER_SESSION:
        # Never advertise "0s": the oldest entry may be just about to expire.
        wait = max(1, int(60 - (now - ts_log[0])))
        history.append((prompt, (
            f"⚠️ Slow down — max {RATE_PER_MIN_PER_SESSION} messages/minute. "
            f"Try again in {wait}s."
        )))
        return history, "", chat_id, ts_log, count

    # No token configured: answer with the placeholder. This is checked before
    # the global cap so that turns which never reach the API do not consume
    # the shared daily quota.
    if not API_TOKEN:
        history.append((prompt, PLACEHOLDER_MSG))
        return history, "", chat_id, ts_log, count

    # 4) Global daily cap
    allowed, _daily_count = _check_and_inc_daily()
    if not allowed:
        history.append((prompt, (
            f"⚠️ The demo has reached today's global limit ({DAILY_CAP_GLOBAL} requests). "
            f"It resets at 00:00 UTC. For uninterrupted access, contact "
            f"info@e-cloud.web.tr · ai@aigency.dev."
        )))
        return history, "", chat_id, ts_log, count

    # 5) Actual API call
    try:
        if not chat_id:
            if image is None:
                chat_id, answer = new_chat(prompt)
            else:
                # A conversation is opened with a text message first; the
                # image then goes out with the user's prompt as a second call.
                chat_id, _ = new_chat("Bir görsel inceleyeceksin.")
                answer = send_with_image(chat_id, prompt, image)
        elif image is None:
            answer = send_message(chat_id, prompt)
        else:
            answer = send_with_image(chat_id, prompt, image)
    except Exception as exc:  # UI boundary: always answer, never raise
        answer = _safe_error_text(exc)

    # Failed attempts are recorded too, so errors cannot be used to dodge limits.
    ts_log.append(now)
    history.append((prompt, answer))
    return history, "", chat_id, ts_log, count + 1


def reset_session():
    """Return blank values that start a new conversation.

    The tuple is ordered as (chat history, prompt text, chat id, timestamp
    log, message count).
    """
    blank_history = []
    blank_text = ""
    return blank_history, blank_text, blank_text, deque(), 0


def make_leaderboard():
    """Build the leaderboard rows from ``BENCH``.

    Returns:
        A list of ``(benchmark, score, note)`` tuples in ``BENCH`` order, with
        the score rendered as a percentage with two decimals (e.g. ``87.33%``).
    """
    rows = []
    for benchmark, score, remark in BENCH:
        rows.append((benchmark, format(score, ".2%"), remark))
    return rows


# ── UI ─────────────────────────────────────────────────────────────
# Turkish introduction, rendered as Markdown in the right-hand column of the
# Chat tab.
TR_INTRO = """
### 🇹🇷 AIGENCY V4 Demo

**128 milyar parametreli yerli yapay zekâ.** Türkçe okuma anlamada dünya
lideri, fen muhakemesi ve grade-school matematikte frontier seviyesinde.
KVKK-yerel, tam-bağımsız mimari.

Aşağıdaki sohbet kutusuna Türkçe veya İngilizce bir istem yazın; isteğe
bağlı olarak bir görsel ekleyin. Sonuçlar canlı API'den gelir.
"""

# English introduction, rendered as Markdown in the left-hand column of the
# Chat tab.
EN_INTRO = """
### 🇬🇧 AIGENCY V4 Demo

**128B-parameter sovereign AI**, world-leader on Turkish reading
comprehension and frontier-level on grade-school math and scientific
reasoning. KVKK-resident, fully sovereign architecture.

Enter a Turkish or English prompt below, optionally attach an image.
Responses are served live by the production API.
"""

# Page layout: a title banner, session state holders, and three tabs
# (Chat, Benchmark Leaderboard, About).
with gr.Blocks(title="AIGENCY V4 Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# AIGENCY V4\n\n*{DEMO_BANNER}*")
    # State values handed to `chat` on every turn and replaced by what it returns.
    chat_id_state = gr.State("")  # upstream chat id; empty until `chat` opens a conversation
    # Callable initial value — presumably evaluated by Gradio so that each
    # session starts with its own deque; TODO confirm against the gr.State docs.
    ts_log_state = gr.State(lambda: deque())
    count_state = gr.State(0)  # number of API attempts made in this session

    with gr.Tab("Chat"):
        # Two equal-width columns: English intro on the left, Turkish on the right.
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(EN_INTRO)
            with gr.Column(scale=1):
                gr.Markdown(TR_INTRO)
        # "tuples" history format matches the (user, bot) pairs appended by `chat`.
        chatbot = gr.Chatbot(height=420, label="Conversation", type="tuples")
        with gr.Row():
            with gr.Column(scale=4):
                msg = gr.Textbox(
                    label="Prompt",
                    placeholder="Type a question in Turkish or English…",
                    lines=2,
                )
            with gr.Column(scale=1):
                # type="pil" so the handler receives a PIL image (or None).
                img = gr.Image(label="Optional image", type="pil", height=200)
        with gr.Row():
            send = gr.Button("Send", variant="primary")
            clear = gr.Button("New conversation")
        # Footer that spells out the configured abuse-mitigation limits.
        gr.Markdown(
            f"*Demo limits: ≤ {MAX_PROMPT_CHARS} chars/message · "
            f"{RATE_PER_MIN_PER_SESSION} msg/min · "
            f"{MAX_PER_SESSION} msg/session · "
            f"{DAILY_CAP_GLOBAL} requests/day globally. "
            f"For unlimited production access: info@e-cloud.web.tr · ai@aigency.dev*"
        )
        # Input/output order must match the parameter order and the return
        # tuple of `chat`.
        chat_inputs = [msg, img, chatbot, chat_id_state, ts_log_state, count_state]
        chat_outputs = [chatbot, msg, chat_id_state, ts_log_state, count_state]
        # The Send button and the textbox submit event share one handler.
        send.click(chat, chat_inputs, chat_outputs)
        msg.submit(chat, chat_inputs, chat_outputs)
        # "New conversation" overwrites the chat window, prompt box and all
        # session state with the blank values from `reset_session`.
        clear.click(reset_session, [], [chatbot, msg, chat_id_state, ts_log_state, count_state])

    with gr.Tab("Benchmark Leaderboard"):
        gr.Markdown(
            "## 22 benchmarks · 13,344 real API calls · Wilson 95% CI\n"
            "Methodology: temperature 0.0, deterministic seed=42, single "
            "session 27 April 2026.\n\n"
            "*See [the dataset](https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation) "
            "for the full reproducibility capsule.*"
        )
        # Read-only table; rows are computed once, when the UI is built.
        gr.Dataframe(
            headers=["Benchmark", "AIGENCY V4", "Position vs frontier"],
            value=make_leaderboard(),
            interactive=False,
            wrap=True,
        )

    with gr.Tab("About"):
        # Static Markdown; the text sits at column 0 inside the literal.
        gr.Markdown("""
### About AIGENCY V4

AIGENCY V4 is the multimodal successor to AIGENCY V3, developed by
**eCloud Yazılım Teknolojileri**. Released to production in Q2 2026.

- **Architecture**: 120B sovereign decoder transformer + 8B vision encoder
- **Context**: 278K tokens (HBM 3-tier with TG-Decay)
- **Languages**: Turkish (primary), English
- **Licence**: API-only commercial — see https://aigency.dev/license

**Resources**

- 📄 Whitepaper (EN): https://github.com/ecloud-bh/aigency-v4-whitepaper/blob/main/AIGENCY-V4-Whitepaper-EN.pdf
- 📄 Whitepaper (TR): https://github.com/ecloud-bh/aigency-v4-whitepaper/blob/main/AIGENCY-V4-Whitepaper-TR.pdf
- 🤗 Model card: https://huggingface.co/aigencydev/AIGENCY-V4
- 📊 Evaluation dataset: https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation
- 🔧 Benchmark code: https://github.com/ecloud-bh/aigency-benchmarks
- 🔗 Production API: https://aigency.dev

**Contact**

info@e-cloud.web.tr · ai@aigency.dev · © 2026 eCloud Yazılım Teknolojileri
""")

if __name__ == "__main__":
    # Script entry point: enable the event queue (max_size=8), then serve the app.
    queued_app = demo.queue(max_size=8)
    queued_app.launch()