ysharma's picture
ysharma HF Staff
Update app.py
53f5b2b verified
"""Tiny Aya — streaming multilingual chat, built for the Build Small Hackathon.
A gr.Server app: custom HTML/JS frontend (Cohere Labs + Build Small styling)
backed by Gradio's queue + ZeroGPU. The browser talks to the `/chat` route
through the Gradio JS client, so it streams token-by-token.
Deploy on Hugging Face Spaces:
- sdk: gradio (in README.md frontmatter)
- add HF_TOKEN as a Space secret (tiny-aya-global is a gated model)
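- no image upload is required: the logo and banner are loaded from the CDN URLs
in COHERE_LOGO_URL / BANNER_URL (a local copy of
Cohere Labs-LockUp-Blue-CMYK.png is only needed if you point the <img> at it)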
"""
import os
import threading
import torch
import gradio as gr
from fastapi.responses import HTMLResponse
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
try:
import spaces
_HAS_SPACES = True
except ImportError:
_HAS_SPACES = False
# --------------------------------------------------------------------------- #
# Model
# --------------------------------------------------------------------------- #
# Hub repository that provides both the tokenizer and the model weights.
MODEL_ID = "CohereLabs/tiny-aya-global"
# Access token for the gated repo; None when the environment variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Prefer the GPU whenever torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
# Loaded and moved once at import time rather than per request
# (original author's note: "module-level: ZeroGPU fast-restore").
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    token=HF_TOKEN,
)
model.to(device)
def _stream(messages: list):
    """Stream the model's reply to *messages*, yielding the text produced so far.

    Generation runs in a background thread that feeds a
    ``TextIteratorStreamer``; this generator drains the streamer and yields
    the accumulated reply after every decoded chunk.

    Args:
        messages: Chat history handed to the tokenizer's chat template. The
            frontend sends a list of ``{"role": ..., "content": ...}`` dicts.

    Yields:
        The full reply generated so far (each value supersedes the previous
        one; it is not a delta).

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here after the stream has been drained.
    """
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.3,
        streamer=streamer,
    )
    failures = []

    def _generate():
        # The streamer only receives its end-of-stream marker when generate()
        # finishes normally. If generate() raises, nothing would ever unblock
        # the consumer loop below (the streamer has no timeout), so record the
        # error and close the stream by hand.
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:  # thread boundary: re-raised by the consumer
            failures.append(exc)
            streamer.on_finalized_text("", stream_end=True)

    thread = threading.Thread(target=_generate)
    thread.start()
    acc = ""
    for token in streamer:
        acc += token
        yield acc
    thread.join()
    if failures:
        # Surface the worker's error to the caller instead of ending silently.
        raise failures[0]


# Apply the ZeroGPU decorator only when the `spaces` package is importable;
# duration=120 is the per-call GPU time requested (the original author notes
# this is the ZeroGPU maximum).
if _HAS_SPACES:
    _stream = spaces.GPU(duration=120)(_stream)
# --------------------------------------------------------------------------- #
# Server
# --------------------------------------------------------------------------- #
# Application object on which the HTTP and API routes are registered.
server = gr.Server()


@server.get("/", response_class=HTMLResponse)
async def homepage() -> str:
    """Return the chat page markup (``FRONTEND_HTML``) for GET requests to ``/``."""
    return FRONTEND_HTML
@server.api(name="chat")
def chat_api(messages: list) -> str:  # generator -> annotate with the YIELDED type
    """Stream a reply for *messages* by delegating to ``_stream``.

    Registered under the API name ``chat``; the frontend submits to it as
    ``/chat`` with the running conversation history.

    Args:
        messages: Conversation so far, forwarded unchanged to ``_stream``.

    Yields:
        Each value yielded by ``_stream`` (the reply text accumulated so far).
    """
    yield from _stream(messages)
# --------------------------------------------------------------------------- #
# Frontend
# --------------------------------------------------------------------------- #
# Remote images used by the page: the banner is the faded chat background and
# the logo sits in the header.
BANNER_URL = "https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/Z0dKQfn56SAMmjVQTEaA0.png"
COHERE_LOGO_URL = "https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/fnuLx-qT2qzlYmEp6cszN.png"
# Single-page frontend served at "/". This is an f-string: only the two URL
# constants above are interpolated, so every literal CSS/JS brace is doubled.
# The script keeps the conversation in `history`, submits it to the "/chat"
# endpoint through the Gradio JS client, and renders streamed updates. Status
# events are enabled so server-side errors reach the catch block, and a failed
# turn is removed from `history` so the next request is not sent with two
# consecutive user messages.
FRONTEND_HTML = f"""
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Tiny Aya · Build Small Hackathon</title>
<style>
:root {{
--cream:#FAF6EF; --panel:#FFFFFF; --ink:#1B1B1B; --muted:#6B7280;
--blue:#4D6CCB; --blue-dark:#3F5BB8; --orange:#FF7A18;
--bot:#F2EDE3; --border:#E8E1D4;
}}
* {{ box-sizing:border-box; }}
body {{ margin:0; height:100vh; display:flex; flex-direction:column;
background:var(--cream); color:var(--ink);
font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif; }}
/* brand bar — Cohere Labs front and center */
header {{ position:relative; display:flex; flex-direction:column; align-items:center;
gap:6px; padding:22px 22px 16px; background:var(--panel);
border-bottom:3px solid var(--orange); }}
header img.logo {{ height:56px; width:auto; }}
header .title {{ font-weight:700; font-size:18px; display:flex; align-items:center; gap:8px; }}
header .title .dot {{ width:8px; height:8px; border-radius:50%; background:var(--orange);
box-shadow:0 0 8px var(--orange); }}
header .sub {{ font-size:13px; color:var(--muted); }}
header .badge {{ position:absolute; right:18px; top:18px; font-size:12px; font-weight:600;
color:var(--blue); background:#EDF0FB; border:1px solid #D8DEF6;
padding:5px 11px; border-radius:999px; }}
/* chat — banner sits faded in the background */
#chat {{ flex:1; overflow-y:auto; padding:26px; display:flex; flex-direction:column;
gap:14px; max-width:840px; width:100%; margin:0 auto;
background-image:
linear-gradient(to bottom, rgba(250,246,239,.40), rgba(250,246,239,.97) 58%),
url("{BANNER_URL}");
background-repeat:no-repeat, no-repeat;
background-position:center top, center top;
background-size:100% 100%, 100% auto;
background-attachment:local, local; }}
.msg {{ max-width:78%; padding:11px 15px; border-radius:16px; line-height:1.55;
white-space:pre-wrap; word-wrap:break-word; font-size:15px; }}
.user {{ align-self:flex-end; background:var(--blue); color:#fff;
border-bottom-right-radius:5px; }}
.bot {{ align-self:flex-start; background:var(--bot); color:var(--ink);
border:1px solid var(--border); border-bottom-left-radius:5px; }}
.typing {{ color:var(--muted); font-style:italic; }}
/* empty state sits just BELOW the banner: the banner scales with the chat
column width (max 840px), so the top margin tracks the viewport width
and is clamped for wide screens. Tune 42vw / 380px to taste. */
.empty {{ margin:min(42vw, 380px) auto 0; text-align:center; color:var(--muted); max-width:420px; }}
.empty h2 {{ color:var(--ink); margin:0 0 6px; }}
/* composer */
form {{ display:flex; gap:10px; padding:16px 22px; background:var(--panel);
border-top:1px solid var(--border); max-width:840px; width:100%; margin:0 auto; }}
textarea {{ flex:1; resize:none; background:var(--cream); color:var(--ink);
border:1px solid var(--border); border-radius:12px; padding:12px 14px;
font-size:15px; font-family:inherit; max-height:160px; }}
textarea:focus {{ outline:none; border-color:var(--blue); box-shadow:0 0 0 3px #4D6CCB22; }}
button {{ background:var(--blue); color:#fff; border:none; border-radius:12px;
padding:0 24px; font-size:15px; font-weight:600; cursor:pointer; }}
button:hover:not(:disabled) {{ background:var(--blue-dark); }}
button:disabled {{ opacity:.5; cursor:not-allowed; }}
footer {{ text-align:center; font-size:11.5px; color:var(--muted); padding:8px; background:var(--panel); }}
footer a {{ color:var(--blue); text-decoration:none; }}
</style>
</head>
<body>
<header>
<img class="logo" src="{COHERE_LOGO_URL}" alt="Cohere Labs">
<div class="title">Tiny Aya <span class="dot"></span></div>
<div class="sub">Multilingual chat · 70+ languages</div>
<span class="badge">Build Small Hackathon</span>
</header>
<div id="chat">
<div class="empty" id="empty">
<h2>👋 Hola · नमस्ते · Bonjour · مرحبا</h2>
<p>Chat with <b>Tiny Aya</b>, Cohere Labs' 3.35B multilingual model. Ask in any of 70+ languages.</p>
</div>
</div>
<form id="form">
<textarea id="input" rows="1" placeholder="Message Tiny Aya… (Enter to send, Shift+Enter for newline)"></textarea>
<button id="send" type="submit">Send</button>
</form>
<footer>
Powered by <a href="https://huggingface.co/CohereLabs/tiny-aya-global" target="_blank">Cohere Labs · Tiny Aya</a>
+ <a href="https://www.gradio.app/guides/server-mode" target="_blank">gr.Server</a>
· built for the <a href="https://huggingface.co/build-small-hackathon" target="_blank">Build Small Hackathon</a>
</footer>
<script type="module">
import {{ Client }} from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
const chat = document.getElementById("chat");
const empty = document.getElementById("empty");
const form = document.getElementById("form");
const input = document.getElementById("input");
const send = document.getElementById("send");
const history = [];
// Ask for status events as well as data so server-side failures are visible.
const client = await Client.connect(window.location.origin, {{ events: ["data", "status"] }});
const scroll = () => {{ chat.scrollTop = chat.scrollHeight; }};
function bubble(role, text, extra = "") {{
const el = document.createElement("div");
el.className = `msg ${{role === "user" ? "user" : "bot"}} ${{extra}}`;
el.textContent = text;
chat.appendChild(el);
scroll();
return el;
}}
input.addEventListener("input", () => {{
input.style.height = "auto";
input.style.height = input.scrollHeight + "px";
}});
input.addEventListener("keydown", (e) => {{
if (e.key === "Enter" && !e.shiftKey) {{ e.preventDefault(); form.requestSubmit(); }}
}});
form.addEventListener("submit", async (e) => {{
e.preventDefault();
const text = input.value.trim();
if (!text) return;
if (empty) empty.remove();
bubble("user", text);
history.push({{ role: "user", content: text }});
input.value = ""; input.style.height = "auto";
send.disabled = true; input.disabled = true;
const botEl = bubble("assistant", "▍", "typing");
let full = "";
try {{
const job = client.submit("/chat", {{ messages: history }});
for await (const msg of job) {{
if (msg.type === "data") {{
full = msg.data[0];
botEl.classList.remove("typing");
botEl.textContent = full;
scroll();
}} else if (msg.type === "status" && msg.stage === "error") {{
// Route a server-side failure to the catch block below.
throw new Error(msg.message || "Generation failed");
}}
}}
// Stream finished: clear the typing cursor even if no data event arrived.
botEl.classList.remove("typing");
botEl.textContent = full;
history.push({{ role: "assistant", content: full }});
}} catch (err) {{
// Drop the unanswered user turn so the next request keeps alternating roles.
history.pop();
const detail = err && err.message ? err.message : String(err);
botEl.classList.remove("typing");
botEl.textContent = "⚠️ " + detail;
console.error(err);
}} finally {{
send.disabled = false; input.disabled = false; input.focus();
}}
}});
</script>
</body>
</html>
"""
# Script entry point: start the server only when this file is run directly.
# Listens on all interfaces (0.0.0.0) on port 7860. show_error=True is passed
# straight to launch() — presumably so server-side exceptions are reported to
# the client; confirm against the Gradio docs.
if __name__ == "__main__":
    server.launch(server_name="0.0.0.0", server_port=7860, show_error=True)