Spaces:
Running on Zero
Running on Zero
File size: 10,204 Bytes
"""Tiny Aya — streaming multilingual chat, built for the Build Small Hackathon.
A gr.Server app: custom HTML/JS frontend (Cohere Labs + Build Small styling)
backed by Gradio's queue + ZeroGPU. The browser talks to the `/chat` route
through the Gradio JS client, so it streams token-by-token.
Deploy on Hugging Face Spaces:
- sdk: gradio (in README.md frontmatter)
- add HF_TOKEN as a Space secret (tiny-aya-global is a gated model)
- upload the logo file alongside this app: Cohere Labs-LockUp-Blue-CMYK.png
"""
import os
import threading
import torch
import gradio as gr
from fastapi.responses import HTMLResponse
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# `spaces` is an optional dependency: record whether it could be imported so
# later code can decide whether to apply the GPU decorator.
try:
    import spaces
except ImportError:
    _HAS_SPACES = False
else:
    _HAS_SPACES = True
# --------------------------------------------------------------------------- #
# Model
# --------------------------------------------------------------------------- #
MODEL_ID = "CohereLabs/tiny-aya-global"

# The repository is gated, so downloads need an access token; it is read from
# the environment and is None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype="auto",
)
# Deliberately moved at module level rather than per request (the original
# author's note: this is what enables ZeroGPU fast-restore).
model.to(device)
def _stream(messages: list):
    """Generate a reply to *messages*, yielding the text accumulated so far.

    Generation runs in a background thread that feeds a
    ``TextIteratorStreamer``; this generator drains the streamer and yields
    the growing reply after every decoded chunk.

    Args:
        messages: Chat history handed unchanged to
            ``tokenizer.apply_chat_template`` (the bundled frontend sends
            dicts with ``role`` and ``content`` keys).

    Yields:
        The whole reply decoded so far (not just the newest chunk).

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once the stream has been drained.
    """
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.3,
        streamer=streamer,
    )
    failures = []

    def _generate():
        """Run generation; on failure record the error and unblock the reader."""
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:  # thread boundary: re-raised by the consumer below
            failures.append(exc)
            # generate() died before signalling end-of-stream. Without this
            # the loop below would wait on the streamer's queue forever.
            streamer.end()

    thread = threading.Thread(target=_generate)
    thread.start()
    acc = ""
    for token in streamer:
        acc += token
        yield acc
    thread.join()
    if failures:
        # Surface the worker's error to the caller instead of ending the
        # stream as if generation had succeeded.
        raise failures[0]
# Apply the `spaces.GPU` decorator only when the optional `spaces` package was
# importable; otherwise `_stream` is left undecorated. `duration=120` is the
# value passed to the decorator (the original author notes 120s is the ZeroGPU
# maximum — TODO confirm against the current ZeroGPU documentation).
if _HAS_SPACES:
    _stream = spaces.GPU(duration=120)(_stream)
# --------------------------------------------------------------------------- #
# Server
# --------------------------------------------------------------------------- #
# Application object: the route and API handlers defined below register
# themselves on it, and the `__main__` guard at the end of the file launches it.
server = gr.Server()
@server.get("/", response_class=HTMLResponse)
async def homepage() -> str:
    """Serve the single-page chat frontend at the site root."""
    page = FRONTEND_HTML
    return page
@server.api(name="chat")
def chat_api(messages: list) -> str:
    """Stream a chat reply for *messages* through the ``chat`` API endpoint.

    This is a generator that delegates to ``_stream``. The ``str`` return
    annotation intentionally describes each *yielded* value, not the
    generator object itself.
    """
    partial_replies = _stream(messages)
    yield from partial_replies
# --------------------------------------------------------------------------- #
# Frontend
# --------------------------------------------------------------------------- #
# Remote images referenced by the page: the banner is used as the chat
# background and the logo is shown in the header.
BANNER_URL = "https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/Z0dKQfn56SAMmjVQTEaA0.png"
COHERE_LOGO_URL = "https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/fnuLx-qT2qzlYmEp6cszN.png"

# Complete single-page frontend (HTML + CSS + JS) returned by `homepage`.
# This is an f-string: only {BANNER_URL} and {COHERE_LOGO_URL} are interpolated,
# and every literal CSS/JS brace is written doubled ({{ }}) so it survives
# formatting. On a failed request the submit handler now removes the
# unanswered user turn from `history`, so a retry does not send it twice.
FRONTEND_HTML = f"""
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Tiny Aya · Build Small Hackathon</title>
<style>
:root {{
--cream:#FAF6EF; --panel:#FFFFFF; --ink:#1B1B1B; --muted:#6B7280;
--blue:#4D6CCB; --blue-dark:#3F5BB8; --orange:#FF7A18;
--bot:#F2EDE3; --border:#E8E1D4;
}}
* {{ box-sizing:border-box; }}
body {{ margin:0; height:100vh; display:flex; flex-direction:column;
background:var(--cream); color:var(--ink);
font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif; }}
/* brand bar — Cohere Labs front and center */
header {{ position:relative; display:flex; flex-direction:column; align-items:center;
gap:6px; padding:22px 22px 16px; background:var(--panel);
border-bottom:3px solid var(--orange); }}
header img.logo {{ height:56px; width:auto; }}
header .title {{ font-weight:700; font-size:18px; display:flex; align-items:center; gap:8px; }}
header .title .dot {{ width:8px; height:8px; border-radius:50%; background:var(--orange);
box-shadow:0 0 8px var(--orange); }}
header .sub {{ font-size:13px; color:var(--muted); }}
header .badge {{ position:absolute; right:18px; top:18px; font-size:12px; font-weight:600;
color:var(--blue); background:#EDF0FB; border:1px solid #D8DEF6;
padding:5px 11px; border-radius:999px; }}
/* chat — banner sits faded in the background */
#chat {{ flex:1; overflow-y:auto; padding:26px; display:flex; flex-direction:column;
gap:14px; max-width:840px; width:100%; margin:0 auto;
background-image:
linear-gradient(to bottom, rgba(250,246,239,.40), rgba(250,246,239,.97) 58%),
url("{BANNER_URL}");
background-repeat:no-repeat, no-repeat;
background-position:center top, center top;
background-size:100% 100%, 100% auto;
background-attachment:local, local; }}
.msg {{ max-width:78%; padding:11px 15px; border-radius:16px; line-height:1.55;
white-space:pre-wrap; word-wrap:break-word; font-size:15px; }}
.user {{ align-self:flex-end; background:var(--blue); color:#fff;
border-bottom-right-radius:5px; }}
.bot {{ align-self:flex-start; background:var(--bot); color:var(--ink);
border:1px solid var(--border); border-bottom-left-radius:5px; }}
.typing {{ color:var(--muted); font-style:italic; }}
/* empty state sits just BELOW the banner: the banner scales with the chat
column width (max 840px), so the top margin tracks the viewport width
and is clamped for wide screens. Tune 42vw / 380px to taste. */
.empty {{ margin:min(42vw, 380px) auto 0; text-align:center; color:var(--muted); max-width:420px; }}
.empty h2 {{ color:var(--ink); margin:0 0 6px; }}
/* composer */
form {{ display:flex; gap:10px; padding:16px 22px; background:var(--panel);
border-top:1px solid var(--border); max-width:840px; width:100%; margin:0 auto; }}
textarea {{ flex:1; resize:none; background:var(--cream); color:var(--ink);
border:1px solid var(--border); border-radius:12px; padding:12px 14px;
font-size:15px; font-family:inherit; max-height:160px; }}
textarea:focus {{ outline:none; border-color:var(--blue); box-shadow:0 0 0 3px #4D6CCB22; }}
button {{ background:var(--blue); color:#fff; border:none; border-radius:12px;
padding:0 24px; font-size:15px; font-weight:600; cursor:pointer; }}
button:hover:not(:disabled) {{ background:var(--blue-dark); }}
button:disabled {{ opacity:.5; cursor:not-allowed; }}
footer {{ text-align:center; font-size:11.5px; color:var(--muted); padding:8px; background:var(--panel); }}
footer a {{ color:var(--blue); text-decoration:none; }}
</style>
</head>
<body>
<header>
<img class="logo" src="{COHERE_LOGO_URL}" alt="Cohere Labs">
<div class="title">Tiny Aya <span class="dot"></span></div>
<div class="sub">Multilingual chat · 70+ languages</div>
<span class="badge">Build Small Hackathon</span>
</header>
<div id="chat">
<div class="empty" id="empty">
<h2>👋 Hola · नमस्ते · Bonjour · مرحبا</h2>
<p>Chat with <b>Tiny Aya</b>, Cohere Labs' 3.35B multilingual model. Ask in any of 70+ languages.</p>
</div>
</div>
<form id="form">
<textarea id="input" rows="1" placeholder="Message Tiny Aya… (Enter to send, Shift+Enter for newline)"></textarea>
<button id="send" type="submit">Send</button>
</form>
<footer>
Powered by <a href="https://huggingface.co/CohereLabs/tiny-aya-global" target="_blank">Cohere Labs · Tiny Aya</a>
+ <a href="https://www.gradio.app/guides/server-mode" target="_blank">gr.Server</a>
· built for the <a href="https://huggingface.co/build-small-hackathon" target="_blank">Build Small Hackathon</a>
</footer>
<script type="module">
import {{ Client }} from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
const chat = document.getElementById("chat");
const empty = document.getElementById("empty");
const form = document.getElementById("form");
const input = document.getElementById("input");
const send = document.getElementById("send");
const history = [];
const client = await Client.connect(window.location.origin);
const scroll = () => {{ chat.scrollTop = chat.scrollHeight; }};
function bubble(role, text, extra = "") {{
const el = document.createElement("div");
el.className = `msg ${{role === "user" ? "user" : "bot"}} ${{extra}}`;
el.textContent = text;
chat.appendChild(el);
scroll();
return el;
}}
input.addEventListener("input", () => {{
input.style.height = "auto";
input.style.height = input.scrollHeight + "px";
}});
input.addEventListener("keydown", (e) => {{
if (e.key === "Enter" && !e.shiftKey) {{ e.preventDefault(); form.requestSubmit(); }}
}});
form.addEventListener("submit", async (e) => {{
e.preventDefault();
const text = input.value.trim();
if (!text) return;
if (empty) empty.remove();
bubble("user", text);
history.push({{ role: "user", content: text }});
input.value = ""; input.style.height = "auto";
send.disabled = true; input.disabled = true;
const botEl = bubble("assistant", "▍", "typing");
let full = "";
try {{
const job = client.submit("/chat", {{ messages: history }});
for await (const msg of job) {{
if (msg.type === "data") {{
full = msg.data[0];
botEl.classList.remove("typing");
botEl.textContent = full;
scroll();
}}
}}
history.push({{ role: "assistant", content: full }});
}} catch (err) {{
// drop the unanswered user turn so a retry does not send it twice
history.pop();
botEl.classList.remove("typing");
botEl.textContent = "⚠️ " + err;
console.error(err);
}} finally {{
send.disabled = false; input.disabled = false; input.focus();
}}
}});
</script>
</body>
</html>
"""
if __name__ == "__main__":
    # Start the server when run as a script: listen on all interfaces
    # ("0.0.0.0"), port 7860, with show_error=True passed through to launch().
    server.launch(server_name="0.0.0.0", server_port=7860, show_error=True)