File size: 10,204 Bytes
87718ac
 
 
 
 
 
 
 
 
 
 
0798537
87718ac
0798537
 
79da83f
0798537
 
 
 
 
 
 
 
87718ac
 
 
 
 
0798537
 
87718ac
 
 
0798537
 
 
 
 
 
 
 
 
 
 
87718ac
0798537
 
 
 
 
 
 
 
 
 
 
87718ac
0798537
87718ac
0798537
 
 
87718ac
0798537
 
 
87718ac
 
 
0798537
 
 
 
 
 
 
 
 
87718ac
0798537
 
 
87718ac
 
 
 
 
79da83f
 
87718ac
0798537
 
 
 
 
87718ac
0798537
87718ac
 
 
 
 
 
 
 
 
79da83f
 
 
 
 
 
87718ac
 
79da83f
 
 
87718ac
79da83f
87718ac
79da83f
 
 
 
 
 
 
 
87718ac
 
 
 
 
 
 
53f5b2b
 
 
 
87718ac
 
 
 
 
 
 
 
 
 
 
 
 
 
0798537
 
 
87718ac
79da83f
 
 
87718ac
 
 
 
 
 
 
 
0798537
 
 
 
87718ac
 
 
 
 
0798537
87718ac
0798537
87718ac
0798537
 
 
87718ac
0798537
87718ac
 
0798537
87718ac
0798537
 
 
 
87718ac
 
0798537
 
87718ac
 
 
 
 
0798537
 
 
87718ac
0798537
87718ac
0798537
 
 
 
87718ac
 
 
 
0798537
 
 
 
87718ac
 
 
 
0798537
 
 
87718ac
0798537
87718ac
 
0798537
 
 
 
 
 
53f5b2b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
"""Tiny Aya — streaming multilingual chat, built for the Build Small Hackathon.
A gr.Server app: custom HTML/JS frontend (Cohere Labs + Build Small styling)
backed by Gradio's queue + ZeroGPU. The browser talks to the `/chat` route
through the Gradio JS client, so it streams token-by-token.
Deploy on Hugging Face Spaces:
  - sdk: gradio  (in README.md frontmatter)
  - add HF_TOKEN as a Space secret (tiny-aya-global is a gated model)
  - upload the logo file alongside this app:  Cohere Labs-LockUp-Blue-CMYK.png
"""

import os
import threading

import torch
import gradio as gr
from fastapi.responses import HTMLResponse
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Optional dependency: `spaces` supplies the `spaces.GPU` decorator that is
# applied to `_stream` further down. When the package cannot be imported the
# app still starts and `_stream` is simply left undecorated.
try:
    import spaces
    _HAS_SPACES = True
except ImportError:
    _HAS_SPACES = False

# --------------------------------------------------------------------------- #
# Model
# --------------------------------------------------------------------------- #
# Hub repository that both the tokenizer and the model weights are loaded from.
MODEL_ID = "CohereLabs/tiny-aya-global"
HF_TOKEN = os.environ.get("HF_TOKEN")          # gated repo -> needs a token (None when the variable is unset)
# Use the GPU when torch reports CUDA as available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Loaded once at import time; `_stream` reuses these module-level objects for
# every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", token=HF_TOKEN)
model.to(device)                                # module-level: ZeroGPU fast-restore


def _stream(messages: list):
    """Generate a reply to *messages* and stream it back incrementally.

    Generation runs in a background thread that feeds a
    ``TextIteratorStreamer``; this generator drains the streamer and yields
    the reply accumulated so far after every decoded chunk.

    Args:
        messages: Chat history handed to ``tokenizer.apply_chat_template``.
            Presumably a list of ``{"role": ..., "content": ...}`` dicts,
            which is what the bundled frontend sends.

    Yields:
        str: The complete reply text decoded so far (cumulative, not a delta).

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here after the stream has been drained.
    """
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # skip_prompt=True: only newly generated text is streamed, not the prompt.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.3,
        streamer=streamer,
    )

    failures = []

    def _generate():
        # If generate() fails it never closes the streamer, which would leave
        # the consumer loop below blocked forever. Record the error and close
        # the stream explicitly so the consumer can finish and report it.
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:
            failures.append(exc)
            streamer.end()

    thread = threading.Thread(target=_generate)
    thread.start()

    acc = ""
    for token in streamer:
        acc += token
        yield acc
    thread.join()

    # Surface a worker failure to the caller instead of ending silently.
    if failures:
        raise failures[0]


# @spaces.GPU only on ZeroGPU; cap duration at 120s (the ZeroGPU max).
# The module-level name is rebound, so `chat_api` (which looks `_stream` up at
# call time) uses the decorated generator whenever `spaces` was importable.
if _HAS_SPACES:
    _stream = spaces.GPU(duration=120)(_stream)

# --------------------------------------------------------------------------- #
# Server
# --------------------------------------------------------------------------- #
# Application object: the route handlers below register themselves on it and
# the `__main__` guard at the bottom of the file launches it.
server = gr.Server()


@server.get("/", response_class=HTMLResponse)
async def homepage() -> str:
    """Serve the single-page chat frontend at the site root.

    Returns:
        The ``FRONTEND_HTML`` document; the route declares ``HTMLResponse``
        as its response class.
    """
    # FRONTEND_HTML is assigned further down the module. That is fine because
    # the name is only resolved when this handler is actually called.
    return FRONTEND_HTML


@server.api(name="chat")
def chat_api(messages: list) -> str:        # generator -> annotate with the YIELDED type
    """Stream the assistant reply for a conversation (the ``chat`` API route).

    Args:
        messages: Conversation so far. The bundled frontend sends its whole
            ``history`` array of ``{"role": ..., "content": ...}`` objects,
            ending with the newest user turn.

    Yields:
        The reply text accumulated so far, exactly as produced by
        ``_stream``; each item supersedes the previous one.
    """
    # Pure delegation: all generation logic lives in `_stream`.
    yield from _stream(messages)


# --------------------------------------------------------------------------- #
# Frontend
# --------------------------------------------------------------------------- #
# Banner artwork, drawn faded behind the chat column (used by the `#chat` CSS rule).
BANNER_URL = "https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/Z0dKQfn56SAMmjVQTEaA0.png"

# Cohere Labs logo shown in the page header.
COHERE_LOGO_URL = "https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/fnuLx-qT2qzlYmEp6cszN.png"

# Complete single-page frontend returned by `homepage`.
# This is an f-string: `{BANNER_URL}` and `{COHERE_LOGO_URL}` are interpolated,
# so every literal CSS/JS brace is doubled (`{{` / `}}`).
# The inline script connects with the Gradio JS client, submits the whole
# `history` array to the `/chat` route and re-renders the bot bubble on each
# streamed "data" message. When a request fails, the pending user turn is
# removed from `history` again so the next request does not carry an
# unanswered user message.
FRONTEND_HTML = f"""
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Tiny Aya · Build Small Hackathon</title>
<style>
  :root {{
    --cream:#FAF6EF; --panel:#FFFFFF; --ink:#1B1B1B; --muted:#6B7280;
    --blue:#4D6CCB; --blue-dark:#3F5BB8; --orange:#FF7A18;
    --bot:#F2EDE3; --border:#E8E1D4;
  }}
  * {{ box-sizing:border-box; }}
  body {{ margin:0; height:100vh; display:flex; flex-direction:column;
         background:var(--cream); color:var(--ink);
         font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif; }}
  /* brand bar — Cohere Labs front and center */
  header {{ position:relative; display:flex; flex-direction:column; align-items:center;
           gap:6px; padding:22px 22px 16px; background:var(--panel);
           border-bottom:3px solid var(--orange); }}
  header img.logo {{ height:56px; width:auto; }}
  header .title {{ font-weight:700; font-size:18px; display:flex; align-items:center; gap:8px; }}
  header .title .dot {{ width:8px; height:8px; border-radius:50%; background:var(--orange);
                       box-shadow:0 0 8px var(--orange); }}
  header .sub {{ font-size:13px; color:var(--muted); }}
  header .badge {{ position:absolute; right:18px; top:18px; font-size:12px; font-weight:600;
                  color:var(--blue); background:#EDF0FB; border:1px solid #D8DEF6;
                  padding:5px 11px; border-radius:999px; }}
  /* chat — banner sits faded in the background */
  #chat {{ flex:1; overflow-y:auto; padding:26px; display:flex; flex-direction:column;
          gap:14px; max-width:840px; width:100%; margin:0 auto;
          background-image:
            linear-gradient(to bottom, rgba(250,246,239,.40), rgba(250,246,239,.97) 58%),
            url("{BANNER_URL}");
          background-repeat:no-repeat, no-repeat;
          background-position:center top, center top;
          background-size:100% 100%, 100% auto;
          background-attachment:local, local; }}
  .msg {{ max-width:78%; padding:11px 15px; border-radius:16px; line-height:1.55;
         white-space:pre-wrap; word-wrap:break-word; font-size:15px; }}
  .user {{ align-self:flex-end; background:var(--blue); color:#fff;
          border-bottom-right-radius:5px; }}
  .bot  {{ align-self:flex-start; background:var(--bot); color:var(--ink);
          border:1px solid var(--border); border-bottom-left-radius:5px; }}
  .typing {{ color:var(--muted); font-style:italic; }}
  /* empty state sits just BELOW the banner: the banner scales with the chat
     column width (max 840px), so the top margin tracks the viewport width
     and is clamped for wide screens. Tune 42vw / 380px to taste. */
  .empty {{ margin:min(42vw, 380px) auto 0; text-align:center; color:var(--muted); max-width:420px; }}
  .empty h2 {{ color:var(--ink); margin:0 0 6px; }}
  /* composer */
  form {{ display:flex; gap:10px; padding:16px 22px; background:var(--panel);
         border-top:1px solid var(--border); max-width:840px; width:100%; margin:0 auto; }}
  textarea {{ flex:1; resize:none; background:var(--cream); color:var(--ink);
             border:1px solid var(--border); border-radius:12px; padding:12px 14px;
             font-size:15px; font-family:inherit; max-height:160px; }}
  textarea:focus {{ outline:none; border-color:var(--blue); box-shadow:0 0 0 3px #4D6CCB22; }}
  button {{ background:var(--blue); color:#fff; border:none; border-radius:12px;
           padding:0 24px; font-size:15px; font-weight:600; cursor:pointer; }}
  button:hover:not(:disabled) {{ background:var(--blue-dark); }}
  button:disabled {{ opacity:.5; cursor:not-allowed; }}
  footer {{ text-align:center; font-size:11.5px; color:var(--muted); padding:8px; background:var(--panel); }}
  footer a {{ color:var(--blue); text-decoration:none; }}
</style>
</head>
<body>
  <header>
    <img class="logo" src="{COHERE_LOGO_URL}" alt="Cohere Labs">
    <div class="title">Tiny Aya <span class="dot"></span></div>
    <div class="sub">Multilingual chat · 70+ languages</div>
    <span class="badge">Build Small Hackathon</span>
  </header>
  <div id="chat">
    <div class="empty" id="empty">
      <h2>👋 Hola · नमस्ते · Bonjour · مرحبا</h2>
      <p>Chat with <b>Tiny Aya</b>, Cohere Labs' 3.35B multilingual model. Ask in any of 70+ languages.</p>
    </div>
  </div>
  <form id="form">
    <textarea id="input" rows="1" placeholder="Message Tiny Aya…  (Enter to send, Shift+Enter for newline)"></textarea>
    <button id="send" type="submit">Send</button>
  </form>
  <footer>
    Powered by <a href="https://huggingface.co/CohereLabs/tiny-aya-global" target="_blank">Cohere Labs · Tiny Aya</a>
    + <a href="https://www.gradio.app/guides/server-mode" target="_blank">gr.Server</a>
    · built for the <a href="https://huggingface.co/build-small-hackathon" target="_blank">Build Small Hackathon</a>
  </footer>
<script type="module">
  import {{ Client }} from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
  const chat   = document.getElementById("chat");
  const empty  = document.getElementById("empty");
  const form   = document.getElementById("form");
  const input  = document.getElementById("input");
  const send   = document.getElementById("send");
  const history = [];
  const client = await Client.connect(window.location.origin);
  const scroll = () => {{ chat.scrollTop = chat.scrollHeight; }};
  function bubble(role, text, extra = "") {{
    const el = document.createElement("div");
    el.className = `msg ${{role === "user" ? "user" : "bot"}} ${{extra}}`;
    el.textContent = text;
    chat.appendChild(el);
    scroll();
    return el;
  }}
  input.addEventListener("input", () => {{
    input.style.height = "auto";
    input.style.height = input.scrollHeight + "px";
  }});
  input.addEventListener("keydown", (e) => {{
    if (e.key === "Enter" && !e.shiftKey) {{ e.preventDefault(); form.requestSubmit(); }}
  }});
  form.addEventListener("submit", async (e) => {{
    e.preventDefault();
    const text = input.value.trim();
    if (!text) return;
    if (empty) empty.remove();
    bubble("user", text);
    history.push({{ role: "user", content: text }});
    input.value = ""; input.style.height = "auto";
    send.disabled = true; input.disabled = true;
    const botEl = bubble("assistant", "▍", "typing");
    let full = "";
    try {{
      const job = client.submit("/chat", {{ messages: history }});
      for await (const msg of job) {{
        if (msg.type === "data") {{
          full = msg.data[0];
          botEl.classList.remove("typing");
          botEl.textContent = full;
          scroll();
        }}
      }}
      history.push({{ role: "assistant", content: full }});
    }} catch (err) {{
      // Roll back the unanswered user turn so the next request does not
      // replay it as a second consecutive user message.
      history.pop();
      botEl.classList.remove("typing");
      botEl.textContent = "⚠️ " + err;
      console.error(err);
    }} finally {{
      send.disabled = false; input.disabled = false; input.focus();
    }}
  }});
</script>
</body>
</html>
"""

if __name__ == "__main__":
    # Script entry point: listen on every network interface at port 7860.
    server.launch(server_name="0.0.0.0", server_port=7860, show_error=True)