Spaces:

MoEprometheus
/

Prometheus-chat

Sleeping

App Files Files Community

MoEprometheus committed on Mar 23

Commit

7bf134f

verified ·

1 Parent(s): 6923a28

Create app.py

Browse files

Files changed (1) hide show

app.py +145 -0

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

+import os
+import jax
+import jax.numpy as jnp
+import flax.linen as nn
+import pickle
+import numpy as np
+import gradio as gr
+from huggingface_hub import hf_hub_download
+HF_REPO = "MoEprometheus/Prometheus-base"
+print("📥 Загружаем Prometheus...")
+path = hf_hub_download(HF_REPO, "expert1.pkl")
+with open(path, "rb") as f:
+    ckpt = pickle.load(f)
+itos   = ckpt["vocab"]
+stoi   = {v: k for k, v in itos.items()}
+CONFIG = ckpt["config"]
+encode = lambda s: [stoi.get(c, 0) for c in s]
+decode = lambda l: "".join([itos.get(i, "") for i in l])
+print(f"✅ Загружено — шаг {ckpt['step']}")
+class PrometheusAttention(nn.Module):
+    n_heads: int
+    n_embed: int
+    block_size: int
+    dropout: float
+    @nn.compact
+    def __call__(self, x, deterministic=True):
+        B, T, C = x.shape
+        head_size = self.n_embed // self.n_heads
+        qkv = nn.Dense(3 * self.n_embed, use_bias=False)(x)
+        q, k, v = jnp.split(qkv, 3, axis=-1)
+        q = q.reshape(B, T, self.n_heads, head_size).transpose(0, 2, 1, 3)
+        k = k.reshape(B, T, self.n_heads, head_size).transpose(0, 2, 1, 3)
+        v = v.reshape(B, T, self.n_heads, head_size).transpose(0, 2, 1, 3)
+        att = (q @ k.transpose(0, 1, 3, 2)) * (head_size ** -0.5)
+        mask = jnp.tril(jnp.ones((T, T)))
+        att = jnp.where(mask == 0, -1e9, att)
+        att = jax.nn.softmax(att, axis=-1)
+        att = nn.Dropout(self.dropout)(att, deterministic=deterministic)
+        out = (att @ v).transpose(0, 2, 1, 3).reshape(B, T, C)
+        out = nn.Dense(self.n_embed)(out)
+        return nn.Dropout(self.dropout)(out, deterministic=deterministic)
+class PrometheusMLP(nn.Module):
+    n_embed: int
+    dropout: float
+    @nn.compact
+    def __call__(self, x, deterministic=True):
+        x = nn.Dense(4 * self.n_embed)(x)
+        x = nn.gelu(x)
+        x = nn.Dense(self.n_embed)(x)
+        return nn.Dropout(self.dropout)(x, deterministic=deterministic)
+class PrometheusBlock(nn.Module):
+    n_embed: int
+    n_heads: int
+    block_size: int
+    dropout: float
+    @nn.compact
+    def __call__(self, x, deterministic=True):
+        x = x + PrometheusAttention(
+            self.n_heads, self.n_embed,
+            self.block_size, self.dropout
+        )(nn.LayerNorm()(x), deterministic)
+        x = x + PrometheusMLP(
+            self.n_embed, self.dropout
+        )(nn.LayerNorm()(x), deterministic)
+        return x
+class Prometheus(nn.Module):
+    vocab_size: int
+    n_embed: int
+    n_heads: int
+    n_layers: int
+    block_size: int
+    dropout: float
+    @nn.compact
+    def __call__(self, idx, training=False):
+        B, T = idx.shape
+        tok = nn.Embed(self.vocab_size, self.n_embed)(idx)
+        pos = nn.Embed(self.block_size, self.n_embed)(jnp.arange(T))
+        x = nn.Dropout(self.dropout)(tok + pos, deterministic=True)
+        BlockRemat = nn.remat(PrometheusBlock, static_argnums=(2,))
+        for _ in range(self.n_layers):
+            x = BlockRemat(
+                self.n_embed, self.n_heads,
+                self.block_size, self.dropout
+            )(x, True)
+        return nn.Dense(self.vocab_size)(nn.LayerNorm()(x))
+model = Prometheus(
+    vocab_size = CONFIG["vocab_size"],
+    n_embed    = CONFIG["n_embed"],
+    n_heads    = CONFIG["n_heads"],
+    n_layers   = CONFIG["n_layers"],
+    block_size = CONFIG["block_size"],
+    dropout    = CONFIG["dropout"],
+)
+params = ckpt["params"]
+def generate(prompt, max_new_tokens=80, temperature=1.1):
+    tokens = encode(prompt)
+    tokens = tokens[-(CONFIG["block_size"]-1):]
+    for _ in range(max_new_tokens):
+        x = jnp.array([tokens])
+        logits = model.apply(params, x, training=False)
+        logits = logits[0, -1, :] / temperature
+        top_k = 40
+        top_k_logits, top_k_indices = jax.lax.top_k(logits, top_k)
+        probs = jax.nn.softmax(top_k_logits)
+        chosen = int(jax.random.categorical(
+            jax.random.PRNGKey(np.random.randint(0, 99999)),
+            jnp.log(probs)
+        ))
+        next_token = int(top_k_indices[chosen])
+        tokens.append(next_token)
+    return decode(tokens)
+def chat(message, history):
+    result = generate(message, max_new_tokens=80, temperature=1.1)
+    # Убираем промпт из ответа
+    if len(result) > len(message):
+        answer = result[len(message):]
+    else:
+        answer = result
+    return answer.strip()
+demo = gr.ChatInterface(
+    fn=chat,
+    title="🔥 Prometheus AI",
+    description="Языковая модель 1.2B параметров. Создана с нуля одним человеком.",
+    examples=[
+        "Москва —",
+        "Россия — это",
+        "Нейронная сеть — это",
+        "Python — язык",
+    ],
+    theme="soft",
+)
+demo.launch()