Spaces:

MoEprometheus
/

Prometheus-chat

Sleeping

App Files Files Community

Prometheus-chat / app.py

MoEprometheus

Update app.py

4b42203 verified 2 months ago

raw

history blame contribute delete

4.77 kB

	import os
	import jax
	import jax.numpy as jnp
	import flax.linen as nn
	import pickle
	import numpy as np
	import gradio as gr
	from huggingface_hub import hf_hub_download

	# Hugging Face Hub repository that hosts the pickled checkpoint.
	HF_REPO = "MoEprometheus/Prometheus-base"

	print("📥 Загружаем Prometheus...")
	path = hf_hub_download(HF_REPO, "expert1.pkl")
	# SECURITY: pickle.load can execute arbitrary code embedded in the file.
	# Only keep this if HF_REPO is fully trusted; otherwise move the checkpoint
	# to a data-only format.
	with open(path, "rb") as f:
	    ckpt = pickle.load(f)

	# itos maps token id -> string; stoi is its inverse (string -> token id).
	itos = ckpt["vocab"]
	stoi = {v: k for k, v in itos.items()}
	# Model hyperparameters stored alongside the weights.
	CONFIG = ckpt["config"]


	def encode(s):
	    """Return the list of token ids for the characters of ``s``.

	    Characters that are not in the vocabulary are mapped to id 0.
	    """
	    return [stoi.get(c, 0) for c in s]


	def decode(l):
	    """Return the string obtained by joining the vocabulary entry of each id.

	    Ids that are not in the vocabulary contribute an empty string.
	    """
	    return "".join(itos.get(i, "") for i in l)


	print(f"✅ Загружено — шаг {ckpt['step']}")

	class PrometheusAttention(nn.Module):
	    """Multi-head causal self-attention over a ``(B, T, C)`` input.

	    Submodules are created in a fixed order (fused QKV projection first,
	    output projection second) because Flax auto-names them by creation
	    order and stored parameters are keyed by those names.
	    """

	    n_heads: int  # number of attention heads
	    n_embed: int  # feature width; split evenly across the heads
	    block_size: int  # declared for the constructor signature; not read here
	    dropout: float  # dropout rate for attention weights and the output

	    @nn.compact
	    def __call__(self, x, deterministic=True):
	        """Apply attention to ``x``; dropout is skipped when ``deterministic``."""
	        batch, seq_len, channels = x.shape
	        per_head = self.n_embed // self.n_heads

	        def to_heads(t):
	            # (B, T, n_embed) -> (B, n_heads, T, per_head)
	            split = jnp.reshape(t, (batch, seq_len, self.n_heads, per_head))
	            return jnp.transpose(split, (0, 2, 1, 3))

	        # A single bias-free projection produces queries, keys and values.
	        fused = nn.Dense(3 * self.n_embed, use_bias=False)(x)
	        query, key, value = (
	            to_heads(part) for part in jnp.split(fused, 3, axis=-1)
	        )

	        # Scaled dot-product scores, shape (B, n_heads, T, T).
	        scores = jnp.matmul(query, jnp.swapaxes(key, -1, -2)) * (per_head ** -0.5)

	        # Lower-triangular mask: each position sees itself and earlier ones;
	        # everything else is pushed to -1e9 before the softmax.
	        causal = jnp.tril(jnp.ones((seq_len, seq_len)))
	        scores = jnp.where(causal > 0, scores, -1e9)

	        weights = jax.nn.softmax(scores, axis=-1)
	        weights = nn.Dropout(self.dropout)(weights, deterministic=deterministic)

	        # Merge the heads back into (B, T, C) and project.
	        mixed = jnp.matmul(weights, value)
	        merged = jnp.transpose(mixed, (0, 2, 1, 3)).reshape(batch, seq_len, channels)
	        projected = nn.Dense(self.n_embed)(merged)
	        return nn.Dropout(self.dropout)(projected, deterministic=deterministic)

	class PrometheusMLP(nn.Module):
	    """Feed-forward sub-layer: expand to 4x width, GELU, project back, dropout."""

	    n_embed: int  # input and output feature width
	    dropout: float  # dropout rate applied to the output

	    @nn.compact
	    def __call__(self, x, deterministic=True):
	        """Transform ``x``; dropout is skipped when ``deterministic``."""
	        # Creation order matters: Flax auto-names the layers Dense_0, Dense_1.
	        expand = nn.Dense(4 * self.n_embed)
	        project = nn.Dense(self.n_embed)
	        drop = nn.Dropout(self.dropout)

	        hidden = nn.gelu(expand(x))
	        return drop(project(hidden), deterministic=deterministic)

	class PrometheusBlock(nn.Module):
	    """Pre-norm transformer block: attention and MLP, each with a residual add."""

	    n_embed: int  # feature width
	    n_heads: int  # number of attention heads
	    block_size: int  # forwarded to the attention module
	    dropout: float  # dropout rate forwarded to both sub-layers

	    @nn.compact
	    def __call__(self, x, deterministic=True):
	        """Run both sub-layers on ``x`` and return the updated activations."""
	        # First LayerNorm feeds attention, second feeds the MLP; keeping that
	        # creation order keeps the auto-generated LayerNorm names stable.
	        attn_input = nn.LayerNorm()(x)
	        attention = PrometheusAttention(
	            n_heads=self.n_heads,
	            n_embed=self.n_embed,
	            block_size=self.block_size,
	            dropout=self.dropout,
	        )
	        x = x + attention(attn_input, deterministic)

	        mlp_input = nn.LayerNorm()(x)
	        feed_forward = PrometheusMLP(n_embed=self.n_embed, dropout=self.dropout)
	        return x + feed_forward(mlp_input, deterministic)

	class Prometheus(nn.Module):
	    """Decoder-only language model returning per-position vocabulary logits.

	    NOTE: ``training`` is accepted by ``__call__`` but never read; every
	    dropout layer and every block is run in deterministic mode.
	    """

	    vocab_size: int  # number of token ids / width of the output logits
	    n_embed: int  # feature width
	    n_heads: int  # attention heads per block
	    n_layers: int  # number of stacked blocks
	    block_size: int  # number of rows in the positional embedding table
	    dropout: float  # dropout rate passed to the sub-modules

	    @nn.compact
	    def __call__(self, idx, training=False):
	        """Map integer token ids ``idx`` of shape (B, T) to logits (B, T, vocab_size)."""
	        _batch, seq_len = idx.shape

	        # Token table first, position table second (Embed_0 / Embed_1).
	        token_table = nn.Embed(self.vocab_size, self.n_embed)
	        position_table = nn.Embed(self.block_size, self.n_embed)
	        hidden = token_table(idx) + position_table(jnp.arange(seq_len))
	        hidden = nn.Dropout(self.dropout)(hidden, deterministic=True)

	        # Blocks are wrapped with nn.remat; argument 2 (the deterministic
	        # flag) is marked static and is therefore passed positionally.
	        BlockRemat = nn.remat(PrometheusBlock, static_argnums=(2,))
	        for _ in range(self.n_layers):
	            layer = BlockRemat(
	                self.n_embed,
	                self.n_heads,
	                self.block_size,
	                self.dropout,
	            )
	            hidden = layer(hidden, True)

	        # Final normalisation followed by the projection to vocabulary logits.
	        normed = nn.LayerNorm()(hidden)
	        return nn.Dense(self.vocab_size)(normed)

	# Build the network from the hyperparameters stored in the checkpoint config.
	model = Prometheus(
	    **{
	        field: CONFIG[field]
	        for field in (
	            "vocab_size",
	            "n_embed",
	            "n_heads",
	            "n_layers",
	            "block_size",
	            "dropout",
	        )
	    }
	)

	# Weights from the checkpoint; handed to model.apply when generating text.
	params = ckpt["params"]

	def generate(prompt, max_new_tokens=80, temperature=1.1):
	    """Sample a continuation of ``prompt`` and return prompt plus continuation.

	    Args:
	        prompt: Text to continue. Only its last ``block_size - 1`` tokens are
	            kept, so the returned string starts with that truncated prompt.
	        max_new_tokens: Number of tokens to sample.
	        temperature: Divisor applied to the logits before top-k sampling.

	    Returns:
	        The decoded string of the kept prompt tokens followed by the sampled
	        tokens.

	    Raises:
	        ValueError: If ``prompt`` encodes to no tokens.
	    """
	    block_size = CONFIG["block_size"]
	    top_k = 40

	    tokens = encode(prompt)
	    if not tokens:
	        raise ValueError("prompt must contain at least one character")
	    tokens = tokens[-(block_size - 1):]

	    # Seed once from NumPy's global RNG, then derive a fresh key per step.
	    key = jax.random.PRNGKey(np.random.randint(0, 99999))

	    for _ in range(max_new_tokens):
	        # The sequence grows by one token per step. Feed the model only the
	        # last block_size tokens, otherwise positions past block_size would
	        # index beyond the positional embedding table.
	        context = jnp.array([tokens[-block_size:]])
	        logits = model.apply(params, context, training=False)
	        logits = logits[0, -1, :] / temperature

	        # Never ask for more candidates than the vocabulary has.
	        k = min(top_k, logits.shape[-1])
	        top_k_logits, top_k_indices = jax.lax.top_k(logits, k)

	        # categorical() takes unnormalised log-probabilities, so sampling
	        # from the top-k logits equals sampling from their softmax.
	        key, step_key = jax.random.split(key)
	        chosen = int(jax.random.categorical(step_key, top_k_logits))
	        tokens.append(int(top_k_indices[chosen]))

	    return decode(tokens)

	def chat(message, history):
	    """Chat callback: generate a continuation of ``message`` and return it.

	    Args:
	        message: The user's text, used as the generation prompt.
	        history: Previous turns supplied by the chat interface; not used.

	    Returns:
	        The generated text with the prompt removed and surrounding
	        whitespace stripped. An empty message yields an empty string.
	    """
	    if not message:
	        # Nothing to continue; generate() cannot run on an empty token list.
	        return ""

	    result = generate(message, max_new_tokens=80, temperature=1.1)

	    # Remove the prompt from the answer. generate() keeps only the last
	    # block_size - 1 prompt tokens and decodes them through the vocabulary,
	    # so the prefix to strip is that decoded, truncated prompt and not the
	    # raw message, whose length can differ.
	    prompt_tokens = encode(message)[-(CONFIG["block_size"] - 1):]
	    prompt_text = decode(prompt_tokens)
	    if result.startswith(prompt_text):
	        answer = result[len(prompt_text):]
	    else:
	        answer = result
	    return answer.strip()

	# Sample prompts passed to the chat interface as its examples.
	_EXAMPLE_PROMPTS = [
	    "Москва —",
	    "Россия — это",
	    "Нейронная сеть — это",
	    "Python — язык",
	]

	# Chat UI wired to the chat() callback.
	demo = gr.ChatInterface(
	    fn=chat,
	    examples=_EXAMPLE_PROMPTS,
	    title="🔥 Prometheus AI",
	    description="Языковая модель 1.2B параметров. Создана с нуля одним человеком.",
	)

	# Start the app as soon as the script is executed.
	demo.launch()