import subprocess
import sys
import time
from collections import defaultdict, deque
# Automatic dependency installation: try to import a package and
# install it with pip if the import fails.
def install_and_import(package):
    try:
        __import__(package)
    except ImportError:
        print(f"{package} is not installed, installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install_and_import("gradio")
install_and_import("transformers")
install_and_import("torch")
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# === RATE LIMIT ===
click_logs = defaultdict(lambda: {"minute": deque(), "hour": deque(), "day": deque()})
LIMITS = {"minute": (13, 60), "hour": (90, 3600), "day": (1350, 86400)}
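# Sliding-window rate limiter: for every session, each window above keeps a
# deque of request timestamps, and LIMITS maps a window name to a
# (max requests, window length in seconds) pair. Stale timestamps are
# popped from the left before counting, so a deque never outgrows its limit.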
def check_rate_limit(session_id):
    now = time.time()
    logs = click_logs[session_id]
    remaining, reset_times = {}, {}
    for key, (limit, interval) in LIMITS.items():
        # Drop requests that have fallen out of this window
        while logs[key] and now - logs[key][0] > interval:
            logs[key].popleft()
        used = len(logs[key])
        remaining[key] = max(0, limit - used)
        reset_times[key] = int(interval - (now - logs[key][0]) if logs[key] else interval)
        if used >= limit:
            return False, f"⛔ {key.capitalize()} rate limit exceeded ({limit}/{key})", remaining, reset_times
    # No window exceeded: record this request in every window
    for key in LIMITS:
        logs[key].append(now)
    return True, None, remaining, reset_times
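# A minimal usage sketch (the session id string is hypothetical):
#   ok, msg, remaining, reset_times = check_rate_limit("session-123")
#   if not ok:
#       print(msg)  # e.g. "⛔ Minute rate limit exceeded (13/minute)"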
# === CHAT GENERATION ===
def extract_response_between_tokens(text: str) -> str:
    # Return the text between the assistant start marker and the end token,
    # falling back to the raw text if the markers are missing.
    start = "<|im_start|>assistant<|im_sep|>"
    end = "<|im_end|>"
    try:
        return text.split(start)[1].split(end)[0]
    except Exception:
        return text
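# For example, "<|im_start|>assistant<|im_sep|>Hello!<|im_end|>" yields "Hello!".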
# Load the tokenizer and model
model_name = "Bertug1911/BrtGPT-1-Pre"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
# ID of the special end-of-message token
im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
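# The manual sampling loop below treats this token as an end-of-sequence
# marker and stops generating as soon as it is drawn.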
# Generation function behind the "Generate" button
def chat_generate(prompt, temperature, top_k, max_new_tokens, session_id):
    ok, msg, rem, resets = check_rate_limit(session_id)
    if not ok:
        return msg, format_status(rem, resets)
    # Build the prompt with the tokenizer's chat template
    messages = [{"role": "user", "content": prompt}]
    formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted, return_tensors="pt").to(device)
    gen = inputs["input_ids"]
    # Token-by-token sampling loop; gradients are not needed for inference
    with torch.no_grad():
        for _ in range(int(max_new_tokens)):
            out = model(gen)
            logits = out.logits[:, -1, :] / float(temperature)
            if int(top_k) > 0:
                # Keep only the top_k largest logits; mask the rest to -inf
                vals, idxs = torch.topk(logits, int(top_k))
                filt = torch.full_like(logits, float('-inf'))
                filt.scatter_(1, idxs, vals)
                logits = filt
            probs = torch.softmax(logits, dim=-1)
            nxt = torch.multinomial(probs, num_samples=1)
            gen = torch.cat([gen, nxt], dim=1)
            if nxt.item() == im_end_id:
                break
    out_text = tokenizer.decode(gen[0], skip_special_tokens=False)
    # Undo byte-level BPE markers: drop literal spaces, then map "Ġ" to a
    # space and "Ċ" to a newline
    no_sp = out_text.replace(" ", "").replace("Ġ", " ")
    formatted_out = no_sp.replace("Ċ", "\n")
    if not formatted_out.strip().endswith("<|im_end|>"):
        formatted_out += "<|im_end|>"
    resp = extract_response_between_tokens(formatted_out)
    return resp, format_status(rem, resets)
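# Top-k sketch: with top_k=2 and logits [1.0, 3.0, 2.0], only the two largest
# survive ([-inf, 3.0, 2.0]), so after softmax the sample is drawn from just
# those two tokens (with probabilities ~0.73 and ~0.27 here).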
# Format the remaining-quota status text shown in the UI
def format_status(rem, resets):
    return "\n".join([f"🕒 {k.capitalize()}: {rem[k]} left — resets in {resets[k]} sec" for k in ["minute", "hour", "day"]])
# === UI ===
with gr.Blocks() as app:
    # Per-session id, assigned on page load so every visitor gets an
    # independent rate-limit bucket (a module-level default evaluated once
    # would make all users share a single bucket)
    session_id = gr.State()
    gr.Markdown("""
    # 🤖 BrtGPT-1-Pre
    """)
    with gr.Row():
        prompt = gr.Textbox(lines=3, placeholder="Enter your message...", label="Prompt")
        output = gr.Textbox(label="Response")
    with gr.Row():
        temperature = gr.Slider(0.01, 1.0, value=0.5, step=0.01, label="Temperature")
        top_k = gr.Slider(1, 50, value=10, step=1, label="Top-K")
        max_new_tokens = gr.Slider(1, 128, value=15, step=1, label="Max New Tokens")
    generate_button = gr.Button("Generate")
    status = gr.Markdown()
    generate_button.click(
        fn=chat_generate,
        inputs=[prompt, temperature, top_k, max_new_tokens, session_id],
        outputs=[output, status],
    )
    app.load(lambda: str(time.time()), outputs=session_id)

app.launch()