import subprocess
import sys
import time
from collections import defaultdict, deque
# Automatic dependency installation
def install_and_import(package):
    try:
        __import__(package)
    except ImportError:
        print(f"{package} is not installed, installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
| install_and_import("gradio") | |
| install_and_import("transformers") | |
| install_and_import("torch") | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
# === RATE LIMIT ===
click_logs = defaultdict(lambda: {"minute": deque(), "hour": deque(), "day": deque()})
LIMITS = {"minute": (13, 60), "hour": (90, 3600), "day": (1350, 86400)}
def check_rate_limit(session_id):
    now = time.time()
    logs = click_logs[session_id]
    remaining, reset_times = {}, {}
    for key, (limit, interval) in LIMITS.items():
        # Drop timestamps that have aged out of this window
        while logs[key] and now - logs[key][0] > interval:
            logs[key].popleft()
        used = len(logs[key])
        remaining[key] = max(0, limit - used)
        reset_times[key] = int(interval - (now - logs[key][0])) if logs[key] else int(interval)
        if used >= limit:
            return False, f"⛔ {key.capitalize()} rate limit exceeded ({limit}/{key})", remaining, reset_times
    # No limit exceeded: record this request in every window
    for key in LIMITS:
        logs[key].append(now)
    return True, None, remaining, reset_times
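# Return-shape sketch (hypothetical session id). On the first call nothing
# has been logged yet, so every window is at full capacity:
#   ok, msg, remaining, resets = check_rate_limit("demo-session")
#   # ok is True, msg is None
#   # remaining == {"minute": 13, "hour": 90, "day": 1350}
#   # resets    == {"minute": 60, "hour": 3600, "day": 86400}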
# === CHAT GENERATION ===
def extract_response_between_tokens(text: str) -> str:
    start = "<|im_start|>assistant<|im_sep|>"
    end = "<|im_end|>"
    try:
        return text.split(start)[1].split(end)[0]
    except Exception:
        return text
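# e.g. extract_response_between_tokens(
#     "<|im_start|>user<|im_sep|>Hi<|im_end|><|im_start|>assistant<|im_sep|>Hello!<|im_end|>"
# ) -> "Hello!"; if the markers are missing, the text is returned unchanged.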
# Load the model
model_name = "Bertug1911/BrtGPT-1-Pre"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# ID of the special end-of-turn token
im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
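# The sampling loop below stops as soon as this id is drawn, so a reply
# ends at the turn boundary even before max_new_tokens is reached.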
# Generation function
def chat_generate(prompt, temperature, top_k, max_new_tokens, session_id):
    ok, msg, rem, resets = check_rate_limit(session_id)
    if not ok:
        return msg, format_status(rem, resets)
    # Apply the tokenizer's Jinja chat template
    messages = [{"role": "user", "content": prompt}]
    formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted, return_tensors="pt").to(device)
    gen = inputs["input_ids"]
    # Token-by-token sampling loop
    for _ in range(int(max_new_tokens)):
        with torch.no_grad():
            out = model(gen)
        logits = out.logits[:, -1, :] / float(temperature)
        if int(top_k) > 0:
            # Keep only the top-k logits; mask the rest to -inf
            vals, idxs = torch.topk(logits, int(top_k))
            filt = torch.full_like(logits, float("-inf"))
            filt.scatter_(1, idxs, vals)
            logits = filt
        probs = torch.softmax(logits, dim=-1)
        nxt = torch.multinomial(probs, num_samples=1)
        gen = torch.cat([gen, nxt], dim=1)
        if nxt.item() == im_end_id:
            break
    out_text = tokenizer.decode(gen[0], skip_special_tokens=False)
    # Undo the byte-level BPE markers: "Ġ" encodes a space, "Ċ" a newline
    no_sp = out_text.replace(" ", "").replace("Ġ", " ")
    formatted_out = no_sp.replace("Ċ", "\n")
    if not formatted_out.strip().endswith("<|im_end|>"):
        formatted_out += "<|im_end|>"
    resp = extract_response_between_tokens(formatted_out)
    return resp, format_status(rem, resets)
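# Note: each loop iteration re-runs the model over the full sequence. For
# longer generations, passing past_key_values (supported by most Hugging Face
# causal LMs) or calling model.generate() would avoid the quadratic
# recomputation; the manual loop is kept here for clarity.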
# Format the status text
def format_status(rem, resets):
    return "\n".join(
        f"🕒 {k.capitalize()}: {rem[k]} left — resets in {resets[k]} sec"
        for k in ["minute", "hour", "day"]
    )
# === UI ===
with gr.Blocks() as app:
    session_id = gr.State(str(time.time()))
    gr.Markdown("# 🤖 BrtGPT-1-Pre")
    with gr.Row():
        prompt = gr.Textbox(lines=3, placeholder="Enter your message...", label="Prompt")
        output = gr.Textbox(label="Response")
    with gr.Row():
        temperature = gr.Slider(0.01, 1.0, value=0.5, step=0.01, label="Temperature")
        top_k = gr.Slider(1, 50, value=10, step=1, label="Top-K")
        max_new_tokens = gr.Slider(1, 128, value=15, step=1, label="Max New Tokens")
    generate_button = gr.Button("Generate")
    status = gr.Markdown()
    generate_button.click(
        fn=chat_generate,
        inputs=[prompt, temperature, top_k, max_new_tokens, session_id],
        outputs=[output, status]
    )

app.launch()