import random

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hugging Face Hub repo id of the MiniMind checkpoint
transformers_model_path = "AoEiuV020/MiniMind"
# Load the tokenizer and model; trust_remote_code is needed because the
# MiniMind checkpoint ships its own model code on the Hub.
tokenizer = AutoTokenizer.from_pretrained(transformers_model_path)
model = AutoModelForCausalLM.from_pretrained(transformers_model_path, trust_remote_code=True).eval()
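
# Note: the model stays on CPU by default. On a machine with a GPU it could be
# loaded there instead, e.g. (a sketch; assumes the checkpoint works in fp16):
# model = AutoModelForCausalLM.from_pretrained(
#     transformers_model_path, trust_remote_code=True, torch_dtype=torch.float16
# ).eval().to("cuda")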
def setup_seed(seed):
    # Seed torch (CPU and all CUDA devices) and Python's RNG so that
    # sampled generation is reproducible for a given seed.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    random.seed(seed)
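
# predict() below re-seeds with a random value on every request so replies vary;
# calling setup_seed with a fixed value (e.g. setup_seed(42)) before generation
# would instead make sampled outputs repeatable.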
def predict(prompt):
    messages = []
    max_seq_len = 128  # max new tokens to generate; also bounds the prompt length below
    history_cnt = 0    # number of past turns to keep; 0 disables conversation history
    model_mode = 2     # nonzero: render via the chat template; 0 would send BOS + raw prompt
    setup_seed(random.randint(0, 2048))  # fresh seed per request so replies vary
    messages = messages[-history_cnt:] if history_cnt else []  # no-op while history_cnt == 0
    messages.append({"role": "user", "content": prompt})
    # Render the conversation with the chat template, then keep only the last
    # max_seq_len + 1 characters so the prompt stays short.
    new_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )[-max_seq_len - 1:] if model_mode != 0 else (tokenizer.bos_token + prompt)
    with torch.no_grad():
        # Tokenize the prompt and place it on the same device as the model.
        x = torch.tensor(tokenizer(new_prompt)['input_ids'], device=model.device).unsqueeze(0)
        outputs = model.generate(
            x,
            eos_token_id=tokenizer.eos_token_id,
            max_new_tokens=max_seq_len,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.7,
            top_p=0.95,
            pad_token_id=tokenizer.pad_token_id
        )
    # Drop the prompt tokens and decode only the newly generated ones.
    return tokenizer.decode(outputs.squeeze()[x.shape[1]:].tolist(), skip_special_tokens=True)
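
# Optional: a streaming variant of predict(). This is only a sketch; it assumes
# MiniMind's remote generate() accepts transformers' standard `streamer` kwarg,
# which is not verified here. Gradio treats a generator function as a streaming
# output and re-renders the textbox on each yield.
from threading import Thread
from transformers import TextIteratorStreamer

def predict_stream(prompt):
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    kwargs = dict(inputs, streamer=streamer, max_new_tokens=128,
                  do_sample=True, temperature=0.7, top_p=0.95,
                  eos_token_id=tokenizer.eos_token_id,
                  pad_token_id=tokenizer.pad_token_id)
    # Run generation in a background thread; the streamer yields decoded text.
    Thread(target=model.generate, kwargs=kwargs).start()
    partial = ""
    for piece in streamer:
        partial += piece
        yield partial

# To try it, pass fn=predict_stream instead of fn=predict to gr.Interface below.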
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
    outputs="text",
    title="MiniMind Chatbot",
    description="Enter text and see the model's response."
)
iface.launch()
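# launch() accepts further options if needed, e.g. iface.launch(share=True) for a
# temporary public URL or iface.launch(server_name="0.0.0.0") to listen on all
# interfaces; the defaults above suit a Hugging Face Space.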