"""Gradio demo that serves single-turn chat replies from a MiniMind causal LM.

Importing/running this module loads the tokenizer and model, builds a
``gr.Interface`` around :func:`predict`, and launches it.
"""

import random

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Replace with your actual model path
transformers_model_path = "AoEiuV020/MiniMind"

# Budget shared by two things in predict(): the number of new tokens to
# generate, and (plus one) the number of trailing *characters* of the rendered
# chat prompt that are kept.
MAX_SEQ_LEN = 128

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(transformers_model_path)
model = AutoModelForCausalLM.from_pretrained(
    transformers_model_path,
    trust_remote_code=True,
).eval()


def setup_seed(seed: int) -> None:
    """Seed torch (CPU and all CUDA devices) and Python's ``random`` module.

    Args:
        seed: Value passed to every random number generator seeded here.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    random.seed(seed)


def predict(prompt: str) -> str:
    """Generate the model's reply to a single user message.

    Each call is independent: no conversation history is kept, and a fresh
    random seed in ``[0, 2048]`` is drawn before generation.

    Args:
        prompt: The user's message text.

    Returns:
        The decoded text of the tokens that follow the prompt in the model
        output, with special tokens removed.
    """
    setup_seed(random.randint(0, 2048))

    # Single-turn chat: the only message is the current user input.
    messages = [{"role": "user", "content": prompt}]
    rendered = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Keep only the tail of the rendered prompt. This is a slice of the string
    # (characters, not tokens), so a long input loses its beginning --
    # including any template text that was rendered there.
    new_prompt = rendered[-(MAX_SEQ_LEN + 1):]

    with torch.no_grad():
        x = torch.tensor(
            tokenizer(new_prompt)['input_ids'],
            device='cpu',
        ).unsqueeze(0)
        # NOTE(review): with the stock transformers generate(), temperature and
        # top_p only take effect when do_sample=True. This model is loaded with
        # trust_remote_code, so its generate() may behave differently --
        # confirm before adding or removing sampling flags.
        outputs = model.generate(
            x,
            eos_token_id=tokenizer.eos_token_id,
            max_new_tokens=MAX_SEQ_LEN,
            temperature=0.7,
            top_p=0.95,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Skip the first x.shape[1] positions (the prompt length) and decode only
    # what comes after them.
    reply_ids = outputs.squeeze()[x.shape[1]:].tolist()
    return tokenizer.decode(reply_ids, skip_special_tokens=True)


iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
    outputs="text",
    title="MiniMind Chatbot",
    description="Enter text and see the model's response.",
)

iface.launch()