"""Gradio front-end that serves text generation from the Lance AI V2 model."""

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "NeuraCraft/Lance-AI-V2"

# Ask cuDNN for deterministic kernels and turn off its autotuner. Sampling in
# `generate` is still random: no RNG seed is set anywhere in this file.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

print("Loading model...")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Fall back to the EOS token when the tokenizer ships without a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): trust_remote_code=True lets the model repository supply Python
# code that is executed locally. Confirm the repository is trusted (or pin a
# revision) before deploying.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
)

model.eval()


def generate(text):
    """Generate a sampled continuation for *text* and return it decoded.

    Args:
        text: Prompt string from the UI. ``None`` is treated as an empty
            prompt.

    Returns:
        The first generated sequence decoded with special tokens removed, or
        an empty string when the prompt tokenizes to zero tokens.
        NOTE(review): for a causal LM the returned sequence presumably starts
        with the prompt tokens, so the prompt is echoed in the output --
        confirm this is intended.
    """
    inputs = tokenizer(text or "", return_tensors="pt")

    # A prompt that yields no tokens gives the model nothing to condition on
    # and makes `model.generate` fail; return an empty result instead of
    # surfacing a stack trace in the UI.
    if inputs["input_ids"].numel() == 0:
        return ""

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id,
        )

    return tokenizer.decode(output[0], skip_special_tokens=True)


demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(label="Input"),
    outputs=gr.Textbox(label="Lance AI"),
    title="Lance AI V2",
)

# Launched at import time, as in the original script, so the existing entry
# point behaviour is unchanged.
demo.launch()