"""Gradio app that serves text generation from TimeCapsuleLLM as a UI/API."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# 1. Setup the Model Name
model_name = "haykgrigorian/TimeCapsuleLLM-v2-llama-1.2B"

# 2. Load the Model and Tokenizer
# Both are loaded once at import time and shared by every request.
print("Loading model... this usually takes 1-2 minutes on first run.")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


# 3. Define the Generate Function
def generate_text(prompt: str, max_tokens: float = 100, temperature: float = 0.7) -> str:
    """Generate a continuation of *prompt* with the loaded model.

    Args:
        prompt: Text to tokenize and feed to the model.
        max_tokens: Upper bound on newly generated tokens. Truncated to an
            int; values below 1 are raised to 1 so the call does not fail.
        temperature: Sampling temperature. Positive values sample with that
            temperature; zero or negative values fall back to greedy
            decoding instead of raising inside ``generate()``.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed.
    """
    # token_type_ids are suppressed because generate() errored when the
    # tokenizer's default output included them.
    inputs = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False)

    new_tokens = max(1, int(max_tokens))
    temp = float(temperature)

    generation_kwargs = {
        "max_new_tokens": new_tokens,
        # Reuse EOS as the padding token id, as the original call did.
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temp > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temp
    else:
        # Sampling requires a strictly positive temperature; API callers are
        # not bound by the UI slider's 0.1 minimum, so decode greedily here.
        generation_kwargs["do_sample"] = False

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # Decode result
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# 4. Launch the Gradio Interface
# The input components map positionally onto generate_text's parameters.
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter your text here..."),
        gr.Slider(minimum=10, maximum=300, value=100, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"),
    ],
    outputs="text",
    title="TimeCapsule LLM API",
    description="API for n8n connection.",
)

iface.launch()