timecapsuleLLM / app.py
cat4laugh's picture
Update app.py
5f63a02 verified
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# 1. Model identifier handed to both `from_pretrained` calls below.
model_name = "haykgrigorian/TimeCapsuleLLM-v2-llama-1.2B"

# 2. Load the tokenizer first, then the model. This runs once, at import
#    time, so every request served by `generate_text` reuses the same objects.
print("Loading model... this usually takes 1-2 minutes on first run.")
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForCausalLM.from_pretrained(model_name),
)
# 3. Define the Generate Function
def generate_text(prompt, max_tokens=100, temperature=0.7):
    """Generate a continuation of *prompt* with the module-level model.

    Args:
        prompt: Text to feed to the tokenizer.
        max_tokens: Upper bound on newly generated tokens; coerced to ``int``
            because UI sliders may deliver floats.
        temperature: Sampling temperature; coerced to ``float``. Values that
            are not strictly positive switch to greedy decoding instead of
            sampling, since sampling requires a positive temperature.

    Returns:
        The decoded first output sequence with special tokens stripped.
        NOTE(review): for a causal LM, ``generate`` normally returns the
        prompt tokens followed by the new ones, so the prompt is presumably
        part of the returned text - confirm if callers need only the
        continuation.
    """
    # Per the original author's note, asking the tokenizer not to return
    # token_type_ids is what stopped `generate` from erroring.
    inputs = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False)
    # Keep the input tensors on whatever device the model lives on, so the
    # function keeps working if the model is ever moved off the CPU.
    inputs = inputs.to(model.device)

    generation_kwargs = {
        "max_new_tokens": int(max_tokens),
        "pad_token_id": tokenizer.eos_token_id,
    }
    temperature = float(temperature)
    if temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temperature
    else:
        # Sampling with a non-positive temperature is rejected by
        # `generate`; fall back to deterministic greedy decoding.
        generation_kwargs["do_sample"] = False

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # Decode result (only the first sequence; one prompt gives one output).
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# 4. Build the Gradio components, wire them to `generate_text`, and serve.
# The component order must match the positional parameters of
# `generate_text`: (prompt, max_tokens, temperature).
_prompt_box = gr.Textbox(label="Prompt", placeholder="Enter your text here...")
_max_tokens_slider = gr.Slider(
    minimum=10, maximum=300, value=100, label="Max New Tokens"
)
_temperature_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.7, label="Temperature"
)

iface = gr.Interface(
    fn=generate_text,
    inputs=[_prompt_box, _max_tokens_slider, _temperature_slider],
    outputs="text",
    title="TimeCapsule LLM API",
    description="API for n8n connection.",
)

# Start the web server.
iface.launch()