import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_NAME = "tiiuae/falcon-7b-instruct"

# Load tokenizer and model once at module import.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto")

# Falcon defines no dedicated pad token; fall back to EOS so padding and
# truncation inside the pipeline do not raise.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.eos_token_id

# Create a text-generation pipeline.
# NOTE: max_new_tokens bounds only the generated continuation. The previous
# max_length=512 counted prompt tokens too, so a long prompt could leave
# little or no room for the reply.
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    truncation=True,  # truncate over-long prompts instead of erroring
    do_sample=True,
    temperature=0.7,
)


def chat(user_input):
    """Generate a reply for *user_input* and return only the new text.

    By default the pipeline echoes the prompt at the start of
    ``generated_text``; ``return_full_text=False`` strips it so the UI
    shows just the model's reply.

    Args:
        user_input: The user's message as a plain string.

    Returns:
        The model's generated reply (empty string for blank input).
    """
    # Skip a pointless (and slow) generation call for blank input.
    if not user_input or not user_input.strip():
        return ""
    # Generation parameters were already set on the pipeline; no need to
    # repeat max_length/truncation per call as the original did.
    outputs = text_gen(user_input, return_full_text=False)
    return outputs[0]["generated_text"].strip()


demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Falcon-7B-Instruct Chat (Example)",
    description="A chat interface for Falcon-7B-Instruct.",
)

if __name__ == "__main__":
    demo.launch()