import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_NAME = "tiiuae/falcon-7b-instruct"

# Load tokenizer and model once at module import.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto")

# Falcon defines no dedicated pad token; fall back to EOS so padding and
# truncation inside the pipeline do not raise.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.eos_token_id

# Create a text-generation pipeline.
# NOTE: max_new_tokens bounds only the generated continuation. The previous
# max_length=512 counted prompt tokens too, so a long prompt could leave
# little or no room for the reply.
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    truncation=True,  # truncate over-long prompts instead of erroring
    do_sample=True,
    temperature=0.7,
)


def chat(user_input):
    """Generate a reply for *user_input* and return only the new text.

    By default the pipeline echoes the prompt at the start of
    ``generated_text``; ``return_full_text=False`` strips it so the UI
    shows just the model's reply.

    Args:
        user_input: The user's message as a plain string.

    Returns:
        The model's generated reply (empty string for blank input).
    """
    # Skip a pointless (and slow) generation call for blank input.
    if not user_input or not user_input.strip():
        return ""
    # Generation parameters were already set on the pipeline; no need to
    # repeat max_length/truncation per call as the original did.
    outputs = text_gen(user_input, return_full_text=False)
    return outputs[0]["generated_text"].strip()


demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Falcon-7B-Instruct Chat (Example)",
    description="A chat interface for Falcon-7B-Instruct.",
)

if __name__ == "__main__":
    demo.launch()