import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("kolbeins/model")
print("Tokenizer loaded.")

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained("kolbeins/model")
print("Model loaded.")

# Causal LM tokenizers often ship without a pad token; padding=True below
# requires one, so fall back to the EOS token if it is missing.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def chat(input_txt):
    """Generate a response from the model for the given input text."""
    try:
        print("Tokenizing input...")
        # Tokenize the input text, truncating long prompts to the model's limit
        inputs = tokenizer(input_txt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        print(f"Tokenized inputs: {inputs}")

        print("Generating output...")
        # Generate the output, capping new tokens so generation terminates promptly
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            pad_token_id=tokenizer.pad_token_id,
        )
        print(f"Generated output: {outputs}")

        print("Decoding output...")
        # Decode only the newly generated tokens back to text; generate() returns
        # the prompt followed by the continuation, so slice the prompt off first
        response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        print(f"Decoded response: {response}")

        # Return the generated response
        return response
    except Exception as e:
        print(f"Error during inference: {e}")
        return f"Error: {e}"


# Define the Gradio interface for the chatbot
demo = gr.Interface(fn=chat, inputs="text", outputs="text")

# Launch the interface
print("Launching Gradio interface...")
demo.launch()
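
# A minimal sketch of opt-in sampling settings, in case greedy decoding (the
# generate() default) produces repetitive replies. The values below are
# illustrative assumptions, not tuned for "kolbeins/model":
#
#     outputs = model.generate(
#         **inputs,
#         max_new_tokens=256,
#         do_sample=True,      # sample from the distribution instead of argmax
#         temperature=0.7,     # <1.0 sharpens the distribution
#         top_p=0.9,           # nucleus sampling cutoff
#         pad_token_id=tokenizer.pad_token_id,
#     )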