"""Gradio chat UI for the fine-tuned Qwen 0.5B model ``sakthi54321/power_NLP``.

Loads the model with accelerate's ``device_map="auto"`` (fp16 on GPU, fp32 on
CPU), wraps it in a ``text-generation`` pipeline, and serves a ChatInterface.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_ID = "sakthi54321/power_NLP"

# Load model & tokenizer.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # `torch_dtype` is the stable keyword (`dtype` is only a recent alias
    # and raises TypeError on older transformers releases).
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# Build pipeline. NOTE: do NOT pass `device=` here — the model was already
# dispatched by accelerate via `device_map="auto"`, and supplying a device
# to the pipeline as well raises an error / double-moves the model.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


def chat_fn(message, history):
    """Generate a reply for `message`, conditioning on the chat `history`.

    Args:
        message: The user's latest input string.
        history: Prior turns from Gradio — either a list of
            ``{"role", "content"}`` dicts (messages format) or legacy
            ``(user, assistant)`` pairs; both are handled.

    Returns:
        The model's reply text only (the prompt is not echoed back).
    """
    # Rebuild the conversation so the fine-tuned chat model actually sees
    # prior turns (previously `history` was silently ignored).
    messages = []
    for turn in history:
        if isinstance(turn, dict):  # gradio "messages" format
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:  # legacy tuple/list format: (user_msg, assistant_msg)
            user_msg, bot_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Qwen chat models are trained on their chat template; fall back to the
    # raw message if this tokenizer has no template configured.
    if getattr(tokenizer, "chat_template", None):
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    else:
        prompt = message

    outputs = generator(
        prompt,
        max_new_tokens=1000,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        # Return only the newly generated tokens — otherwise the reply
        # starts with a copy of the entire prompt.
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()


# Gradio Chat UI
demo = gr.ChatInterface(
    fn=chat_fn,
    title="Power NLP - Qwen 0.5 Finetuned",
    description="Chat with my fine-tuned Qwen 0.5 model!",
)

if __name__ == "__main__":
    demo.launch()