Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
| import torch | |
# Identifier of the checkpoint handed to both from_pretrained() calls below.
MODEL_ID = "sakthi54321/power_NLP"

# Load the tokenizer and the causal-LM weights for MODEL_ID.
# float16 is selected only when CUDA is available; otherwise float32 is used.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
)
# Text-generation pipeline wrapping the already-loaded model and tokenizer.
# No `device` argument is passed here: the model was loaded with
# device_map="auto", so device placement is left to that loading step.
generator = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
)
# Chat function
def chat_fn(message, history):
    """Generate the model's reply to ``message`` for the Gradio chat UI.

    Args:
        message: The latest user message; passed to the pipeline as the prompt.
        history: Earlier turns supplied by ``gr.ChatInterface``. Accepted to
            satisfy the callback signature; not used when building the prompt.

    Returns:
        Only the newly generated text, with surrounding whitespace removed.
    """
    outputs = generator(
        message,
        max_new_tokens=1000,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        # By default the text-generation pipeline returns prompt + completion,
        # which would echo the user's own message at the start of every reply.
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()
# Gradio chat UI: each submitted message is answered by chat_fn.
demo = gr.ChatInterface(
    description="Chat with my fine-tuned Qwen 0.5 model!",
    title="Power NLP - Qwen 0.5 Finetuned",
    fn=chat_fn,
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()