import os
import threading

import gradio as gr
import torch

# Hugging Face repo holding the LoRA adapter (PEFT config points at the base model).
MODEL_ID = "SatyamSinghal/taskmind-1.1b-chat-lora"
# Optional auth token for private/gated repos; None is acceptable for public ones.
HF_TOKEN = os.getenv("HF_TOKEN")

# Lazily-initialized text-generation pipeline: loaded on first request so the
# UI comes up immediately instead of blocking on a multi-GB download.
pipe = None
# Serializes load_model() so concurrent first requests don't load the model twice.
_load_lock = threading.Lock()


def load_model():
    """Load the tokenizer and LoRA model into the module-level ``pipe``.

    Idempotent and thread-safe: the first caller performs the (slow) load
    while any concurrent callers wait on the lock and then return. Heavy
    imports (peft/transformers) are deferred to this function so the app
    can start before they are touched.

    Raises:
        Exception: whatever ``from_pretrained`` raises on download/auth
            failure; callers are expected to catch and surface it.
    """
    global pipe
    if pipe is not None:
        return
    with _load_lock:
        # Double-checked: another thread may have finished loading while
        # this one was waiting on the lock.
        if pipe is not None:
            return

        from peft import AutoPeftModelForCausalLM
        from transformers import AutoTokenizer, pipeline

        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            token=HF_TOKEN,
        )

        print("Loading model...")
        model = AutoPeftModelForCausalLM.from_pretrained(
            MODEL_ID,
            token=HF_TOKEN,
            # fp16 on GPU for speed/VRAM; fp32 on CPU where fp16 inference
            # is slow or unsupported for many ops.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            low_cpu_mem_usage=True,
        )

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
        )
        print("Model loaded successfully.")


def respond(message, history):
    """Chat handler for ``gr.ChatInterface``.

    Args:
        message: The latest user message (str).
        history: Prior turns as ``(user, assistant)`` pairs — Gradio's
            tuple-format history; either element may be falsy/empty.

    Returns:
        The assistant reply as a string, or a human-readable error message
        if loading or generation failed (the UI keeps running either way).
    """
    try:
        load_model()
    except Exception as e:
        return f"❌ Model failed to load: {str(e)}"

    # Rebuild the full conversation in chat-template message format.
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        result = pipe(
            messages,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    except Exception as e:
        # Surface generation failures in the chat instead of crashing the
        # handler — consistent with how load failures are reported above.
        return f"❌ Generation failed: {str(e)}"

    generated = result[0]["generated_text"]
    # With chat-format input the pipeline returns the whole message list;
    # the last entry is the newly generated assistant turn.
    if isinstance(generated, list):
        return generated[-1]["content"]
    return str(generated)


demo = gr.ChatInterface(
    fn=respond,
    title="TaskMind Interface",
    description="Chat with the TaskMind LoRA model.",
    examples=[
        "Who are you?",
        "@Satyam fix the growstreams deck ASAP NO Delay",
        "done bhai, merged the PR",
        "login page 60% ho gaya",
        "getting 500 error on registration",
    ],
)

if __name__ == "__main__":
    demo.launch()