"""Gradio chat UI for the AgriParam agriculture assistant, running on CPU.

Running this module loads the tokenizer and model, builds a
``gr.ChatInterface`` around :func:`chat`, and launches it.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "bharatgenai/AgriParam"

# Single-turn prompt layout fed to the model.
# NOTE(review): the role markers follow the prompt format shown on the
# AgriParam model card -- confirm against the card before relying on them.
PROMPT_TEMPLATE = "<user> {message} <assistant>"

print("Loading AgriParam model... This may take some time on free CPU")

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=False)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    # float16 halves weight memory compared with float32.
    # NOTE(review): half-precision CPU kernel coverage depends on the
    # installed PyTorch build -- confirm it runs on the target host.
    torch_dtype=torch.float16,
    device_map="cpu",
    low_cpu_mem_usage=True,
    # Eager attention was chosen by the original author to avoid
    # rope_scaling issues.
    attn_implementation="eager",
)


def chat(message: str, history) -> str:
    """Generate one AgriParam reply for a single user message.

    Args:
        message: Text typed by the user.
        history: Earlier turns supplied by ``gr.ChatInterface``. It is not
            used: every reply is generated from ``message`` alone.

    Returns:
        The newly generated text with surrounding whitespace stripped.
    """
    prompt = PROMPT_TEMPLATE.format(message=message)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        # Kept from the original, which marks it as important for
        # stability on CPU.
        use_cache=False,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Slicing by prompt length isolates the reply without
    # relying on a text separator (the previous `split("")` raised
    # ValueError: empty separator on every call).
    prompt_length = inputs["input_ids"].shape[-1]
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()


# UI strings below were restored from mis-encoded (mojibake) text to the
# intended emoji, bullet and Devanagari characters.
demo = gr.ChatInterface(
    fn=chat,
    title="🌾 AgriParam - Agriculture Assistant",
    description="Test version on free CPU • English, Hindi & Marathi",
    examples=[
        ["महाराष्ट्रात कापूस पिकावर लाल किडीचा उपाय काय आहे?"],
        ["Best practices for organic wheat farming in Maharashtra?"],
        ["उत्तर प्रदेश में गेहूं की खेती के लिए जैविक खाद कैसे बनाएं?"],
        ["महाराष्ट्रातील सोयाबीन पिकाला पिवळी पाने येण्याचे कारण आणि उपाय?"],
    ],
)

demo.launch()