"""Gradio app that drafts email replies with a Hugging Face causal language model.

The model and tokenizer are loaded once at import time. The model id is read
from the ``MODEL_ID`` environment variable and the (optional) Hub access token
from ``HF_TOKEN``.
"""

import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = os.getenv("MODEL_ID", "mistralai/Mistral-7B-Instruct-v0.3")
HF_TOKEN = os.getenv("HF_TOKEN", None)

# Instructions sent as the system message of every request.
SYSTEM_PROMPT = (
    "You are an email assistant. Draft a reply email.\n"
    "- Be clear and polite.\n"
    "- Ask up to 2 clarifying questions if needed.\n"
    "- Do not invent facts.\n"
    "- Output ONLY the email body.\n"
)

# Sampling settings passed to ``model.generate``.
MAX_NEW_TOKENS = 250
TEMPERATURE = 0.4

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype="auto",
    device_map="auto",
)


def draft_reply(subject: str, thread: str) -> str:
    """Generate a draft reply for an email thread.

    Args:
        subject: Subject line of the email being answered.
        thread: Text of the email thread to reply to.

    Returns:
        The newly generated text only (the prompt tokens are sliced off),
        decoded without special tokens and stripped of surrounding whitespace.
    """
    user = f"Subject: {subject}\n\nEmail thread:\n{thread}\n\nWrite the reply now."
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # The rendered chat template already carries the model's special tokens,
    # so the tokenizer must not add them again (otherwise e.g. BOS is doubled).
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
            do_sample=True,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


demo = gr.Interface(
    fn=draft_reply,
    inputs=[
        gr.Textbox(label="Subject"),
        gr.Textbox(label="Email Thread", lines=10),
    ],
    outputs=gr.Textbox(label="Draft Reply", lines=12),
    title="Email Reply Drafting Assistant",
)

demo.launch()