Spaces:
Sleeping
Sleeping
import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Model ID and (optional) Hugging Face auth token come from the environment,
# so the Space can be repointed to another checkpoint without code changes.
MODEL_ID = os.getenv("MODEL_ID", "mistralai/Mistral-7B-Instruct-v0.3")
HF_TOKEN = os.getenv("HF_TOKEN", None)  # None -> anonymous access to public models

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype="auto",  # use the dtype the checkpoint was saved in
    device_map="auto",   # place/shard across whatever devices are available
)
# This app only ever generates; switch out of train mode so dropout and other
# training-only behavior are disabled.
model.eval()
def draft_reply(subject, thread):
    """Draft a reply to an email thread using the instruction-tuned model.

    Args:
        subject: Subject line of the email being replied to.
        thread: Full text of the email thread so far.

    Returns:
        The generated reply body as a whitespace-stripped string.
    """
    system = (
        "You are an email assistant. Draft a reply email.\n"
        "- Be clear and polite.\n"
        "- Ask up to 2 clarifying questions if needed.\n"
        "- Do not invent facts.\n"
        "- Output ONLY the email body.\n"
    )
    user = f"Subject: {subject}\n\nEmail thread:\n{thread}\n\nWrite the reply now."
    messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # apply_chat_template(tokenize=False) already embeds the model's special
    # tokens (e.g. BOS) in the prompt string, so the re-tokenization below must
    # NOT add them again — a duplicated BOS degrades generation on chat models.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=250, temperature=0.4, do_sample=True)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    return tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()
# Minimal two-field UI: subject + thread text in, one drafted reply out.
subject_box = gr.Textbox(label="Subject")
thread_box = gr.Textbox(label="Email Thread", lines=10)
reply_box = gr.Textbox(label="Draft Reply", lines=12)

demo = gr.Interface(
    fn=draft_reply,
    inputs=[subject_box, thread_box],
    outputs=reply_box,
    title="Email Reply Drafting Assistant",
)
demo.launch()