# app.py
#
# Gradio chat front-end for Mistral-7B-Instruct. Loads the model once at
# import time (fp16 on GPU, fp32 on CPU) and serves a simple multi-turn
# chat UI. Requires the HF_TOKEN secret for the gated model download.
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"

# Optional: get token from environment
HF_TOKEN = os.getenv("HF_TOKEN")  # make sure you added this secret in HF Space

# Load model & tokenizer once at module import.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # fp16 only when a GPU is present; fp32 keeps CPU inference numerically safe.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    token=HF_TOKEN,
)


def chat_with_mistral(prompt, history=None):
    """Generate one assistant reply for *prompt* given prior turns.

    Args:
        prompt: The new user message.
        history: List of (user_text, bot_text) tuples from earlier turns,
            or None for a fresh conversation. Mutated in place by appending
            the new (prompt, answer) pair.

    Returns:
        (answer, history): the assistant's reply string and the updated
        history list.
    """
    history = history or []

    # Build the conversation prompt manually in a simple
    # "User: ...\nAssistant: ..." transcript format.
    full_prompt = ""
    for user_text, bot_text in history:
        full_prompt += f"User: {user_text}\nAssistant: {bot_text}\n"
    full_prompt += f"User: {prompt}\nAssistant:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
            # Mistral has no pad token; reuse EOS to silence the warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the assistant reply: take the text after the last
    # "Assistant:" marker, and cut it off if the model kept generating a
    # hallucinated next "User:" turn.
    answer = decoded.split("Assistant:")[-1].split("User:")[0].strip()
    history.append((prompt, answer))
    return answer, history


with gr.Blocks(title="Mistral Chatbot") as app:
    gr.Markdown("# 🐦 Mistral Chatbot (with HF Token)")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message")
    clear = gr.Button("Clear")

    conversation_state = gr.State([])

    def handle_user_message(user_message, history):
        """Run one chat turn; clear the textbox and refresh both the
        visible chatbot and the hidden conversation state."""
        answer, new_history = chat_with_mistral(user_message, history)
        # BUG FIX: msg.submit declares three outputs
        # (msg, chatbot, conversation_state), so three values must be
        # returned — the original returned only two, which makes Gradio
        # raise a return-value mismatch at runtime.
        return "", new_history, new_history

    msg.submit(
        handle_user_message,
        [msg, conversation_state],
        [msg, chatbot, conversation_state],
    )
    clear.click(
        lambda: ([], []),
        inputs=None,
        outputs=[chatbot, conversation_state],
    )

if __name__ == "__main__":
    app.launch()