import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Path to local model in the same repo (e.g., "Mixtral-8x7B-Instruct-v0.1" folder uploaded to Space)
MODEL_DIR = "Mixtral-8x7B-Instruct-v0.1"

# Load tokenizer and model once at import time so every request reuses them.
# NOTE(review): trust_remote_code=True lets Python code shipped inside MODEL_DIR
# run during loading -- confirm this checkpoint actually needs it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)


# Generation function
def generate_text(prompt):
    """Generate a sampled chat reply to a single user message.

    The prompt is wrapped as one ``user`` turn, rendered through the
    tokenizer's chat template, and passed to ``model.generate`` with
    sampling enabled (temperature 0.7, top-p 0.95, at most 300 new tokens).

    Args:
        prompt: The user's message text.

    Returns:
        The decoded text of the newly generated tokens only, stripped of
        surrounding whitespace. Returns an empty string when ``prompt`` is
        ``None``, empty, or whitespace-only.
    """
    # Nothing to answer: skip the (expensive) generation call entirely.
    if prompt is None or not prompt.strip():
        return ""

    messages = [{"role": "user", "content": prompt}]

    # return_dict=True yields a mapping with input_ids and attention_mask.
    # Without it a bare tensor may be returned, which cannot be unpacked
    # with ** when calling generate().
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    output = model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; slice them off
    # instead of string-splitting on the prompt, which leaves template
    # markers in the reply and breaks if the model repeats the prompt.
    new_tokens = output[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


# Gradio interface
demo = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=4, label="Enter your message (FR / AR / EN...)"),
    outputs="text",
    title="🧠 Mixtral 8x7B Instruct Chat",
    description="Multilingual response generation with Mistral Mixtral 8x7B Instruct model.",
)

# Launch
demo.launch()