Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
# Lazily-loaded model cache: stays None until the first call to load_model().
model = None


def load_model():
    """Return the fine-tuned causal LM, loading it on first use.

    The loaded model is stored in the module-level ``model`` global, so the
    checkpoint is only read once per process; later calls return the cached
    object.

    Returns:
        The model loaded from "salmapm/llama2_salma" with FP16 weights.
    """
    global model
    if model is None:
        # Request FP16 weights directly from from_pretrained instead of
        # loading in the default dtype and calling .half() afterwards: the
        # end state is the same, but the intermediate full-precision copy of
        # the weights is never materialized.
        # NOTE(review): the model is left on whatever device from_pretrained
        # picks by default (no explicit .to(...)) — confirm FP16 inference is
        # supported on the deployment hardware.
        model = AutoModelForCausalLM.from_pretrained(
            "salmapm/llama2_salma",
            torch_dtype=torch.float16,
        )
    return model
def generate_text(prompt):
    """Generate text from ``prompt`` with the fine-tuned model.

    Args:
        prompt: The text to feed to the model.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    model = load_model()

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask; moving the tensors to the model's device keeps generate() from
    # mixing devices if the model does not live on the CPU.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    try:
        output = model.generate(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            # max_new_tokens bounds only the generated part; the previous
            # max_length=50 also counted the prompt, so a prompt of 50+
            # tokens left no room to generate anything.
            max_new_tokens=50,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            num_beams=5,
        )
        result = tokenizer.decode(output[0], skip_special_tokens=True)
    finally:
        # Release cached GPU memory even when generation fails.
        torch.cuda.empty_cache()
    return result
# Build the Gradio interface.
# The tokenizer is loaded from the same repository id that load_model() uses;
# the previous value was the unfilled placeholder
# "your-hugging-face-model-name", which cannot be resolved at startup.
# NOTE(review): assumes "salmapm/llama2_salma" ships its tokenizer files —
# confirm, or point this at the base model's tokenizer instead.
tokenizer = AutoTokenizer.from_pretrained("salmapm/llama2_salma")

demo = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="Mon modèle fine-tuné LLAMA2",
    description="Entrez un prompt et le modèle générera du texte.",
)

# Only start the web server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()