Spaces:
Runtime error
Runtime error
File size: 1,122 Bytes
4945658 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Lazily loaded model: stays None until the first call to load_model().
model = None


def load_model():
    """Return the shared causal language model, loading it on first use.

    The checkpoint "salmapm/llama2_salma" is loaded once and cached in the
    module-level ``model`` variable; later calls return the cached object.

    Returns:
        The model object returned by ``AutoModelForCausalLM.from_pretrained``.
    """
    global model
    if model is None:
        loaded = AutoModelForCausalLM.from_pretrained("salmapm/llama2_salma")
        # Only cast to FP16 when the weights live on a CUDA device. Half
        # precision on CPU is slow and many CPU ops are not implemented for
        # Half, which makes generation fail at request time.
        if loaded.device.type == "cuda":
            loaded.half()
        # Publish the model only once it is fully set up, so a failure above
        # never leaves a half-configured object in the cache.
        model = loaded
    return model
# Text generation
def generate_text(prompt):
    """Generate a continuation of ``prompt`` with the fine-tuned model.

    Args:
        prompt: The user-supplied text to continue.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    model = load_model()
    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask; both tensors are moved to whatever device the model lives on.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    try:
        output = model.generate(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            # max_new_tokens bounds only the generated part; max_length would
            # also count the prompt and leave no room for long prompts.
            max_new_tokens=50,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            num_beams=5,
        )
        return tokenizer.decode(output[0], skip_special_tokens=True)
    finally:
        # Release cached GPU memory even when generation fails.
        torch.cuda.empty_cache()
# Build the Gradio interface
# The tokenizer must come from the same checkpoint as the model. The previous
# value was an unfilled template placeholder that fails at import time.
# NOTE(review): assumes the model repo ships its tokenizer files; confirm.
tokenizer = AutoTokenizer.from_pretrained("salmapm/llama2_salma")

demo = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="Mon modèle fine-tuné LLAMA2",
    description="Entrez un prompt et le modèle générera du texte.",
)

if __name__ == "__main__":
    demo.launch()