from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)

# Load the model and tokenizer
model_id = "gpt2"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(model_id)

@app.route("/generate", methods=["POST"])
def generate():
    data = request.get_json()
    prompt = data.get("prompt", "")

    # Tokenize the input (returns input_ids and attention_mask)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate a response with the model; do_sample=True is required
    # for temperature to take effect, and max_new_tokens bounds only
    # the generated text rather than prompt + generation
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return jsonify({"response": response})

if __name__ == "__main__":
    app.run(debug=True)
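
# A minimal sketch of a client call, assuming the server is running on
# Flask's default debug host/port (127.0.0.1:5000) and that the
# `requests` library is installed:
#
#   import requests
#
#   resp = requests.post(
#       "http://127.0.0.1:5000/generate",
#       json={"prompt": "Once upon a time"},
#   )
#   print(resp.json()["response"])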