"""Flask service that narrates browser-game events with a causal language model.

Exposes one endpoint, ``POST /narrate``, which takes a JSON object with an
``event`` field (and an optional ``score``) and responds with
``{"text": <generated narration>}``.
"""
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

app = Flask(__name__)

model_id = "google/gemma-3-1b-it"

# Tokenizer and model are loaded once at import time and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # Half precision only when CUDA is available; float32 otherwise.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)


def generate(prompt):
    """Sample a continuation of *prompt* from the model and return it as text.

    Args:
        prompt: The full text prompt fed to the model.

    Returns:
        The newly generated text only (at most 60 new tokens), with special
        tokens removed and surrounding whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    out = model.generate(
        **inputs,
        max_new_tokens=60,
        temperature=0.9,
        do_sample=True,
    )
    # The returned sequence starts with the prompt tokens; slice them off so
    # the caller gets the narration rather than the prompt echoed back.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = out[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


@app.route("/narrate", methods=["POST"])
def narrate():
    """Handle ``POST /narrate``: build the narrator prompt and return its output.

    Request body (JSON object):
        event: Description of the game event to narrate (required).
        score: Current score (optional, defaults to 0).

    Returns:
        A JSON response ``{"text": ...}`` on success, or a 400 response with
        an ``error`` message when the body is not a JSON object or lacks
        ``event``.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so all bad input is rejected through the same 400 path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or "event" not in data:
        return jsonify({"error": "JSON body with an 'event' field is required"}), 400

    prompt = f"""
You are Myco, a mystical forest spirit narrator in a browser game.
Respond in 1 short poetic sentence (max 20 words).
Event: {data['event']}
Score: {data.get('score', 0)}
"""
    text = generate(prompt)
    return jsonify({"text": text})


# Start the development server only when executed as a script, so importing
# this module (WSGI servers, tests) does not block on app.run().
if __name__ == "__main__":
    app.run(port=5000)