import os

import torch
from flask import Flask, request, render_template
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
# Flask application object; the route below is registered on it.
app = Flask(__name__)

# Directory (relative to the working directory) that holds the model
# weights and tokenizer files.
model_path = "./qwen2.5_1.5b_model"

# Both objects are created once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# NOTE(review): half precision on CPU is typically slow and, depending on the
# installed torch version, may lack kernels for some ops -- confirm float16
# is intended here rather than float32.
# NOTE(review): trust_remote_code=True lets code shipped inside the model
# directory run during loading -- confirm the checkpoint source is trusted.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="cpu",
)
|
|
|
def _generate_reply(prompt):
    """Sample one continuation of *prompt* from the module-level model.

    Args:
        prompt: Raw text submitted by the user.

    Returns:
        The first generated sequence decoded with special tokens removed.
        For a causal LM the sequence returned by ``generate`` begins with
        the prompt tokens, so the text starts with the (possibly truncated)
        prompt.
    """
    # The prompt is cut to at most 50 tokens; anything beyond is dropped.
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=50,
    ).to(model.device)  # follow the model instead of hard-coding "cpu"

    generated = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        # Total length cap (prompt tokens + new tokens), not new tokens only.
        max_length=100,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the prompt form and, on POST, the model's sampled reply.

    GET renders ``index.html`` with ``response`` and ``prompt`` set to
    ``None``.  POST reads the ``prompt`` form field, generates text for it
    and renders ``index.html`` with both values filled in.

    A blank prompt is not sent to the model: the form is rendered again
    with ``response=None``.  An empty string presumably tokenizes to zero
    tokens for this tokenizer, which generation cannot continue from --
    TODO confirm against the deployed tokenizer.

    A POST without a ``prompt`` field still fails on the form lookup, as it
    did before this change.
    """
    if request.method != "POST":
        return render_template("index.html", response=None, prompt=None)

    prompt = request.form["prompt"]
    if not prompt.strip():
        # Nothing meaningful to complete; show the form again.
        return render_template("index.html", response=None, prompt=prompt)

    response = _generate_reply(prompt)
    return render_template("index.html", response=response, prompt=prompt)
|
|
|
if __name__ == "__main__":
    # Debug mode exposes the interactive Werkzeug debugger, which allows code
    # execution from the browser, so it is opt-in via FLASK_DEBUG instead of
    # being hard-coded on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    # The reloader re-executes this script in a child process, which would
    # load the model a second time; keep it off even when debugging.
    app.run(debug=debug_enabled, use_reloader=False)