import os
import sys

import torch
from flask import Flask, request, render_template
from transformers import AutoModelForCausalLM, AutoTokenizer

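# Flask app that serves text generation from a locally fine-tuned causal LM.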
app = Flask(__name__)

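# Force CPU-only inference: hide any CUDA devices before the model is loaded.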
os.environ["CUDA_VISIBLE_DEVICES"] = ""
device = torch.device("cpu")

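# Local directory holding the fine-tuned model and tokenizer
# (presumably saved via save_pretrained()).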
model_path = "./fine_tuned_model"
tokenizer_path = "./fine_tuned_model"

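# Load the tokenizer and model from local files only; exit early if either is missing.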
try:
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_path, torch_dtype=torch.float32, local_files_only=True
    )
    model.to(device)
    model.eval()
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    sys.exit(1)

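# GPT-style models often ship without a pad token; fall back to EOS for padding.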
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

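# Single page: GET renders the prompt form, POST runs generation on the
# submitted prompt and re-renders the page with the result.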
@app.route("/", methods=["GET", "POST"])
def index():
    generated_text = ""
    if request.method == "POST":
        prompt = request.form.get("prompt", "")
        if prompt:
            inputs = tokenizer(
                prompt, return_tensors="pt", padding=True, truncation=True, max_length=128
            ).to(device)
            # no_grad skips autograd bookkeeping during inference.
            with torch.no_grad():
                outputs = model.generate(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    # max_new_tokens counts only generated tokens; the original
                    # max_length=50 would be exceeded by any prompt longer than
                    # 50 tokens (inputs are truncated at 128).
                    max_new_tokens=50,
                    num_return_sequences=1,
                    no_repeat_ngram_size=2,  # block verbatim bigram repeats
                    do_sample=True,          # sample instead of greedy decoding
                    top_k=50,
                    top_p=0.95,
                    temperature=0.7,
                    pad_token_id=tokenizer.eos_token_id,
                )
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return render_template("index.html", generated_text=generated_text)

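# Expects templates/index.html containing a form that POSTs a "prompt" field
# and renders the generated_text variable.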
if __name__ == "__main__":
    # Debug-mode development server; not suitable for production deployment.
    app.run(debug=True)