import os

import torch
from flask import Flask, render_template, request, redirect, url_for
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# --- Load Model & Tokenizer ---
base_model_name = "unsloth/llama-3.2-3b-bnb-4bit"
adapter_model_name = "aismaanly/ai_synthetic"
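
# 4-bit NF4 quantization with nested (double) quantization; compute in bfloat16.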
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
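
# device_map="auto" lets Accelerate place the model's layers on the available
# GPU(s), spilling over to CPU if necessary.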
| print("Loading base model...") | |
| model = AutoModelForCausalLM.from_pretrained( | |
| base_model_name, | |
| quantization_config=bnb_config, | |
| device_map="auto" | |
| ) | |
| print("Loading tokenizer...") | |
| tokenizer = AutoTokenizer.from_pretrained(base_model_name) | |
| print("Loading PEFT adapter...") | |
| model = PeftModel.from_pretrained(model, adapter_model_name) | |
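# Fold the LoRA adapter weights into the base model and drop the PEFT wrapper,
# so inference runs on a single merged model.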
model = model.merge_and_unload()
print("Model ready!")

# --- Flask App ---
app = Flask(__name__)


@app.route("/")
def index():
    return render_template("index.html")


@app.route("/generate", methods=["POST"])
def generate():
    prompt = request.form["prompt"]
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return redirect(url_for("result", generated_text=text))
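
# The generated text reaches /result through the redirect's query string;
# fine for short completions, but very long outputs can exceed URL limits.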
@app.route("/result")
def result():
    generated_text = request.args.get("generated_text", "")
    return render_template("result.html", generated_text=generated_text)


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=port)
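
# Quick local check -- a sketch, not part of the app; it assumes the server
# above is running on localhost:7860 and that `requests` is installed:
#
#   import requests
#   resp = requests.post("http://localhost:7860/generate",
#                        data={"prompt": "Hello"})  # follows redirect to /result
#   print(resp.text)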