startup-advisor / llm.py
sanjusanjay's picture
Deploy Startup Advisor (RAG + Phi-2 LoRA)
ca4ada9
raw
history blame contribute delete
953 Bytes
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Identifier of the base causal LM; passed to both the tokenizer and the
# model `from_pretrained` calls in `load_model`.
BASE_MODEL = "microsoft/phi-2"
# Identifier of the LoRA adapter that `load_model` applies on top of the
# base model through `PeftModel.from_pretrained`.
LORA_MODEL = "sanjusanjay/phi-2-startup-advisor-lora"
def load_model():
    """Build the LoRA-adapted model and its tokenizer.

    The tokenizer for ``BASE_MODEL`` is loaded first and its padding token is
    set to its end-of-sequence token. The base model is then loaded in
    float16 with ``device_map="auto"``, wrapped with the ``LORA_MODEL``
    adapter, and switched to evaluation mode.

    Returns:
        A ``(model, tokenizer)`` tuple: the adapter-wrapped model in eval
        mode and the configured tokenizer.
    """
    tok = AutoTokenizer.from_pretrained(BASE_MODEL)
    # Reuse EOS as the padding token so the tokenizer always has one defined.
    tok.pad_token = tok.eos_token

    # Half precision weights; placement across devices is left to the loader.
    base_kwargs = {"torch_dtype": torch.float16, "device_map": "auto"}
    backbone = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **base_kwargs)

    # Attach the LoRA adapter weights on top of the base model.
    adapted = PeftModel.from_pretrained(backbone, LORA_MODEL)
    adapted.eval()

    return adapted, tok
def generate_response(prompt, model, tokenizer):
    """Generate a sampled completion for ``prompt`` and return it as text.

    Args:
        prompt: Text passed to ``tokenizer`` to build the model inputs.
        model: Object exposing ``device`` and ``generate(**kwargs)``, such as
            the model returned by ``load_model``.
        tokenizer: Callable tokenizer that also provides ``decode``,
            ``pad_token_id`` and ``eos_token_id``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
        NOTE(review): the whole of ``outputs[0]`` is decoded; for a
        decoder-only model this presumably starts with the prompt itself --
        confirm that callers expect (or strip) the echoed prompt.
    """
    # Tokenize to PyTorch tensors and move them to wherever the model lives.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Pass the padding id explicitly instead of relying on generate()'s
    # implicit fallback, which emits a warning on every call when the model
    # config has no pad token. Fall back to EOS if the tokenizer has none.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=350,
            temperature=0.4,
            top_p=0.9,
            do_sample=True,
            pad_token_id=pad_id,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)