Spaces:
Sleeping
Sleeping
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr

# --------------------
# Model setup
# --------------------
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
LORA_REPO = "nitya001/autotrain-oa5ez-0dtoc"

device = "cuda" if torch.cuda.is_available() else "cpu"
# fp16 only on GPU; many CPU kernels are slow or unsupported in half precision.
dtype = torch.float16 if device == "cuda" else torch.float32

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    device_map="auto" if device == "cuda" else None,
)

print("Loading LoRA adapter:", LORA_REPO)
model = PeftModel.from_pretrained(base_model, LORA_REPO)
# BUG FIX: on CUDA the model was loaded with device_map="auto", so accelerate
# has already placed (and possibly sharded/offloaded) its weights; calling
# .to() on such a model is redundant and can raise. Move manually only when
# device_map was not used (the CPU path).
if device != "cuda":
    model.to(device)
model.eval()

# Generic system prompt prepended to every conversation.
SYSTEM_PROMPT = (
    "You are a helpful AI assistant. "
    "Answer clearly, accurately, and concisely. "
    "If you do not know something, say so honestly."
)
# --------------------
# Generation function
# --------------------
def generate_reply(message: str, history: list) -> str:
    """Generate one assistant reply in the TinyLlama chat format.

    Builds a <|system|>/<|user|>/<|assistant|> prompt from the prior
    turns plus the new user message, samples a continuation from the
    LoRA-adapted model, and returns the decoded text (or a fallback
    sentence if generation produced nothing usable).

    Args:
        message: The latest user message.
        history: Prior turns as {"role": ..., "content": ...} dicts.

    Returns:
        The assistant's reply as plain text.
    """
    # Assemble the prompt as segments, then join once at the end.
    segments = [f"<|system|>{SYSTEM_PROMPT}</s>\n"]
    for turn in history:
        turn_role = turn.get("role", "user")
        turn_text = turn.get("content", "")
        if turn_role == "user":
            segments.append(f"<|user|>{turn_text}</s>\n")
        elif turn_role == "assistant":
            segments.append(f"<|assistant|>{turn_text}</s>\n")
    segments.append(f"<|user|>{message}</s>\n<|assistant|>")
    prompt = "".join(segments)

    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=2048,
    ).to(device)

    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Drop the prompt tokens; decode only the newly generated tail.
    new_tokens = generated[0][encoded["input_ids"].shape[-1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return reply or "I'm not sure how to answer that. Could you rephrase?"
# --------------------
# Gradio UI
# --------------------
# BUG FIX: generate_reply parses history entries as {"role", "content"}
# dicts, so the ChatInterface must use the "messages" history format.
# Without type="messages", older Gradio versions default to (user, bot)
# tuple pairs, which would break msg.get(...) inside generate_reply.
demo = gr.ChatInterface(
    fn=generate_reply,
    type="messages",
    title="Custom AI Assistant",
    description="Ask anything.",
)

if __name__ == "__main__":
    demo.launch()