Spaces:

aberbossio
/

llmchat

Sleeping

llmchat / app.py

Upload 3 files

7d64ae1 verified 3 months ago

1.56 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	model = AutoModelForCausalLM.from_pretrained(
	MODEL_ID,
	torch_dtype=torch.float32
	)


	def build_prompt(message, history):
	prompt = ""
	for user_msg, bot_msg in history:
	prompt += f"<\|user\|>\n{user_msg}\n<\|assistant\|>\n{bot_msg}\n"
	prompt += f"<\|user\|>\n{message}\n<\|assistant\|>\n"
	return prompt


	def chat(message, history):
	prompt = build_prompt(message, history)

	inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)

	with torch.no_grad():
	outputs = model.generate(
	**inputs,
	max_new_tokens=200,
	do_sample=True,
	temperature=0.7,
	top_p=0.9,
	repetition_penalty=1.1,
	pad_token_id=tokenizer.eos_token_id
	)

	full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
	answer = full_text[len(tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)):].strip()

	if not answer:
	answer = "I am here. Ask me something."

	return answer


	demo = gr.ChatInterface(
	fn=chat,
	title="My Local LLM Chat",
	description="TinyLlama chatbot running locally without HF_TOKEN",
	examples=[
	"Hello",
	"Who are you?",
	"Explain pain in simple words",
	"Write a short Python code"
	]
	)

	if __name__ == "__main__":
	demo.launch()