# sseymens — Update app.py (commit 6596796, verified)
import os, json, torch, gc
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
HF_TOKEN = os.getenv("HF_TOKEN")  # read from the Space's secrets; None when unset
MODEL_ID = "Qwen/Qwen1.5-14B-Chat"
CACHE_DIR = "./qwen-cache"
MEMORY_FILE = os.path.join(CACHE_DIR, "chat_history.json")  # rolling chat log lives here
os.makedirs(CACHE_DIR, exist_ok=True)  # ensure the cache/history directory exists
# ---------------------------------------------------------------------------
# Load tokenizer & model (shared download/auth options factored out)
# ---------------------------------------------------------------------------
_hub_kwargs = dict(
    cache_dir=CACHE_DIR,
    token=HF_TOKEN,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **_hub_kwargs)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # half-precision weights to fit the 14B model
    device_map="auto",           # let accelerate place layers on available devices
    **_hub_kwargs,
)
model.eval()  # inference only — disable dropout etc.
# Chat function
def chat_fn(message, history, max_new_tokens=32000):
    """Generate a reply to `message` given the Gradio chat `history`.

    Args:
        message: The user's latest message (str).
        history: List of (user_msg, assistant_msg) tuples from prior turns.
        max_new_tokens: Generation budget; default preserves the original cap.

    Returns:
        The assistant's reply as a plain string.

    Side effects: persists the last 20 exchanges to MEMORY_FILE (best-effort)
    and releases cached GPU memory after each turn.
    """
    # Rebuild the full conversation in the chat-template message format.
    msgs = [{"role": "system", "content": "You are Obsidian, a helpful AI assistant."}]
    for user_msg, assistant_msg in history:
        msgs.append({"role": "user", "content": user_msg})
        msgs.append({"role": "assistant", "content": assistant_msg})
    msgs.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # inference_mode: skip autograd bookkeeping (the original built it needlessly,
    # wasting memory). Passing attention_mask is required for reliable generation
    # (transformers warns without it); pad_token_id silences the per-call warning.
    with torch.inference_mode():
        output_ids = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    # Decode only the newly generated tail, not the echoed prompt.
    generated_ids = output_ids[0][inputs.input_ids.shape[-1]:]
    reply = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    # Persist the most recent 20 turns; best-effort — a disk error must not
    # break the chat response itself.
    updated_history = history + [(message, reply)]
    try:
        with open(MEMORY_FILE, "w", encoding="utf-8") as f:
            json.dump(updated_history[-20:], f, ensure_ascii=False)
    except OSError:
        pass  # persistence is optional; never fail the reply on disk errors
    # Free transient allocations between turns (long-lived Space process).
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return reply
# Launch Gradio app — builds the chat UI and blocks serving requests.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="Obsidian Chatbot",
    theme="soft",
)
demo.launch()