# sseymens — Update app.py (commit 6596796, verified)
import os, json, torch, gc
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
HF_TOKEN = os.getenv("HF_TOKEN")  # read from the Space's secrets; None when unset
MODEL_ID = "Qwen/Qwen1.5-14B-Chat"
CACHE_DIR = "./qwen-cache"
MEMORY_FILE = os.path.join(CACHE_DIR, "chat_history.json")  # rolling chat log lives here
os.makedirs(CACHE_DIR, exist_ok=True)  # ensure the cache/history directory exists
# ---------------------------------------------------------------------------
# Load tokenizer & model (shared download/auth options factored out)
# ---------------------------------------------------------------------------
_hub_kwargs = dict(
    cache_dir=CACHE_DIR,
    token=HF_TOKEN,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **_hub_kwargs)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # half-precision weights to fit the 14B model
    device_map="auto",           # let accelerate place layers on available devices
    **_hub_kwargs,
)
model.eval()  # inference only — disable dropout etc.
# Chat function
def chat_fn(message, history, max_new_tokens=32000):
    """Generate a reply to `message` given the Gradio chat `history`.

    Args:
        message: The user's latest message (str).
        history: List of (user_msg, assistant_msg) tuples from prior turns.
        max_new_tokens: Generation budget; default preserves the original cap.

    Returns:
        The assistant's reply as a plain string.

    Side effects: persists the last 20 exchanges to MEMORY_FILE (best-effort)
    and releases cached GPU memory after each turn.
    """
    # Rebuild the full conversation in the chat-template message format.
    msgs = [{"role": "system", "content": "You are Obsidian, a helpful AI assistant."}]
    for user_msg, assistant_msg in history:
        msgs.append({"role": "user", "content": user_msg})
        msgs.append({"role": "assistant", "content": assistant_msg})
    msgs.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # inference_mode: skip autograd bookkeeping (the original built it needlessly,
    # wasting memory). Passing attention_mask is required for reliable generation
    # (transformers warns without it); pad_token_id silences the per-call warning.
    with torch.inference_mode():
        output_ids = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    # Decode only the newly generated tail, not the echoed prompt.
    generated_ids = output_ids[0][inputs.input_ids.shape[-1]:]
    reply = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    # Persist the most recent 20 turns; best-effort — a disk error must not
    # break the chat response itself.
    updated_history = history + [(message, reply)]
    try:
        with open(MEMORY_FILE, "w", encoding="utf-8") as f:
            json.dump(updated_history[-20:], f, ensure_ascii=False)
    except OSError:
        pass  # persistence is optional; never fail the reply on disk errors
    # Free transient allocations between turns (long-lived Space process).
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return reply
# Launch Gradio app — builds the chat UI and blocks serving requests.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="Obsidian Chatbot",
    theme="soft",
)
demo.launch()