Spaces: Build error
```python
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# --- MODEL CONFIG ---
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"  # You can swap in Llama 3, Qwen, etc.
# For better speed on free GPUs you could use a GGUF build such as
# "TheBloke/Mistral-7B-Instruct-v0.3-GGUF" with llama.cpp, but that requires a
# different loader. For simplicity and HF compatibility we use the HF weights
# with 4-bit quantization.

# Load tokenizer and model with 4-bit quantization for low memory usage.
# Note: passing load_in_4bit=True directly to from_pretrained is deprecated in
# recent transformers releases and is a frequent source of Spaces errors;
# pass a BitsAndBytesConfig instead.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    trust_remote_code=True,
)
```
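A common cause of build errors on Spaces is missing Python dependencies: 4-bit loading needs `bitsandbytes` and `accelerate`, and the Mistral tokenizer may need `sentencepiece`, none of which ship with the base image. A plausible `requirements.txt` for this Space (the exact package set is an assumption; pin versions to taste):

```
gradio
torch
transformers
accelerate
bitsandbytes
sentencepiece
```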
```python
# System prompt to guide behavior (like HuggingChat)
SYSTEM_PROMPT = """You are RunAshChat, a helpful, honest, and harmless AI assistant.
You are open-source, privacy-respecting, and do not store any user data.
Answer clearly, concisely, and thoughtfully. Avoid harmful, unethical, or biased content.
If you don't know something, say so."""

def format_prompt(message, history):
    # Mistral-Instruct format: <s>[INST] prompt [/INST] answer</s>
    # The string already contains <s>/</s> markers, so respond() tokenizes with
    # add_special_tokens=False to avoid a doubled BOS token.
    full_prompt = f"<s>[INST] {SYSTEM_PROMPT}\n\n"
    for user_msg, bot_msg in history:
        full_prompt += f"{user_msg} [/INST] {bot_msg}</s><s>[INST] "
    full_prompt += f"{message} [/INST]"
    return full_prompt
```
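Hand-rolling the `[INST]` markup works, but on a recent transformers release the tokenizer ships its own chat template. A minimal alternative sketch, assuming transformers >= 4.34 (and noting that Mistral's template only accepts alternating user/assistant roles, so the system prompt is folded into the first user turn; `format_prompt_with_template` is an illustrative name, not part of the original app):

```python
def format_prompt_with_template(message, history):
    # Build an OpenAI-style message list; prepend the system prompt to the
    # first user message because Mistral's template rejects a "system" role.
    messages = []
    for i, (user_msg, bot_msg) in enumerate(history):
        prefix = f"{SYSTEM_PROMPT}\n\n" if i == 0 else ""
        messages.append({"role": "user", "content": prefix + user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    prefix = f"{SYSTEM_PROMPT}\n\n" if not history else ""
    messages.append({"role": "user", "content": prefix + message})
    # Returns the rendered prompt string, ending with the assistant cue.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
```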
```python
def respond(message, history):
    history = history or []
    prompt = format_prompt(message, history)
    # The prompt already contains <s>, so skip the tokenizer's automatic BOS,
    # and move the tensors to wherever device_map="auto" placed the model.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens. In the v0.3 tokenizer [INST] and
    # [/INST] are special tokens, so decoding everything with
    # skip_special_tokens=True and then splitting on "[/INST]" would fail.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    # Return the full updated history (what gr.Chatbot expects) plus an empty
    # string to clear the input textbox.
    history.append((message, response))
    return history, ""
```
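If you would rather stream tokens into the UI instead of waiting for the full reply, here is a sketch using `transformers.TextIteratorStreamer`; the function name and wiring are illustrative assumptions, not part of the original app:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def respond_stream(message, history):
    history = history or []
    prompt = format_prompt(message, history)
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    # skip_prompt=True makes the streamer yield only newly generated text.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512,
                    do_sample=True, temperature=0.7, top_p=0.9),
    ).start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        # Yield the growing history so Gradio re-renders the last bubble.
        yield history + [(message, partial)], ""

# Wire it exactly like respond:
# msg.submit(respond_stream, [msg, chatbot], [chatbot, msg])
```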
```python
# --- GRADIO INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft(), title="RunAshChat") as demo:
    gr.Markdown("""
    # 🚀 RunAshChat
    *Your open-source, privacy-first AI chat companion, inspired by HuggingChat.*
    """)

    chatbot = gr.Chatbot(
        height=600,
        bubble_full_width=False,  # removed in Gradio 5; drop this kwarg there
        avatar_images=(None, "https://huggingface.co/datasets/huggingface/branding/resolve/main/huggingface-logo.svg"),
    )
    msg = gr.Textbox(
        placeholder="Ask me anything... (e.g., 'Explain quantum computing like I'm 10')",
        label="Your message",
        container=False,
    )
    with gr.Row():
        clear = gr.Button("🧹 Clear Chat")
        export = gr.Button("💾 Export Chat")
    # Define the export target up front so the click handler can reference it.
    export_box = gr.Textbox(label="Exported Chat", lines=15)

    def clear_chat():
        return None, ""

    def export_chat(chat_history):
        if not chat_history:
            return "No conversation to export."
        return "\n\n".join(f"👤 You: {q}\n🤖 RunAshChat: {a}" for q, a in chat_history)

    # respond returns (updated history, "") so the textbox clears after sending.
    msg.submit(respond, [msg, chatbot], [chatbot, msg])
    clear.click(clear_chat, None, [chatbot, msg])
    export.click(export_chat, chatbot, export_box)

demo.launch()
```
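One last note on the streaming variant: generator callbacks need Gradio's queue. On Gradio 4+ the queue is enabled by default, but on 3.x you would call `demo.queue().launch()` instead of `demo.launch()` (version behavior here is from memory, so verify against your pinned Gradio).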