import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
from threading import Thread
# Model configuration - using a smaller model that works well on CPU
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Global variables for model and tokenizer
model = None
tokenizer = None
def load_model():
"""Load the model and tokenizer"""
global model, tokenizer
if model is None:
print("Loading model... This may take a moment on CPU.")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
torch_dtype=torch.float32,
device_map="cpu",
low_cpu_mem_usage=True
)
print("Model loaded successfully!")
return model, tokenizer
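# Note: float32 is the safe default on CPU. As a sketch, bfloat16 can roughly
# halve memory, assuming the CPU and PyTorch build support it:
#   AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16, device_map="cpu")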
# Default system prompts
SYSTEM_PROMPTS = {
"Default Assistant": "You are a helpful, harmless, and honest AI assistant. Provide clear, accurate, and thoughtful responses.",
"Creative Writer": "You are a creative writing assistant. Help users with storytelling, poetry, and imaginative content. Be expressive and artistic.",
"Code Helper": "You are an expert programmer. Help users write, debug, and understand code. Provide clear explanations and best practices.",
"Socratic Teacher": "You are a Socratic teacher. Instead of giving direct answers, guide users to discover answers through thoughtful questions.",
"Friendly Chat": "You are a friendly conversational partner. Be warm, engaging, and personable. Use casual language and show genuine interest.",
"Custom": ""
}
def format_chat_prompt(messages, system_prompt):
"""Format messages for TinyLlama chat format"""
formatted = f"<|system|>\n{system_prompt}</s>\n"
for msg in messages:
if msg["role"] == "user":
formatted += f"<|user|>\n{msg['content']}</s>\n"
elif msg["role"] == "assistant":
formatted += f"<|assistant|>\n{msg['content']}</s>\n"
formatted += "<|assistant|>\n"
return formatted
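# For reference, a single-turn conversation renders as:
#
#   <|system|>
#   You are a helpful assistant.</s>
#   <|user|>
#   Hello!</s>
#   <|assistant|>
#
# The model's bundled chat template should produce an equivalent string via
# tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True),
# assuming the system message is included in `messages`.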
def chat(message, history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
"""Main chat function with streaming support"""
global model, tokenizer
# Load model if not loaded
if model is None:
yield "⏳ Loading model for the first time... Please wait (this may take 1-2 minutes on CPU)..."
load_model()
# Determine system prompt
if system_prompt_choice == "Custom":
system_content = custom_system_prompt if custom_system_prompt.strip() else SYSTEM_PROMPTS["Default Assistant"]
else:
system_content = SYSTEM_PROMPTS.get(system_prompt_choice, SYSTEM_PROMPTS["Default Assistant"])
# Build messages list
messages = []
for msg in history:
if msg["role"] in ["user", "assistant"]:
messages.append({"role": msg["role"], "content": msg["content"]})
messages.append({"role": "user", "content": message})
try:
# Format the prompt
prompt = format_chat_prompt(messages, system_content)
        # Tokenize, truncating so prompt + new tokens fit TinyLlama's 2048-token context
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048 - int(max_tokens))
# Set up streamer
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        # Generation parameters: greedy decoding when temperature is 0,
        # nucleus sampling otherwise (sampling flags are only passed when
        # do_sample is True, avoiding transformers warnings)
        do_sample = temperature > 0
        generation_kwargs = {
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"],
            "max_new_tokens": max_tokens,
            "do_sample": do_sample,
            "streamer": streamer,
            "pad_token_id": tokenizer.eos_token_id,
            "eos_token_id": tokenizer.eos_token_id,
        }
        if do_sample:
            generation_kwargs["temperature"] = temperature
            generation_kwargs["top_p"] = top_p
# Run generation in a separate thread
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
# Stream the response
response = ""
for new_text in streamer:
response += new_text
# Clean up any remaining special tokens
clean_response = response.replace("</s>", "").strip()
yield clean_response
thread.join()
except Exception as e:
yield f"❌ Error: {str(e)}\n\nPlease try again with a shorter message or lower max tokens."
def clear_chat():
"""Clear the chat history"""
return [], ""
def export_chat(history):
"""Export chat history as text"""
if not history:
return "No chat history to export."
export_text = "# Chat Export\n\n"
for msg in history:
role = "👤 User" if msg["role"] == "user" else "🤖 Assistant"
export_text += f"## {role}\n{msg['content']}\n\n---\n\n"
return export_text
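# Example export_chat output for a single exchange (illustrative content):
#
#   # Chat Export
#
#   ## 👤 User
#   Hello!
#
#   ---
#
#   ## 🤖 Assistant
#   Hi! How can I help?
#
#   ---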
# Custom CSS
css = """
.header-container {
text-align: center;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 12px;
margin-bottom: 20px;
}
.header-container h1 {
color: white;
margin: 0;
font-size: 2em;
}
.header-container p {
color: rgba(255,255,255,0.9);
margin: 10px 0 0 0;
}
.header-container a {
color: #ffd700;
text-decoration: none;
font-weight: bold;
}
.header-container a:hover {
text-decoration: underline;
}
.info-box {
background: var(--background-fill-secondary);
padding: 10px 15px;
border-radius: 8px;
margin: 10px 0;
border-left: 4px solid #667eea;
}
.chatbot-container {
min-height: 500px;
}
"""
# Build the interface
with gr.Blocks(
    title="TinyLlama Chatbot (CPU)",
    theme=gr.themes.Soft(),
    css=css,
    fill_height=True
) as demo:
# Header
gr.HTML("""
<div class="header-container">
<h1>🦙 TinyLlama Chatbot</h1>
<p>Powered by TinyLlama-1.1B-Chat - Running locally on CPU</p>
<p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
</div>
""")
gr.HTML("""
<div class="info-box">
ℹ️ <strong>CPU Mode:</strong> This chatbot runs entirely on CPU without any API calls.
First response may take longer as the model loads. Responses are generated locally.
</div>
""")
with gr.Row():
# Main chat column
with gr.Column(scale=3):
chatbot = gr.Chatbot(
label="Chat",
height=500,
type="messages",
show_copy_button=True,
render_markdown=True,
elem_classes=["chatbot-container"]
)
with gr.Row():
msg = gr.Textbox(
placeholder="Type your message here... (Press Enter to send)",
label="Message",
scale=4,
lines=2,
max_lines=5,
autofocus=True
)
send_btn = gr.Button("Send 📤", variant="primary", scale=1)
with gr.Row():
clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
regenerate_btn = gr.Button("🔄 Regenerate", variant="secondary")
export_btn = gr.Button("📥 Export", variant="secondary")
# Settings sidebar
with gr.Column(scale=1):
gr.Markdown("### ⚙️ Settings")
with gr.Accordion("System Prompt", open=True):
system_prompt_choice = gr.Dropdown(
choices=list(SYSTEM_PROMPTS.keys()),
value="Default Assistant",
label="Preset Prompts",
interactive=True
)
custom_system_prompt = gr.Textbox(
label="Custom System Prompt",
placeholder="Enter your custom system prompt here...",
lines=4,
visible=False
)
with gr.Accordion("Generation Parameters", open=False):
temperature = gr.Slider(
minimum=0.0,
maximum=2.0,
value=0.7,
step=0.1,
label="Temperature",
info="Higher = more creative, Lower = more focused"
)
max_tokens = gr.Slider(
minimum=32,
maximum=512,
value=256,
step=32,
label="Max Tokens",
info="Maximum response length (lower = faster on CPU)"
)
top_p = gr.Slider(
minimum=0.0,
maximum=1.0,
value=0.9,
step=0.05,
label="Top P",
info="Nucleus sampling parameter"
)
# Export output
export_output = gr.Textbox(
label="Exported Chat",
lines=10,
visible=False,
show_copy_button=True
)
# Examples
gr.Markdown("### 💡 Example Prompts")
gr.Examples(
examples=[
["Explain what machine learning is in simple terms"],
["Write a short poem about the ocean"],
["What are three tips for staying productive?"],
["Tell me a fun fact about space"],
["How do I make a simple pasta dish?"],
],
inputs=msg,
label=""
)
# Event handlers
def toggle_custom_prompt(choice):
return gr.Textbox(visible=(choice == "Custom"))
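    # Returning a component with only the changed property is Gradio 4's update
    # idiom; gr.update(visible=choice == "Custom") would work equivalently here.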
system_prompt_choice.change(
toggle_custom_prompt,
inputs=[system_prompt_choice],
outputs=[custom_system_prompt]
)
def user_message(message, history):
if message.strip():
history.append({"role": "user", "content": message})
return "", history
    def bot_response(history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
        # Only respond when the last entry is a user message (an empty
        # submission leaves history unchanged, ending on an assistant turn)
        if not history or history[-1]["role"] != "user":
            yield history
            return
        user_msg = history[-1]["content"]
        history_for_api = history[:-1]
        history.append({"role": "assistant", "content": ""})
for response in chat(user_msg, history_for_api, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
history[-1]["content"] = response
yield history
    def regenerate(history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
        # Need at least one full exchange, ending with an assistant reply
        if len(history) >= 2 and history[-1]["role"] == "assistant":
            # Drop the last assistant message and replay the last user message
            history = history[:-1]
            user_msg = history[-1]["content"]
            history_for_api = history[:-1]
            history.append({"role": "assistant", "content": ""})
for response in chat(user_msg, history_for_api, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
history[-1]["content"] = response
yield history
else:
yield history
def show_export(history):
export_text = export_chat(history)
return gr.Textbox(visible=True, value=export_text)
# Wire up events
msg.submit(
user_message,
inputs=[msg, chatbot],
outputs=[msg, chatbot],
queue=False
).then(
bot_response,
inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p],
outputs=[chatbot]
)
send_btn.click(
user_message,
inputs=[msg, chatbot],
outputs=[msg, chatbot],
queue=False
).then(
bot_response,
inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p],
outputs=[chatbot]
)
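    # Both submit paths share a two-step pattern: user_message appends the turn
    # immediately (queue=False bypasses the queue), then bot_response streams
    # the reply into the final history entry.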
clear_btn.click(
clear_chat,
outputs=[chatbot, msg]
)
regenerate_btn.click(
regenerate,
inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p],
outputs=[chatbot]
)
export_btn.click(
show_export,
inputs=[chatbot],
outputs=[export_output]
)
if __name__ == "__main__":
    # The model loads lazily on the first message; call load_model() here
    # instead to pre-load it at startup (slower start, faster first reply).
    print("Starting TinyLlama Chatbot...")
demo.launch()