Spaces: Running on Zero
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces

# Model configuration
MODEL_ID = "WeiboAI/VibeThinker-1.5B"
SYSTEM_PROMPT = "You are a concise solver. Respond briefly."

# Load model and tokenizer
def load_model():
    """Load the model and tokenizer."""
    try:
        print(f"Loading model: {MODEL_ID}")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("Model loaded successfully!")
        return model, tokenizer
    except Exception as e:
        print(f"Error loading model: {e}")
        raise

# Initialize model and tokenizer
try:
    model, tokenizer = load_model()
except Exception as e:
    print(f"Failed to load model: {e}")
    model = None
    tokenizer = None

@spaces.GPU  # ZeroGPU: attach a GPU for the duration of this call
def chat_response(message, history):
    """
    Generate a response for the chat interface.

    Args:
        message (str): Current user message
        history (list): Chat history as a list of (user_msg, assistant_msg) tuples

    Returns:
        str: Generated response
    """
    if model is None or tokenizer is None:
        return "Model not loaded. Please check the model configuration."
    try:
        # Build conversation in chat-message format
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]

        # Add chat history
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

        # Add current message
        messages.append({"role": "user", "content": message})

        # Apply chat template
        formatted_input = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize input
        model_inputs = tokenizer([formatted_input], return_tensors="pt").to(model.device)

        # Generate response
        with torch.no_grad():
            generated_ids = model.generate(
                **model_inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id
            )

        # Strip the prompt tokens, then decode only the newly generated ones
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return response.strip()
    except Exception as e:
        print(f"Error generating response: {e}")
        return f"Sorry, I encountered an error: {str(e)}"

def create_demo():
    """Create the Gradio chat interface."""
    demo = gr.ChatInterface(
        fn=chat_response,
        title="VibeThinker-1.5B Chat",
        description=f"Chat with {MODEL_ID}. {SYSTEM_PROMPT}",
        examples=[
            "What is 2+2?",
            "Explain quantum physics briefly",
            "Write a short poem",
            "How do I make good decisions?"
        ],
        theme=gr.themes.Soft(),
        show_progress="minimal",
        # Custom button labels are Gradio 4.x parameters (removed in Gradio 5)
        retry_btn="🔄 Retry",
        undo_btn="↩️ Undo",
        clear_btn="🗑️ Clear",
    )
    return demo

if __name__ == "__main__":
    demo = create_demo()
    demo.launch(share=False)
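
# A plausible requirements.txt for this Space (an assumption; the original
# listing does not include one):
#   gradio
#   torch
#   transformers
#   accelerate   # required for device_map="auto"
#   spaces       # ZeroGPU helper, preinstalled on Hugging Face Spaces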