Mezaya
/

MezayaAI

Text Generation

Model card Files Files and versions

MezayaAI / app.py

Mezaya's picture

Upload app.py with huggingface_hub

948874b verified 3 months ago

history blame contribute delete

2.5 kB


	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	import os

	# Load model and tokenizer from Hugging Face Hub
	# This assumes your model is uploaded to your-username/my-qwen-model
	# and that MODEL_ID is set correctly either as an env var or hardcoded.
	# For Hugging Face Spaces, your repo_id is usually inferred.

	# You can also hardcode your model ID if you prefer:
	# MODEL_ID = "your-username/my-qwen-2.5-3b-instruct"
	MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")

	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
	model = AutoModelForCausalLM.from_pretrained(
	MODEL_ID,
	torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
	device_map="auto"
	)

	def hf_chat(system_prompt: str, user_text: str, max_tokens: int = 220) -> str:
	prompt = f'''<\|system\|>
	{system_prompt.strip()}
	<\|user\|>
	{user_text.strip()}
	<\|assistant\|>
	'''

	inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

	with torch.inference_mode():
	output_ids = model.generate(
	**inputs,
	max_new_tokens=max_tokens,
	do_sample=False,
	temperature=0.0,
	use_cache=True
	)

	generated_ids = output_ids[0][len(inputs.input_ids[0]):]
	text = tokenizer.decode(generated_ids, skip_special_tokens=True)

	for token in ["<\|assistant\|>", "<\|user\|>", "<\|system\|>", "<\|im_end\|>", "<\|im_start\|>" ]:
	if token in text:
	text = text.split(token)[0].strip()

	return text.strip()


	def predict(message, history, system_prompt_input):
	# Reconstruct chat history for the model if needed, or just use current message
	# For this simple example, we'll only use the current message and system prompt
	response = hf_chat(system_prompt_input, message)
	return response


	with gr.Blocks() as demo:
	gr.Markdown("# MezayaAI Qwen2.5-3B-Instruct Demo")
	system_prompt_input = gr.Textbox(label="System Prompt", value="You are a helpful AI assistant.", lines=2)
	chatbot = gr.ChatInterface(
	predict,
	chatbot=gr.Chatbot(height=300),
	textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
	# examples=["What is the capital of France?", "Explain quantum physics."],
	title="Qwen2.5-3B-Instruct Chat",
	description="Ask Qwen2.5-3B-Instruct anything!",
	theme="soft",
	)

	if __name__ == "__main__":
	demo.launch(debug=True)