Spaces:

TobDeBer
/

anycoder-d5d087c8

Sleeping

App Files Files Community

anycoder-d5d087c8 / app.py

TobDeBer

Upload folder using huggingface_hub

6ae5993 verified about 2 months ago

raw

history blame contribute delete

9.62 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	import time
	import random

	# Model configuration - using TinyLlama for efficient CPU inference
	MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

	# Global variables for model components
	tokenizer = None
	model = None
	text_generator = None

	def load_model():
	"""Load the Smol LLM model and tokenizer"""
	global tokenizer, model, text_generator
	try:
	print(f"Loading model: {MODEL_NAME}")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForCausalLM.from_pretrained(
	MODEL_NAME,
	torch_dtype=torch.float32, # Use float32 for CPU
	device_map="auto"
	)

	# Create text generation pipeline
	text_generator = pipeline(
	"text-generation",
	model=model,
	tokenizer=tokenizer,
	max_new_tokens=512,
	temperature=0.7,
	top_p=0.95,
	do_sample=True
	)

	# Set pad token if not present
	if tokenizer.pad_token is None:
	tokenizer.pad_token = tokenizer.eos_token

	return "✅ Model loaded successfully!"
	except Exception as e:
	return f"❌ Error loading model: {str(e)}"

	def format_prompt(prompt, system_prompt=None):
	"""Format the prompt for chat-style models"""
	if system_prompt:
	formatted = f"<\|system\|>\n{system_prompt}\n<\|user\|>\n{prompt}\n<\|assistant\|>"
	else:
	formatted = f"<\|user\|>\n{prompt}\n<\|assistant\|>"
	return formatted

	def generate_text(
	prompt,
	max_length=200,
	temperature=0.7,
	top_p=0.95,
	repetition_penalty=1.1,
	system_prompt="You are a helpful AI assistant. Provide clear and concise answers."
	):
	"""Generate text using the loaded model"""
	global text_generator

	if text_generator is None:
	return "⚠️ Please load the model first using the 'Load Model' button."

	if not prompt.strip():
	return "⚠️ Please enter a prompt."

	try:
	# Format the prompt
	formatted_prompt = format_prompt(prompt, system_prompt)

	# Update pipeline parameters
	text_generator.max_new_tokens = max_length
	text_generator.temperature = temperature
	text_generator.top_p = top_p
	text_generator.repetition_penalty = repetition_penalty

	# Generate response
	start_time = time.time()
	result = text_generator(
	formatted_prompt,
	max_new_tokens=max_length,
	temperature=temperature,
	top_p=top_p,
	repetition_penalty=repetition_penalty,
	do_sample=True,
	pad_token_id=tokenizer.eos_token_id,
	eos_token_id=tokenizer.eos_token_id
	)

	generation_time = time.time() - start_time

	# Extract the generated text
	generated_text = result[0]["generated_text"]

	# Extract only the assistant's response
	if "<\|assistant\|>" in generated_text:
	response = generated_text.split("<\|assistant\|>")[-1].strip()
	else:
	response = generated_text

	# Format output with metadata
	output = f"Response:\n{response}\n\n---\nGenerated in {generation_time:.2f} seconds"

	return output

	except Exception as e:
	return f"❌ Error during generation: {str(e)}"

	def clear_chat():
	"""Clear the chat interface"""
	return "", ""

	# Create custom theme
	custom_theme = gr.themes.Soft(
	primary_hue="blue",
	secondary_hue="indigo",
	neutral_hue="slate",
	font=gr.themes.GoogleFont("Inter"),
	text_size="lg",
	spacing_size="lg",
	radius_size="md"
	).set(
	button_primary_background_fill="*primary_600",
	button_primary_background_fill_hover="*primary_700",
	block_title_text_weight="600",
	)

	# Build the Gradio interface
	with gr.Blocks() as demo:
	gr.Markdown(
	"""
	# 🤖 Smol LLM Inference GUI

	Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder) -
	Efficient text generation using TinyLlama

	This application runs a compact language model locally for text generation.
	Perfect for chat, completion tasks, and creative writing.
	"""
	)

	with gr.Row():
	with gr.Column(scale=2):
	# Model loading section
	with gr.Group():
	gr.Markdown("### 📦 Model Management")
	model_status = gr.Textbox(
	label="Model Status",
	value="Model not loaded. Click 'Load Model' to start.",
	interactive=False
	)
	load_btn = gr.Button(
	"🔄 Load Model",
	variant="primary",
	size="lg"
	)

	# Generation parameters
	gr.Markdown("### ⚙️ Generation Parameters")

	with gr.Row():
	max_length = gr.Slider(
	minimum=50,
	maximum=1024,
	value=200,
	step=50,
	label="Max Tokens"
	)
	temperature = gr.Slider(
	minimum=0.1,
	maximum=2.0,
	value=0.7,
	step=0.1,
	label="Temperature"
	)

	with gr.Row():
	top_p = gr.Slider(
	minimum=0.1,
	maximum=1.0,
	value=0.95,
	step=0.05,
	label="Top-p"
	)
	repetition_penalty = gr.Slider(
	minimum=1.0,
	maximum=2.0,
	value=1.1,
	step=0.1,
	label="Repetition Penalty"
	)

	system_prompt = gr.Textbox(
	label="System Prompt",
	value="You are a helpful AI assistant. Provide clear and concise answers.",
	lines=3,
	placeholder="Enter a system prompt to guide the model's behavior..."
	)

	with gr.Column(scale=3):
	# Main interface
	with gr.Group():
	gr.Markdown("### 💬 Text Generation")

	prompt_input = gr.Textbox(
	label="Enter your prompt",
	placeholder="Type your message here...",
	lines=4,
	autofocus=True
	)

	with gr.Row():
	generate_btn = gr.Button(
	"🚀 Generate",
	variant="primary",
	size="lg"
	)
	clear_btn = gr.Button(
	"🗑️ Clear",
	variant="secondary"
	)

	output_text = gr.Markdown(
	label="Generated Response",
	value="Response will appear here..."
	)

	# Example prompts
	with gr.Accordion("📝 Example Prompts", open=False):
	gr.Examples(
	examples=[
	["Write a short story about a robot discovering music."],
	["Explain quantum computing in simple terms."],
	["Create a poem about the changing seasons."],
	["What are the benefits of renewable energy?"],
	["Write a Python function to calculate fibonacci numbers."],
	["Describe the perfect day in your own words."],
	["Explain the concept of machine learning to a beginner."],
	["Create a dialogue between two friends planning a trip."]
	],
	inputs=[prompt_input],
	label="Click an example to get started"
	)

	# Event handlers
	load_btn.click(
	fn=load_model,
	outputs=[model_status],
	api_visibility="public"
	)

	generate_btn.click(
	fn=generate_text,
	inputs=[
	prompt_input,
	max_length,
	temperature,
	top_p,
	repetition_penalty,
	system_prompt
	],
	outputs=[output_text],
	api_visibility="public"
	)

	clear_btn.click(
	fn=clear_chat,
	outputs=[prompt_input],
	api_visibility="private"
	)

	# Allow Enter key to generate
	prompt_input.submit(
	fn=generate_text,
	inputs=[
	prompt_input,
	max_length,
	temperature,
	top_p,
	repetition_penalty,
	system_prompt
	],
	outputs=[output_text],
	api_visibility="public"
	)

	# Launch the application
	demo.launch(
	theme=custom_theme,
	footer_links=[
	{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
	{"label": "TinyLlama Model", "url": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0"},
	{"label": "Gradio", "url": "https://gradio.app"}
	],
	share=False,
	show_error=True
	)